diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/lib/Make.deps | 1 | ||||
-rw-r--r-- | src/lib/Makefile | 2 | ||||
-rw-r--r-- | src/lib/format/Makefile | 76 | ||||
-rw-r--r-- | src/lib/format/format.go | 786 | ||||
-rw-r--r-- | src/lib/format/format_test.go | 365 | ||||
-rw-r--r-- | src/lib/format/parser.go | 445 |
6 files changed, 1675 insertions, 0 deletions
diff --git a/src/lib/Make.deps b/src/lib/Make.deps index 6a965e327..4a1805f4c 100644 --- a/src/lib/Make.deps +++ b/src/lib/Make.deps @@ -10,6 +10,7 @@ exec.install: os.install strings.install exvar.install: fmt.install http.install io.install log.install strconv.install sync.install flag.install: fmt.install os.install strconv.install fmt.install: io.install os.install reflect.install strconv.install utf8.install +format.install: container/vector.install flag.install fmt.install go/scanner.install go/token.install io.install os.install reflect.install runtime.install strconv.install strings.install go/ast.install: go/token.install unicode.install utf8.install go/doc.install: container/vector.install fmt.install go/ast.install go/token.install io.install once.install regexp.install sort.install strings.install template.install go/parser.install: container/vector.install fmt.install go/ast.install go/scanner.install go/token.install io.install os.install diff --git a/src/lib/Makefile b/src/lib/Makefile index 8aa70cd47..d0658605e 100644 --- a/src/lib/Makefile +++ b/src/lib/Makefile @@ -26,6 +26,7 @@ DIRS=\ exvar\ flag\ fmt\ + format\ go/ast\ go/doc\ go/parser\ @@ -73,6 +74,7 @@ TEST=\ exvar\ flag\ fmt\ + format\ go/parser\ go/scanner\ hash/adler32\ diff --git a/src/lib/format/Makefile b/src/lib/format/Makefile new file mode 100644 index 000000000..597933241 --- /dev/null +++ b/src/lib/format/Makefile @@ -0,0 +1,76 @@ +# Copyright 2009 The Go Authors. All rights reserved. +# Use of this source code is governed by a BSD-style +# license that can be found in the LICENSE file. + +# DO NOT EDIT. Automatically generated by gobuild. 
+# gobuild -m >Makefile + +D= + +O_arm=5 +O_amd64=6 +O_386=8 +OS=568vq + +O=$(O_$(GOARCH)) +GC=$(O)g -I_obj +CC=$(O)c -FVw +AS=$(O)a +AR=6ar + +default: packages + +clean: + rm -rf *.[$(OS)] *.a [$(OS)].out _obj + +test: packages + gotest + +coverage: packages + gotest + 6cov -g `pwd` | grep -v '_test\.go:' + +%.$O: %.go + $(GC) $*.go + +%.$O: %.c + $(CC) $*.c + +%.$O: %.s + $(AS) $*.s + +O1=\ + format.$O\ + +O2=\ + parser.$O\ + + +phases: a1 a2 +_obj$D/format.a: phases + +a1: $(O1) + $(AR) grc _obj$D/format.a format.$O + rm -f $(O1) + +a2: $(O2) + $(AR) grc _obj$D/format.a parser.$O + rm -f $(O2) + + +newpkg: clean + mkdir -p _obj$D + $(AR) grc _obj$D/format.a + +$(O1): newpkg +$(O2): a1 +$(O3): a2 + +nuke: clean + rm -f $(GOROOT)/pkg$D/format.a + +packages: _obj$D/format.a + +install: packages + test -d $(GOROOT)/pkg && mkdir -p $(GOROOT)/pkg$D + cp _obj$D/format.a $(GOROOT)/pkg$D/format.a diff --git a/src/lib/format/format.go b/src/lib/format/format.go new file mode 100644 index 000000000..392a9d0f0 --- /dev/null +++ b/src/lib/format/format.go @@ -0,0 +1,786 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +/* The format package implements syntax-directed, type-driven formatting + of arbitrary data structures. Formatting a data structure consists of + two phases: first, a parser reads a format specification and builds a + "compiled" format. Then, the format can be applied repeatedly to + arbitrary values. Applying a format to a value evaluates to a []byte + containing the formatted value bytes, or nil. + + A format specification is a set of package declarations and format rules: + + Format = [ Entry { ";" Entry } [ ";" ] ] . + Entry = PackageDecl | FormatRule . + + (The syntax of a format specification is presented in the same EBNF + notation as used in the Go language specification. 
The syntax of white + space, comments, identifiers, and string literals is the same as in Go.) + + A package declaration binds a package name (such as 'ast') to a + package import path (such as '"go/ast"'). Each package used (in + a type name, see below) must be declared once before use. + + PackageDecl = PackageName ImportPath . + PackageName = identifier . + ImportPath = string . + + A format rule binds a rule name to a format expression. A rule name + may be a type name or one of the special names 'default' or '/'. + A type name may be the name of a predeclared type (for example, 'int', + 'float32', etc.), the package-qualified name of a user-defined type + (for example, 'ast.MapType'), or an identifier indicating the structure + of unnamed composite types ('array', 'chan', 'func', 'interface', 'map', + or 'ptr'). Each rule must have a unique name; rules can be declared in + any order. + + FormatRule = RuleName "=" Expression . + RuleName = TypeName | "default" | "/" . + TypeName = [ PackageName "." ] identifier . + + To format a value, the value's type name is used to select the format rule + (there is an override mechanism, see below). The format expression of the + selected rule specifies how the value is formatted. Each format expression, + when applied to a value, evaluates to a byte sequence or nil. + + In its most general form, a format expression is a list of alternatives, + each of which is a sequence of operands: + + Expression = [ Sequence ] { "|" [ Sequence ] } . + Sequence = Operand { Operand } . + + The formatted result produced by an expression is the result of the first + alternative sequence that evaluates to a non-nil result; if there is no + such alternative, the expression evaluates to nil. The result produced by + an operand sequence is the concatenation of the results of its operands. + If any operand in the sequence evaluates to nil, the entire sequence + evaluates to nil. 
+ + There are five kinds of operands: + + Operand = Literal | Field | Group | Option | Repetition . + + Literals evaluate to themselves, with two substitutions. First, + %-formats expand in the manner of fmt.Printf, with the current value + passed as the parameter. Second, the current indentation (see below) + is inserted after every newline character. + + Literal = string . + + This table shows string literals applied to the value 42 and the + corresponding formatted result: + + "foo" foo + "%x" 2a + "x = %d" x = 42 + "%#x = %d" 0x2a = 42 + + A field operand is a field name optionally followed by an alternate + rule name. The field name may be an identifier or one of the special + names ^ or *. + + Field = FieldName [ ":" RuleName ] . + FieldName = identifier | "^" | "*" . + + If the field name is an identifier, the current value must be a struct, + and there must be a field with that name in the struct. The same lookup + rules apply as in the Go language (for instance, the name of an anonymous + field is the unqualified type name). The field name denotes the field + value in the struct. If the field is not found, formatting is aborted + and an error message is returned. (TODO consider changing the semantics + such that if a field is not found, it evaluates to nil). + + The special name '^' denotes the current value. (TODO see if ^ can + change to @ or be eliminated). + + The meaning of the special name '*' depends on the type of the current + value: + + array, slice types array, slice element (inside {} only, see below) + interfaces value stored in interface + pointers value pointed to by pointer + + (Implementation restriction: channel, function and map types are not + supported due to missing reflection support). + + Fields are evaluated as follows: If the field value is nil, or an array + or slice element does not exist, the result is nil (see below for details + on array/slice elements). 
If the value is not nil the field value is + formatted (recursively) using the rule corresponding to its type name, + or the alternate rule name, if given. + + The following example shows a complete format specification for a + struct 'myPackage.Point'. Assume the package + + package myPackage // in directory myDir/myPackage + type Point struct { + name string; + x, y int; + } + + Applying the format specification + + myPackage "myDir/myPackage"; + int = "%d"; + hexInt = "0x%x"; + string = "---%s---"; + myPackage.Point = name "{" x ", " y:hexInt "}"; + + to the value myPackage.Point{"foo", 3, 15} results in + + ---foo---{3, 0xf} + + Finally, an operand may be a grouped, optional, or repeated expression. + A grouped expression ("group") groups a more complex expression (body) + so that it can be used in place of a single operand: + + Group = "(" [ Indentation ">>" ] Body ")" . + Indentation = Expression . + Body = Expression . + + A group body may be prefixed by an indentation expression followed by '>>'. + The indentation expression is applied to the current value like any other + expression and the result, if not nil, is appended to the current indentation + during the evaluation of the body (see also formatting state, below). + + An optional expression ("option") is enclosed in '[]' brackets. + + Option = "[" Body "]" . + + An option evaluates to its body, except that if the body evaluates to nil, + the option expression evaluates to an empty []byte. Thus an option's purpose + is to protect the expression containing the option from a nil operand. + + A repeated expression ("repetition") is enclosed in '{}' braces. + + Repetition = "{" Body [ "/" Separator ] "}" . + Separator = Expression . + + A repeated expression is evaluated as follows: The body is evaluated + repeatedly and its results are concatenated until the body evaluates + to nil. The result of the repetition is the (possibly empty) concatenation, + but it is never nil. 
An implicit index is supplied for the evaluation of + the body: that index is used to address elements of arrays or slices. If + the corresponding elements do not exist, the field denoting the element + evaluates to nil (which in turn may terminate the repetition). + + The body of a repetition may be followed by a '/' and a "separator" + expression. If the separator is present, it is invoked between repetitions + of the body. + + The following example shows a complete format specification for formatting + a slice of unnamed type. Applying the specification + + int = "%b"; + array = { * / ", " }; // array is the type name for an unnamed slice + + to the value '[]int{2, 3, 5, 7}' results in + + 10, 11, 101, 111 + + Default rule: If a format rule named 'default' is present, it is used for + formatting a value if no other rule was found. A common default rule is + + default = "%v" + + to provide default formatting for basic types without having to specify + a specific rule for each basic type. + + Global separator rule: If a format rule named '/' is present, it is + invoked with the current value between literals. If the separator + expression evaluates to nil, it is ignored. + + For instance, a global separator rule may be used to punctuate a sequence + of values with commas. The rules: + + default = "%v"; + / = ", "; + + will format an argument list by printing each one in its default format, + separated by a comma and a space. +*/ +package format + +import ( + "container/vector"; + "fmt"; + "go/token"; + "io"; + "os"; + "reflect"; + "runtime"; + "strconv"; + "strings"; +) + + +// ---------------------------------------------------------------------------- +// Format representation + +type State struct + +// Custom formatters implement the Formatter function type. 
+// A formatter is invoked with the current formatting state, the +// value to format, and the rule name under which the formatter +// was installed (the same formatter function may be installed +// under different names). The formatter may access the current state +// to guide formatting and use State.Write to append to the state's +// output. +// +// A formatter must return a boolean value indicating if it evaluated +// to a non-nil value (true), or a nil value (false). +// +type Formatter func(state *State, value interface{}, ruleName string) bool + + +// A FormatterMap is a set of custom formatters. +// It maps a rule name to a formatter function. +// +type FormatterMap map [string] Formatter; + + +// A parsed format expression is built from the following nodes. +// +type ( + expr interface {}; + + alternatives []expr; // x | y | z + + sequence []expr; // x y z + + literal [][]byte; // a list of string segments, possibly starting with '%' + + field struct { + fieldName string; // including "^", "*" + ruleName string; // "" if no rule name specified + }; + + group struct { + indent, body expr; // (indent >> body) + }; + + option struct { + body expr; // [body] + }; + + repetition struct { + body, separator expr; // {body / separator} + }; + + custom struct { + ruleName string; + fun Formatter + }; +) + + +// A Format is the result of parsing a format specification. +// The format may be applied repeatedly to format values. +// +type Format map [string] expr; + + +// ---------------------------------------------------------------------------- +// Formatting + +// An application-specific environment may be provided to Format.Eval; +// the environment is available inside custom formatters via State.Env(). +// Environments must implement copying; the Copy method must return a +// complete copy of the receiver. This is necessary so that the formatter +// can save and restore an environment (in case of an absent expression).
+// +// If the Environment doesn't change during formatting (this is under +// control of the custom formatters), the Copy function can simply return +// the receiver, and thus can be very light-weight. +// +type Environment interface { + Copy() Environment +} + + +// State represents the current formatting state. +// It is provided as argument to custom formatters. +// +type State struct { + fmt Format; // format in use + env Environment; // user-supplied environment + errors chan os.Error; // not chan *Error (errors <- nil would be wrong!) + hasOutput bool; // true after the first literal has been written + indent io.ByteBuffer; // current indentation + output io.ByteBuffer; // format output + linePos token.Position; // position of line beginning (Column == 0) + default_ expr; // possibly nil + separator expr; // possibly nil +} + + +func newState(fmt Format, env Environment, errors chan os.Error) *State { + s := new(State); + s.fmt = fmt; + s.env = env; + s.errors = errors; + s.linePos = token.Position{Line: 1}; + + // if we have a default rule, cache its expression for fast access + if x, found := fmt["default"]; found { + s.default_ = x; + } + + // if we have a global separator rule, cache its expression for fast access + if x, found := fmt["/"]; found { + s.separator = x; + } + + return s; +} + + +// Env returns the environment passed to Format.Eval. +func (s *State) Env() interface{} { + return s.env; +} + + +// LinePos returns the position of the current line beginning +// in the state's output buffer. Line numbers start at 1.
+// +func (s *State) Pos() token.Position { + offs := s.output.Len(); + return token.Position{Line: s.linePos.Line, Column: offs - s.linePos.Offset, Offset: offs}; +} + + +// Write writes data to the output buffer, inserting the indentation +// string after each newline. It cannot return an error. +// +func (s *State) Write(data []byte) (int, os.Error) { + n := 0; + i0 := 0; + for i, ch := range data { + if ch == '\n' { + // write text segment and indentation + n1, _ := s.output.Write(data[i0 : i+1]); + n2, _ := s.output.Write(s.indent.Data()); + n += n1 + n2; + i0 = i + 1; + s.linePos.Offset = s.output.Len(); + s.linePos.Line++; + } + } + n3, _ := s.output.Write(data[i0 : len(data)]); + return n + n3, nil; +} + + +type checkpoint struct { + env Environment; + hasOutput bool; + outputLen int; + linePos token.Position; +} + + +func (s *State) save() checkpoint { + saved := checkpoint{nil, s.hasOutput, s.output.Len(), s.linePos}; + if s.env != nil { + saved.env = s.env.Copy(); + } + return saved; +} + + +func (s *State) restore(m checkpoint) { + s.env = m.env; + s.output.Truncate(m.outputLen); +} + + +func (s *State) error(msg string) { + s.errors <- os.NewError(msg); + runtime.Goexit(); +} + + +// getField searches in val, which must be a struct, for a field +// with the given name. It returns the value and the embedded depth +// where it was found. +// +func getField(val reflect.Value, fieldname string) (reflect.Value, int) { + // do we have a struct in the first place? + if val.Kind() != reflect.StructKind { + return nil, 0; + } + + sval, styp := val.(reflect.StructValue), val.Type().(reflect.StructType); + + // look for field at the top level + for i := 0; i < styp.Len(); i++ { + name, typ, tag, offset := styp.Field(i); + if name == fieldname || name == "" && strings.HasSuffix(typ.Name(), "." 
+ fieldname) /* anonymous field */ { + return sval.Field(i), 0; + } + } + + // look for field in anonymous fields + var field reflect.Value; + level := 1000; // infinity (no struct has that many levels) + for i := 0; i < styp.Len(); i++ { + name, typ, tag, offset := styp.Field(i); + if name == "" { + f, l := getField(sval.Field(i), fieldname); + // keep the most shallow field + if f != nil { + switch { + case l < level: + field, level = f, l; + case l == level: + // more than one field at the same level, + // possibly an error unless there is a more + // shallow field found later + field = nil; + } + } + } + } + + return field, level + 1; +} + + +// TODO At the moment, unnamed types are simply mapped to the default +// names below. For instance, all unnamed arrays are mapped to +// 'array' which is not really sufficient. Eventually one may want +// to be able to specify rules for say an unnamed slice of T. +// +var defaultNames = map[int]string { + reflect.ArrayKind: "array", + reflect.BoolKind: "bool", + reflect.ChanKind: "chan", + reflect.DotDotDotKind: "ellipsis", + reflect.FloatKind: "float", + reflect.Float32Kind: "float32", + reflect.Float64Kind: "float64", + reflect.FuncKind: "func", + reflect.IntKind: "int", + reflect.Int16Kind: "int16", + reflect.Int32Kind: "int32", + reflect.Int64Kind: "int64", + reflect.Int8Kind: "int8", + reflect.InterfaceKind: "interface", + reflect.MapKind: "map", + reflect.PtrKind: "ptr", + reflect.StringKind: "string", + reflect.StructKind: "struct", + reflect.UintKind: "uint", + reflect.Uint16Kind: "uint16", + reflect.Uint32Kind: "uint32", + reflect.Uint64Kind: "uint64", + reflect.Uint8Kind: "uint8", + reflect.UintptrKind: "uintptr", +} + + +func typename(value reflect.Value) string { + name := value.Type().Name(); + if name == "" { + if defaultName, found := defaultNames[value.Kind()]; found { + name = defaultName; + } + } + return name; +} + + +func (s *State) getFormat(name string) expr { + if fexpr, found := s.fmt[name]; found 
{ + return fexpr; + } + + if s.default_ != nil { + return s.default_; + } + + s.error(fmt.Sprintf("no format rule for type: '%s'", name)); + return nil; +} + + +// eval applies a format expression fexpr to a value. If the expression +// evaluates internally to a non-nil []byte, that slice is appended to +// the state's output buffer and eval returns true. Otherwise, eval +// returns false and the state remains unchanged. +// +func (s *State) eval(fexpr expr, value reflect.Value, index int) bool { + // an empty format expression always evaluates + // to a non-nil (but empty) []byte + if fexpr == nil { + return true; + } + + switch t := fexpr.(type) { + case alternatives: + // append the result of the first alternative that evaluates to + // a non-nil []byte to the state's output + mark := s.save(); + for _, x := range t { + if s.eval(x, value, index) { + return true; + } + s.restore(mark); + } + return false; + + case sequence: + // append the result of all operands to the state's output + // unless a nil result is encountered + mark := s.save(); + for _, x := range t { + if !s.eval(x, value, index) { + s.restore(mark); + return false; + } + } + return true; + + case literal: + // write separator, if any + if s.hasOutput { + // not the first literal + if s.separator != nil { + sep := s.separator; // save current separator + s.separator = nil; // and disable it (avoid recursion) + mark := s.save(); + if !s.eval(sep, value, index) { + s.restore(mark); + } + s.separator = sep; // enable it again + } + } + s.hasOutput = true; + // write literal segments + for _, lit := range t { + if lit[0] == '%' && len(lit) > 1 { + // segment contains a %-format at the beginning + if lit[1] == '%' { + // "%%" is printed as a single "%" + s.Write(lit[1 : len(lit)]); + } else { + // use s instead of s.output to get indentation right + fmt.Fprintf(s, string(lit), value.Interface()); + } + } else { + // segment contains no %-formats + s.Write(lit); + } + } + return true; // a literal 
never evaluates to nil + + case *field: + // determine field value + switch t.fieldName { + case "^": + // field value is current value + + case "*": + // indirection: operation is type-specific + switch v := value.(type) { + case reflect.ArrayValue: + if v.IsNil() || v.Len() <= index { + return false; + } + value = v.Elem(index); + + case reflect.MapValue: + s.error("reflection support for maps incomplete"); + + case reflect.PtrValue: + if v.IsNil() { + return false; + } + value = v.Sub(); + + case reflect.InterfaceValue: + if v.IsNil() { + return false; + } + value = v.Value(); + + case reflect.ChanValue: + s.error("reflection support for chans incomplete"); + + case reflect.FuncValue: + s.error("reflection support for funcs incomplete"); + + default: + s.error(fmt.Sprintf("error: * does not apply to `%s`", value.Type().Name())); + } + + default: + // value is value of named field + field, _ := getField(value, t.fieldName); + if field == nil { + // TODO consider just returning false in this case + s.error(fmt.Sprintf("error: no field `%s` in `%s`", t.fieldName, value.Type().Name())); + } + value = field; + } + + // determine rule + ruleName := t.ruleName; + if ruleName == "" { + // no alternate rule name, value type determines rule + ruleName = typename(value) + } + fexpr = s.getFormat(ruleName); + + mark := s.save(); + if !s.eval(fexpr, value, index) { + s.restore(mark); + return false; + } + return true; + + case *group: + // remember current indentation + indentLen := s.indent.Len(); + + // update current indentation + mark := s.save(); + s.eval(t.indent, value, index); + // if the indentation evaluates to nil, the state's output buffer + // didn't change - either way it's ok to append the difference to + // the current indentation + s.indent.Write(s.output.Data()[mark.outputLen : s.output.Len()]); + s.restore(mark); + + // format group body + mark = s.save(); + b := true; + if !s.eval(t.body, value, index) { + s.restore(mark); + b = false; + } + + // reset
indentation + s.indent.Truncate(indentLen); + return b; + + case *option: + // evaluate the body and append the result to the state's output + // buffer unless the result is nil + mark := s.save(); + if !s.eval(t.body, value, 0) { // TODO is 0 index correct? + s.restore(mark); + } + return true; // an option never evaluates to nil + + case *repetition: + // evaluate the body and append the result to the state's output + // buffer until a result is nil + for i := 0; ; i++ { + mark := s.save(); + // write separator, if any + if i > 0 && t.separator != nil { + // nil result from separator is ignored + mark := s.save(); + if !s.eval(t.separator, value, i) { + s.restore(mark); + } + } + if !s.eval(t.body, value, i) { + s.restore(mark); + break; + } + } + return true; // a repetition never evaluates to nil + + case *custom: + // invoke the custom formatter to obtain the result + mark := s.save(); + if !t.fun(s, value.Interface(), t.ruleName) { + s.restore(mark); + return false; + } + return true; + } + + panic("unreachable"); + return false; +} + + +// Eval formats each argument according to the format +// f and returns the resulting []byte and os.Error. If +// an error occurred, the []byte contains the partially +// formatted result. An environment env may be passed +// in which is available in custom formatters through +// the state parameter. +// +func (f Format) Eval(env Environment, args ...) ([]byte, os.Error) { + errors := make(chan os.Error); + s := newState(f, env, errors); + + go func() { + value := reflect.NewValue(args).(reflect.StructValue); + for i := 0; i < value.Len(); i++ { + fld := value.Field(i); + mark := s.save(); + if !s.eval(s.getFormat(typename(fld)), fld, 0) { // TODO is 0 index correct?
+ s.restore(mark); + } + } + errors <- nil; // no errors + }(); + + return s.output.Data(), <- errors; +} + + +// ---------------------------------------------------------------------------- +// Convenience functions + +// Fprint formats each argument according to the format f +// and writes to w. The result is the total number of bytes +// written and an os.Error, if any. +// +func (f Format) Fprint(w io.Writer, env Environment, args ...) (int, os.Error) { + data, err := f.Eval(env, args); + if err != nil { + // TODO should we print partial result in case of error? + return 0, err; + } + return w.Write(data); +} + + +// Print formats each argument according to the format f +// and writes to standard output. The result is the total +// number of bytes written and an os.Error, if any. +// +func (f Format) Print(args ...) (int, os.Error) { + return f.Fprint(os.Stdout, nil, args); +} + + +// Sprint formats each argument according to the format f +// and returns the resulting string. If an error occurs +// during formatting, the result string contains the +// partially formatted result followed by an error message. +// +func (f Format) Sprint(args ...) string { + var buf io.ByteBuffer; + n, err := f.Fprint(&buf, nil, args); + if err != nil { + fmt.Fprintf(&buf, "--- Sprint(%s) failed: %v", fmt.Sprint(args), err); + } + return string(buf.Data()); +} diff --git a/src/lib/format/format_test.go b/src/lib/format/format_test.go new file mode 100644 index 000000000..92e0d0ea5 --- /dev/null +++ b/src/lib/format/format_test.go @@ -0,0 +1,365 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package format + +import ( + "fmt"; + "format"; + "io"; + "os"; + "testing"; +) + + +func parse(t *testing.T, form string, fmap format.FormatterMap) format.Format { + f, err := format.Parse(io.StringBytes(form), fmap); + if err != nil { + t.Errorf("Parse(%s): %v", err); + return nil; + } + return f; +} + + +func verify(t *testing.T, f format.Format, expected string, args ...) { + if f == nil { + return; // allow other tests to run + } + result := f.Sprint(args); + if result != expected { + t.Errorf( + "result : `%s`\nexpected: `%s`\n\n", + result, expected + ) + } +} + + +func formatter(s *format.State, value interface{}, rule_name string) bool { + switch rule_name { + case "/": + fmt.Fprintf(s, "%d %d %d", s.Pos().Line, s.LinePos().Column, s.Pos().Column); + return true; + case "blank": + s.Write([]byte{' '}); + return true; + case "int": + if value.(int) & 1 == 0 { + fmt.Fprint(s, "even "); + } else { + fmt.Fprint(s, "odd "); + } + return true; + case "nil": + return false; + } + panic("unreachable"); + return false; +} + + +func TestCustomFormatters(t *testing.T) { + fmap0 := format.FormatterMap{ "/": formatter }; + fmap1 := format.FormatterMap{ "int": formatter, "blank": formatter, "nil": formatter }; + + f := parse(t, `int=`, fmap0); + verify(t, f, ``, 1, 2, 3); + + f = parse(t, `int="#"`, nil); + verify(t, f, `###`, 1, 2, 3); + + f = parse(t, `int="#";string="%s"`, fmap0); + verify(t, f, "#1 0 1#1 0 7#1 0 13\n2 0 0foo2 0 8\n", 1, 2, 3, "\n", "foo", "\n"); + + f = parse(t, ``, fmap1); + verify(t, f, `even odd even odd `, 0, 1, 2, 3); + + f = parse(t, `/ =^:blank; float="#"`, fmap1); + verify(t, f, `# # #`, 0.0, 1.0, 2.0); + + f = parse(t, `float=^:nil`, fmap1); + verify(t, f, ``, 0.0, 1.0, 2.0); + + // TODO needs more tests +} + + +// ---------------------------------------------------------------------------- +// Formatting of basic and simple composite types + +func check(t *testing.T, form, expected string, args ...) 
{ + f := parse(t, form, nil); + result := f.Sprint(args); + if result != expected { + t.Errorf( + "format : %s\nresult : `%s`\nexpected: `%s`\n\n", + form, result, expected + ) + } +} + + +func TestBasicTypes(t *testing.T) { + check(t, ``, ``); + check(t, `bool=":%v"`, `:true:false`, true, false); + check(t, `int="%b %d %o 0x%x"`, `101010 42 52 0x2a`, 42); + + check(t, `int="%"`, `%`, 42); + check(t, `int="%%"`, `%`, 42); + check(t, `int="**%%**"`, `**%**`, 42); + check(t, `int="%%%%%%"`, `%%%`, 42); + check(t, `int="%%%d%%"`, `%42%`, 42); + + const i = -42; + const is = `-42`; + check(t, `int ="%d"`, is, i); + check(t, `int8 ="%d"`, is, int8(i)); + check(t, `int16="%d"`, is, int16(i)); + check(t, `int32="%d"`, is, int32(i)); + check(t, `int64="%d"`, is, int64(i)); + + const u = 42; + const us = `42`; + check(t, `uint ="%d"`, us, uint(u)); + check(t, `uint8 ="%d"`, us, uint8(u)); + check(t, `uint16="%d"`, us, uint16(u)); + check(t, `uint32="%d"`, us, uint32(u)); + check(t, `uint64="%d"`, us, uint64(u)); + + const f = 3.141592; + const fs = `3.141592`; + check(t, `float ="%g"`, fs, f); + check(t, `float32="%g"`, fs, float32(f)); + check(t, `float64="%g"`, fs, float64(f)); +} + + +func TestArrayTypes(t *testing.T) { + var a0 [10]int; + check(t, `array="array";`, `array`, a0); + + a1 := [...]int{1, 2, 3}; + check(t, `array="array";`, `array`, a1); + check(t, `array={*}; int="%d";`, `123`, a1); + check(t, `array={* / ", "}; int="%d";`, `1, 2, 3`, a1); + check(t, `array={* / *}; int="%d";`, `12233`, a1); + + a2 := []interface{}{42, "foo", 3.14}; + check(t, `array={* / ", "}; interface=*; string="bar"; default="%v";`, `42, bar, 3.14`, a2); +} + + +func TestChanTypes(t *testing.T) { + var c0 chan int; + check(t, `chan="chan"`, `chan`, c0); + + c1 := make(chan int); + go func(){ c1 <- 42 }(); + check(t, `chan="chan"`, `chan`, c1); + // check(t, `chan=*`, `42`, c1); // reflection support for chans incomplete +} + + +func TestFuncTypes(t *testing.T) { + var f0 func() int; + 
check(t, `func="func"`, `func`, f0); + + f1 := func() int { return 42; }; + check(t, `func="func"`, `func`, f1); + // check(t, `func=*`, `42`, f1); // reflection support for funcs incomplete +} + + +func TestInterfaceTypes(t *testing.T) { + var i0 interface{}; + check(t, `interface="interface"`, `interface`, i0); + + i0 = "foo"; + check(t, `interface="interface"`, `interface`, i0); + check(t, `interface=*; string="%s"`, `foo`, i0); +} + + +func TestMapTypes(t *testing.T) { + var m0 map[string]int; + check(t, `map="map"`, `map`, m0); + + m1 := map[string]int{}; + check(t, `map="map"`, `map`, m1); + // check(t, `map=*`, ``, m1); // reflection support for maps incomplete +} + + +func TestPointerTypes(t *testing.T) { + var p0 *int; + check(t, `ptr="ptr"`, `ptr`, p0); + check(t, `ptr=*`, ``, p0); + check(t, `ptr=*|"nil"`, `nil`, p0); + + x := 99991; + p1 := &x; + check(t, `ptr="ptr"`, `ptr`, p1); + check(t, `ptr=*; int="%d"`, `99991`, p1); +} + + +func TestDefaultRule(t *testing.T) { + check(t, `default="%v"`, `42foo3.14`, 42, "foo", 3.14); + check(t, `default="%v"; int="%x"`, `abcdef`, 10, 11, 12, 13, 14, 15); + check(t, `default="%v"; int="%x"`, `ab**ef`, 10, 11, "**", 14, 15); + check(t, `default="%x"; int=^:default`, `abcdef`, 10, 11, 12, 13, 14, 15); +} + + +func TestGlobalSeparatorRule(t *testing.T) { + check(t, `int="%d"; / ="-"`, `1-2-3-4`, 1, 2, 3, 4); + check(t, `int="%x%x"; / ="*"`, `aa*aa`, 10, 10); +} + + +// ---------------------------------------------------------------------------- +// Formatting of a struct + +type T1 struct { + a int; +} + +const F1 = + `format "format";` + `int = "%d";` + `format.T1 = "<" a ">";` + +func TestStruct1(t *testing.T) { + check(t, F1, "<42>", T1{42}); +} + + +// ---------------------------------------------------------------------------- +// Formatting of a struct with an optional field (ptr) + +type T2 struct { + s string; + p *T1; +} + +const F2a = + F1 + + `string = "%s";` + `ptr = *;` + `format.T2 = s ["-" p "-"];` + 
+const F2b = + F1 + + `string = "%s";` + `ptr = *;` + `format.T2 = s ("-" p "-" | "empty");`; + +func TestStruct2(t *testing.T) { + check(t, F2a, "foo", T2{"foo", nil}); + check(t, F2a, "bar-<17>-", T2{"bar", &T1{17}}); + check(t, F2b, "fooempty", T2{"foo", nil}); +} + + +// ---------------------------------------------------------------------------- +// Formatting of a struct with a repetitive field (slice) + +type T3 struct { + s string; + a []int; +} + +const F3a = + `format "format";` + `default = "%v";` + `array = *;` + `format.T3 = s {" " a a / ","};` + +const F3b = + `format "format";` + `int = "%d";` + `string = "%s";` + `array = *;` + `nil = ;` + `empty = *:nil;` + `format.T3 = s [a:empty ": " {a / "-"}]` + +func TestStruct3(t *testing.T) { + check(t, F3a, "foo", T3{"foo", nil}); + check(t, F3a, "foo 00, 11, 22", T3{"foo", []int{0, 1, 2}}); + check(t, F3b, "bar", T3{"bar", nil}); + check(t, F3b, "bal: 2-3-5", T3{"bal", []int{2, 3, 5}}); +} + + +// ---------------------------------------------------------------------------- +// Formatting of a struct with alternative field + +type T4 struct { + x *int; + a []int; +} + +const F4a = + `format "format";` + `int = "%d";` + `ptr = *;` + `array = *;` + `nil = ;` + `empty = *:nil;` + `format.T4 = "<" (x:empty x | "-") ">" ` + +const F4b = + `format "format";` + `int = "%d";` + `ptr = *;` + `array = *;` + `nil = ;` + `empty = *:nil;` + `format.T4 = "<" (a:empty {a / ", "} | "-") ">" ` + +func TestStruct4(t *testing.T) { + x := 7; + check(t, F4a, "<->", T4{nil, nil}); + check(t, F4a, "<7>", T4{&x, nil}); + check(t, F4b, "<->", T4{nil, nil}); + check(t, F4b, "<2, 3, 7>", T4{nil, []int{2, 3, 7}}); +} + + +// ---------------------------------------------------------------------------- +// Formatting a struct (documentation example) + +type Point struct { + name string; + x, y int; +} + +const FPoint = + `format "format";` + `int = "%d";` + `hexInt = "0x%x";` + `string = "---%s---";` + `format.Point = name "{" x ", " 
y:hexInt "}";` + +func TestStructPoint(t *testing.T) { + p := Point{"foo", 3, 15}; + check(t, FPoint, "---foo---{3, 0xf}", p); +} + + +// ---------------------------------------------------------------------------- +// Formatting a slice (documentation example) + +const FSlice = + `int = "%b";` + `array = { * / ", " }` + +func TestSlice(t *testing.T) { + check(t, FSlice, "10, 11, 101, 111", []int{2, 3, 5, 7}); +} + + +// TODO add more tests diff --git a/src/lib/format/parser.go b/src/lib/format/parser.go new file mode 100644 index 000000000..a6e6e5e8e --- /dev/null +++ b/src/lib/format/parser.go @@ -0,0 +1,445 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package format + +import ( + "container/vector"; + "fmt"; + "format"; + "go/scanner"; + "go/token"; + "io"; + "os"; + "strconv"; + "strings"; +) + +// ---------------------------------------------------------------------------- +// Error handling + +// Error describes an individual error. The position Pos, if valid, +// indicates the format source position the error relates to. The +// error is specified with the Msg string. +// +type Error struct { + Pos token.Position; + Msg string; +} + + +func (e *Error) String() string { + pos := ""; + if e.Pos.IsValid() { + pos = fmt.Sprintf("%d:%d: ", e.Pos.Line, e.Pos.Column); + } + return pos + e.Msg; +} + + +// An ErrorList is a list of errors encountered during parsing. +type ErrorList []*Error + + +// ErrorList implements SortInterface and the os.Error interface. 
+ +func (p ErrorList) Len() int { return len(p); } +func (p ErrorList) Swap(i, j int) { p[i], p[j] = p[j], p[i]; } +func (p ErrorList) Less(i, j int) bool { return p[i].Pos.Offset < p[j].Pos.Offset; } + + +func (p ErrorList) String() string { + switch len(p) { + case 0: return "unspecified error"; + case 1: return p[0].String(); + } + return fmt.Sprintf("%s (and %d more errors)", p[0].String(), len(p) - 1); +} + + +// ---------------------------------------------------------------------------- +// Parsing + +type parser struct { + errors vector.Vector; + scanner scanner.Scanner; + pos token.Position; // token position + tok token.Token; // one token look-ahead + lit []byte; // token literal + + packs map [string] string; // PackageName -> ImportPath + rules map [string] expr; // RuleName -> Expression +} + + +func (p *parser) next() { + p.pos, p.tok, p.lit = p.scanner.Scan(); + switch p.tok { + case token.CHAN, token.FUNC, token.INTERFACE, token.MAP, token.STRUCT: + // Go keywords for composite types are type names + // returned by reflect. Accept them as identifiers. + p.tok = token.IDENT; // p.lit is already set correctly + } +} + + +func (p *parser) init(src []byte) { + p.errors.Init(0); + p.scanner.Init(src, p, 0); + p.next(); // initializes pos, tok, lit + p.packs = make(map [string] string); + p.rules = make(map [string] expr); +} + + +// The parser implements scanner.Error. +func (p *parser) Error(pos token.Position, msg string) { + // Don't collect errors that are on the same line as the previous error + // in the hope to reduce the number of spurious errors due to incorrect + // parser synchronization. 
+ if p.errors.Len() == 0 || p.errors.Last().(*Error).Pos.Line != pos.Line { + p.errors.Push(&Error{pos, msg}); + } +} + + +func (p *parser) errorExpected(pos token.Position, msg string) { + msg = "expected " + msg; + if pos.Offset == p.pos.Offset { + // the error happened at the current position; + // make the error message more specific + msg += ", found '" + p.tok.String() + "'"; + if p.tok.IsLiteral() { + msg += " " + string(p.lit); + } + } + p.Error(pos, msg); +} + + +func (p *parser) expect(tok token.Token) token.Position { + pos := p.pos; + if p.tok != tok { + p.errorExpected(pos, "'" + tok.String() + "'"); + } + p.next(); // make progress in any case + return pos; +} + + +func (p *parser) parseIdentifier() string { + name := string(p.lit); + p.expect(token.IDENT); + return name; +} + + +func (p *parser) parseTypeName() (string, bool) { + pos := p.pos; + name, isIdent := p.parseIdentifier(), true; + if p.tok == token.PERIOD { + // got a package name, lookup package + if importPath, found := p.packs[name]; found { + name = importPath; + } else { + p.Error(pos, "package not declared: " + name); + } + p.next(); + name, isIdent = name + "." + p.parseIdentifier(), false; + } + return name, isIdent; +} + + +// Parses a rule name and returns it. If the rule name is +// a package-qualified type name, the package name is resolved. +// The 2nd result value is true iff the rule name consists of a +// single identifier only (and thus could be a package name). 
+// +func (p *parser) parseRuleName() (string, bool) { + name, isIdent := "", false; + switch p.tok { + case token.IDENT: + name, isIdent = p.parseTypeName(); + case token.DEFAULT: + name = "default"; + p.next(); + case token.QUO: + name = "/"; + p.next(); + default: + p.errorExpected(p.pos, "rule name"); + p.next(); // make progress in any case + } + return name, isIdent; +} + + +func (p *parser) parseString() string { + s := ""; + if p.tok == token.STRING { + var err os.Error; + s, err = strconv.Unquote(string(p.lit)); + // Unquote may fail with an error, but only if the scanner found + // an illegal string in the first place. In this case the error + // has already been reported. + p.next(); + return s; + } else { + p.expect(token.STRING); + } + return s; +} + + +func (p *parser) parseLiteral() literal { + s := io.StringBytes(p.parseString()); + + // A string literal may contain %-format specifiers. To simplify + // and speed up printing of the literal, split it into segments + // that start with "%" possibly followed by a last segment that + // starts with some other character. 
+ var list vector.Vector; + list.Init(0); + i0 := 0; + for i := 0; i < len(s); i++ { + if s[i] == '%' && i+1 < len(s) { + // the next segment starts with a % format + if i0 < i { + // the current segment is not empty, split it off + list.Push(s[i0 : i]); + i0 = i; + } + i++; // skip %; let loop skip over char after % + } + } + // the final segment may start with any character + // (it is empty iff the string is empty) + list.Push(s[i0 : len(s)]); + + // convert list into a literal + lit := make(literal, list.Len()); + for i := 0; i < list.Len(); i++ { + lit[i] = list.At(i).([]byte); + } + + return lit; +} + + +func (p *parser) parseField() expr { + var fname string; + switch p.tok { + case token.XOR: + fname = "^"; + p.next(); + case token.MUL: + fname = "*"; + p.next(); + case token.IDENT: + fname = p.parseIdentifier(); + default: + return nil; + } + + var ruleName string; + if p.tok == token.COLON { + p.next(); + var _ bool; + ruleName, _ = p.parseRuleName(); + } + + return &field{fname, ruleName}; +} + + +func (p *parser) parseExpression() expr + +func (p *parser) parseOperand() (x expr) { + switch p.tok { + case token.STRING: + x = p.parseLiteral(); + + case token.LPAREN: + p.next(); + x = p.parseExpression(); + if p.tok == token.SHR { + p.next(); + x = &group{x, p.parseExpression()}; + } + p.expect(token.RPAREN); + + case token.LBRACK: + p.next(); + x = &option{p.parseExpression()}; + p.expect(token.RBRACK); + + case token.LBRACE: + p.next(); + x = p.parseExpression(); + var div expr; + if p.tok == token.QUO { + p.next(); + div = p.parseExpression(); + } + x = &repetition{x, div}; + p.expect(token.RBRACE); + + default: + x = p.parseField(); // may be nil + } + + return x; +} + + +func (p *parser) parseSequence() expr { + var list vector.Vector; + list.Init(0); + + for x := p.parseOperand(); x != nil; x = p.parseOperand() { + list.Push(x); + } + + // no need for a sequence if list.Len() < 2 + switch list.Len() { + case 0: return nil; + case 1: return 
list.At(0).(expr); + } + + // convert list into a sequence + seq := make(sequence, list.Len()); + for i := 0; i < list.Len(); i++ { + seq[i] = list.At(i).(expr); + } + return seq; +} + + +func (p *parser) parseExpression() expr { + var list vector.Vector; + list.Init(0); + + for { + x := p.parseSequence(); + if x != nil { + list.Push(x); + } + if p.tok != token.OR { + break; + } + p.next(); + } + + // no need for an alternatives if list.Len() < 2 + switch list.Len() { + case 0: return nil; + case 1: return list.At(0).(expr); + } + + // convert list into a alternatives + alt := make(alternatives, list.Len()); + for i := 0; i < list.Len(); i++ { + alt[i] = list.At(i).(expr); + } + return alt; +} + + +func (p *parser) parseFormat() { + for p.tok != token.EOF { + pos := p.pos; + + name, isIdent := p.parseRuleName(); + switch p.tok { + case token.STRING: + // package declaration + importPath := p.parseString(); + + // add package declaration + if !isIdent { + p.Error(pos, "illegal package name: " + name); + } else if _, found := p.packs[name]; !found { + p.packs[name] = importPath; + } else { + p.Error(pos, "package already declared: " + name); + } + + case token.ASSIGN: + // format rule + p.next(); + x := p.parseExpression(); + + // add rule + if _, found := p.rules[name]; !found { + p.rules[name] = x; + } else { + p.Error(pos, "format rule already declared: " + name); + } + + default: + p.errorExpected(p.pos, "package declaration or format rule"); + p.next(); // make progress in any case + } + + if p.tok == token.SEMICOLON { + p.next(); + } else { + break; + } + } + p.expect(token.EOF); +} + + +func remap(p *parser, name string) string { + i := strings.Index(name, "."); + if i >= 0 { + packageName := name[0 : i]; + typeName := name[i : len(name)]; + // lookup package + if importPath, found := p.packs[packageName]; found { + name = importPath + "." 
+ typeName; + } else { + var invalidPos token.Position; + p.Error(invalidPos, "package not declared: " + packageName); + } + } + return name; +} + + +// Parse parses a set of format productions from source src. Custom +// formatters may be provided via a map of formatter functions. If +// there are no errors, the result is a Format and the error is nil. +// Otherwise the format is nil and a non-empty ErrorList is returned. +// +func Parse(src []byte, fmap FormatterMap) (Format, os.Error) { + // parse source + var p parser; + p.init(src); + p.parseFormat(); + + // add custom formatters, if any + for name, form := range fmap { + name = remap(&p, name); + if t, found := p.rules[name]; !found { + p.rules[name] = &custom{name, form}; + } else { + var invalidPos token.Position; + p.Error(invalidPos, "formatter already declared: " + name); + } + } + + // convert errors list, if any + if p.errors.Len() > 0 { + errors := make(ErrorList, p.errors.Len()); + for i := 0; i < p.errors.Len(); i++ { + errors[i] = p.errors.At(i).(*Error); + } + return nil, errors; + } + + return p.rules, nil; +} |