diff options
author | Rob Pike <r@golang.org> | 2009-06-09 09:53:44 -0700 |
---|---|---|
committer | Rob Pike <r@golang.org> | 2009-06-09 09:53:44 -0700 |
commit | 7249ea4df2b4f12a4e7ed446f270cea87e4ffd34 (patch) | |
tree | 7032a11d0cac2ae4d3e90f7a189b575b5a50f848 /src/lib/datafmt/datafmt.go | |
parent | acf6ef7a82b3fe61516a1bac4563706552bdf078 (diff) | |
download | golang-7249ea4df2b4f12a4e7ed446f270cea87e4ffd34.tar.gz |
mv src/lib to src/pkg
tests: all.bash passes, gobuild still works, godoc still works.
R=rsc
OCL=30096
CL=30102
Diffstat (limited to 'src/lib/datafmt/datafmt.go')
-rw-r--r-- | src/lib/datafmt/datafmt.go | 789 |
1 files changed, 0 insertions, 789 deletions
diff --git a/src/lib/datafmt/datafmt.go b/src/lib/datafmt/datafmt.go deleted file mode 100644 index 0aedbbbb0..000000000 --- a/src/lib/datafmt/datafmt.go +++ /dev/null @@ -1,789 +0,0 @@ -// Copyright 2009 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -/* The datafmt package implements syntax-directed, type-driven formatting - of arbitrary data structures. Formatting a data structure consists of - two phases: first, a parser reads a format specification and builds a - "compiled" format. Then, the format can be applied repeatedly to - arbitrary values. Applying a format to a value evaluates to a []byte - containing the formatted value bytes, or nil. - - A format specification is a set of package declarations and format rules: - - Format = [ Entry { ";" Entry } [ ";" ] ] . - Entry = PackageDecl | FormatRule . - - (The syntax of a format specification is presented in the same EBNF - notation as used in the Go language specification. The syntax of white - space, comments, identifiers, and string literals is the same as in Go.) - - A package declaration binds a package name (such as 'ast') to a - package import path (such as '"go/ast"'). Each package used (in - a type name, see below) must be declared once before use. - - PackageDecl = PackageName ImportPath . - PackageName = identifier . - ImportPath = string . - - A format rule binds a rule name to a format expression. A rule name - may be a type name or one of the special names 'default' or '/'. - A type name may be the name of a predeclared type (for example, 'int', - 'float32', etc.), the package-qualified name of a user-defined type - (for example, 'ast.MapType'), or an identifier indicating the structure - of unnamed composite types ('array', 'chan', 'func', 'interface', 'map', - or 'ptr'). Each rule must have a unique name; rules can be declared in - any order. - - FormatRule = RuleName "=" Expression . - RuleName = TypeName | "default" | "/" . - TypeName = [ PackageName "." ] identifier . - - To format a value, the value's type name is used to select the format rule - (there is an override mechanism, see below). The format expression of the - selected rule specifies how the value is formatted. Each format expression, - when applied to a value, evaluates to a byte sequence or nil. - - In its most general form, a format expression is a list of alternatives, - each of which is a sequence of operands: - - Expression = [ Sequence ] { "|" [ Sequence ] } . - Sequence = Operand { Operand } . - - The formatted result produced by an expression is the result of the first - alternative sequence that evaluates to a non-nil result; if there is no - such alternative, the expression evaluates to nil. The result produced by - an operand sequence is the concatenation of the results of its operands. - If any operand in the sequence evaluates to nil, the entire sequence - evaluates to nil. - - There are five kinds of operands: - - Operand = Literal | Field | Group | Option | Repetition . - - Literals evaluate to themselves, with two substitutions. First, - %-formats expand in the manner of fmt.Printf, with the current value - passed as the parameter. Second, the current indentation (see below) - is inserted after every newline or form feed character. - - Literal = string . - - This table shows string literals applied to the value 42 and the - corresponding formatted result: - - "foo" foo - "%x" 2a - "x = %d" x = 42 - "%#x = %d" 0x2a = 42 - - A field operand is a field name optionally followed by an alternate - rule name. The field name may be an identifier or one of the special - names @ or *. - - Field = FieldName [ ":" RuleName ] . - FieldName = identifier | "@" | "*" . - - If the field name is an identifier, the current value must be a struct, - and there must be a field with that name in the struct. The same lookup - rules apply as in the Go language (for instance, the name of an anonymous - field is the unqualified type name). The field name denotes the field - value in the struct. If the field is not found, formatting is aborted - and an error message is returned. (TODO consider changing the semantics - such that if a field is not found, it evaluates to nil). - - The special name '@' denotes the current value. - - The meaning of the special name '*' depends on the type of the current - value: - - array, slice types array, slice element (inside {} only, see below) - interfaces value stored in interface - pointers value pointed to by pointer - - (Implementation restriction: channel, function and map types are not - supported due to missing reflection support). - - Fields are evaluated as follows: If the field value is nil, or an array - or slice element does not exist, the result is nil (see below for details - on array/slice elements). If the value is not nil the field value is - formatted (recursively) using the rule corresponding to its type name, - or the alternate rule name, if given. - - The following example shows a complete format specification for a - struct 'myPackage.Point'. Assume the package - - package myPackage // in directory myDir/myPackage - type Point struct { - name string; - x, y int; - } - - Applying the format specification - - myPackage "myDir/myPackage"; - int = "%d"; - hexInt = "0x%x"; - string = "---%s---"; - myPackage.Point = name "{" x ", " y:hexInt "}"; - - to the value myPackage.Point{"foo", 3, 15} results in - - ---foo---{3, 0xf} - - Finally, an operand may be a grouped, optional, or repeated expression. - A grouped expression ("group") groups a more complex expression (body) - so that it can be used in place of a single operand: - - Group = "(" [ Indentation ">>" ] Body ")" . - Indentation = Expression . - Body = Expression . - - A group body may be prefixed by an indentation expression followed by '>>'. - The indentation expression is applied to the current value like any other - expression and the result, if not nil, is appended to the current indentation - during the evaluation of the body (see also formatting state, below). - - An optional expression ("option") is enclosed in '[]' brackets. - - Option = "[" Body "]" . - - An option evaluates to its body, except that if the body evaluates to nil, - the option expression evaluates to an empty []byte. Thus an option's purpose - is to protect the expression containing the option from a nil operand. - - A repeated expression ("repetition") is enclosed in '{}' braces. - - Repetition = "{" Body [ "/" Separator ] "}" . - Separator = Expression . - - A repeated expression is evaluated as follows: The body is evaluated - repeatedly and its results are concatenated until the body evaluates - to nil. The result of the repetition is the (possibly empty) concatenation, - but it is never nil. An implicit index is supplied for the evaluation of - the body: that index is used to address elements of arrays or slices. If - the corresponding elements do not exist, the field denoting the element - evaluates to nil (which in turn may terminate the repetition). - - The body of a repetition may be followed by a '/' and a "separator" - expression. If the separator is present, it is invoked between repetitions - of the body. - - The following example shows a complete format specification for formatting - a slice of unnamed type. Applying the specification - - int = "%b"; - array = { * / ", " }; // array is the type name for an unnamed slice - - to the value '[]int{2, 3, 5, 7}' results in - - 10, 11, 101, 111 - - Default rule: If a format rule named 'default' is present, it is used for - formatting a value if no other rule was found. A common default rule is - - default = "%v" - - to provide default formatting for basic types without having to specify - a specific rule for each basic type. - - Global separator rule: If a format rule named '/' is present, it is - invoked with the current value between literals. If the separator - expression evaluates to nil, it is ignored. - - For instance, a global separator rule may be used to punctuate a sequence - of values with commas. The rules: - - default = "%v"; - / = ", "; - - will format an argument list by printing each one in its default format, - separated by a comma and a space. -*/ -package datafmt - -import ( - "container/vector"; - "fmt"; - "go/token"; - "io"; - "os"; - "reflect"; - "runtime"; - "strconv"; - "strings"; -) - - -// ---------------------------------------------------------------------------- -// Format representation - -type State struct - -// Custom formatters implement the Formatter function type. -// A formatter is invoked with the current formatting state, the -// value to format, and the rule name under which the formatter -// was installed (the same formatter function may be installed -// under different names). The formatter may access the current state -// to guide formatting and use State.Write to append to the state's -// output. -// -// A formatter must return a boolean value indicating if it evaluated -// to a non-nil value (true), or a nil value (false). -// -type Formatter func(state *State, value interface{}, ruleName string) bool - - -// A FormatterMap is a set of custom formatters. -// It maps a rule name to a formatter function. -// -type FormatterMap map [string] Formatter; - - -// A parsed format expression is built from the following nodes. -// -type ( - expr interface {}; - - alternatives []expr; // x | y | z - - sequence []expr; // x y z - - literal [][]byte; // a list of string segments, possibly starting with '%' - - field struct { - fieldName string; // including "@", "*" - ruleName string; // "" if no rule name specified - }; - - group struct { - indent, body expr; // (indent >> body) - }; - - option struct { - body expr; // [body] - }; - - repetition struct { - body, separator expr; // {body / separator} - }; - - custom struct { - ruleName string; - fun Formatter - }; -) - - -// A Format is the result of parsing a format specification. -// The format may be applied repeatedly to format values. -// -type Format map [string] expr; - - -// ---------------------------------------------------------------------------- -// Formatting - -// An application-specific environment may be provided to Format.Apply; -// the environment is available inside custom formatters via State.Env(). -// Environments must implement copying; the Copy method must return an -// complete copy of the receiver. This is necessary so that the formatter -// can save and restore an environment (in case of an absent expression). -// -// If the Environment doesn't change during formatting (this is under -// control of the custom formatters), the Copy function can simply return -// the receiver, and thus can be very light-weight. -// -type Environment interface { - Copy() Environment -} - - -// State represents the current formatting state. -// It is provided as argument to custom formatters. -// -type State struct { - fmt Format; // format in use - env Environment; // user-supplied environment - errors chan os.Error; // not chan *Error (errors <- nil would be wrong!) - hasOutput bool; // true after the first literal has been written - indent io.ByteBuffer; // current indentation - output io.ByteBuffer; // format output - linePos token.Position; // position of line beginning (Column == 0) - default_ expr; // possibly nil - separator expr; // possibly nil -} - - -func newState(fmt Format, env Environment, errors chan os.Error) *State { - s := new(State); - s.fmt = fmt; - s.env = env; - s.errors = errors; - s.linePos = token.Position{Line: 1}; - - // if we have a default rule, cache it's expression for fast access - if x, found := fmt["default"]; found { - s.default_ = x; - } - - // if we have a global separator rule, cache it's expression for fast access - if x, found := fmt["/"]; found { - s.separator = x; - } - - return s; -} - - -// Env returns the environment passed to Format.Apply. -func (s *State) Env() interface{} { - return s.env; -} - - -// LinePos returns the position of the current line beginning -// in the state's output buffer. Line numbers start at 1. -// -func (s *State) LinePos() token.Position { - return s.linePos; -} - - -// Pos returns the position of the next byte to be written to the -// output buffer. Line numbers start at 1. -// -func (s *State) Pos() token.Position { - offs := s.output.Len(); - return token.Position{Line: s.linePos.Line, Column: offs - s.linePos.Offset, Offset: offs}; -} - - -// Write writes data to the output buffer, inserting the indentation -// string after each newline or form feed character. It cannot return an error. -// -func (s *State) Write(data []byte) (int, os.Error) { - n := 0; - i0 := 0; - for i, ch := range data { - if ch == '\n' || ch == '\f' { - // write text segment and indentation - n1, _ := s.output.Write(data[i0 : i+1]); - n2, _ := s.output.Write(s.indent.Data()); - n += n1 + n2; - i0 = i + 1; - s.linePos.Offset = s.output.Len(); - s.linePos.Line++; - } - } - n3, _ := s.output.Write(data[i0 : len(data)]); - return n + n3, nil; -} - - -type checkpoint struct { - env Environment; - hasOutput bool; - outputLen int; - linePos token.Position; -} - - -func (s *State) save() checkpoint { - saved := checkpoint{nil, s.hasOutput, s.output.Len(), s.linePos}; - if s.env != nil { - saved.env = s.env.Copy(); - } - return saved; -} - - -func (s *State) restore(m checkpoint) { - s.env = m.env; - s.output.Truncate(m.outputLen); -} - - -func (s *State) error(msg string) { - s.errors <- os.NewError(msg); - runtime.Goexit(); -} - - -// getField searches in val, which must be a struct, for a field -// with the given name. It returns the value and the embedded depth -// where it was found. -// -func getField(val reflect.Value, fieldname string) (reflect.Value, int) { - // do we have a struct in the first place? - if val.Kind() != reflect.StructKind { - return nil, 0; - } - - sval, styp := val.(reflect.StructValue), val.Type().(reflect.StructType); - - // look for field at the top level - for i := 0; i < styp.Len(); i++ { - name, typ, tag, offset := styp.Field(i); - if name == fieldname || name == "" && strings.HasSuffix(typ.Name(), "." + fieldname) /* anonymous field */ { - return sval.Field(i), 0; - } - } - - // look for field in anonymous fields - var field reflect.Value; - level := 1000; // infinity (no struct has that many levels) - for i := 0; i < styp.Len(); i++ { - name, typ, tag, offset := styp.Field(i); - if name == "" { - f, l := getField(sval.Field(i), fieldname); - // keep the most shallow field - if f != nil { - switch { - case l < level: - field, level = f, l; - case l == level: - // more than one field at the same level, - // possibly an error unless there is a more - // shallow field found later - field = nil; - } - } - } - } - - return field, level + 1; -} - - -// TODO At the moment, unnamed types are simply mapped to the default -// names below. For instance, all unnamed arrays are mapped to -// 'array' which is not really sufficient. Eventually one may want -// to be able to specify rules for say an unnamed slice of T. -// -var defaultNames = map[int]string { - reflect.ArrayKind: "array", - reflect.BoolKind: "bool", - reflect.ChanKind: "chan", - reflect.DotDotDotKind: "ellipsis", - reflect.FloatKind: "float", - reflect.Float32Kind: "float32", - reflect.Float64Kind: "float64", - reflect.FuncKind: "func", - reflect.IntKind: "int", - reflect.Int16Kind: "int16", - reflect.Int32Kind: "int32", - reflect.Int64Kind: "int64", - reflect.Int8Kind: "int8", - reflect.InterfaceKind: "interface", - reflect.MapKind: "map", - reflect.PtrKind: "ptr", - reflect.StringKind: "string", - reflect.StructKind: "struct", - reflect.UintKind: "uint", - reflect.Uint16Kind: "uint16", - reflect.Uint32Kind: "uint32", - reflect.Uint64Kind: "uint64", - reflect.Uint8Kind: "uint8", - reflect.UintptrKind: "uintptr", -} - - -func typename(value reflect.Value) string { - name := value.Type().Name(); - if name == "" { - if defaultName, found := defaultNames[value.Kind()]; found { - name = defaultName; - } - } - return name; -} - - -func (s *State) getFormat(name string) expr { - if fexpr, found := s.fmt[name]; found { - return fexpr; - } - - if s.default_ != nil { - return s.default_; - } - - s.error(fmt.Sprintf("no format rule for type: '%s'", name)); - return nil; -} - - -// eval applies a format expression fexpr to a value. If the expression -// evaluates internally to a non-nil []byte, that slice is appended to -// the state's output buffer and eval returns true. Otherwise, eval -// returns false and the state remains unchanged. -// -func (s *State) eval(fexpr expr, value reflect.Value, index int) bool { - // an empty format expression always evaluates - // to a non-nil (but empty) []byte - if fexpr == nil { - return true; - } - - switch t := fexpr.(type) { - case alternatives: - // append the result of the first alternative that evaluates to - // a non-nil []byte to the state's output - mark := s.save(); - for _, x := range t { - if s.eval(x, value, index) { - return true; - } - s.restore(mark); - } - return false; - - case sequence: - // append the result of all operands to the state's output - // unless a nil result is encountered - mark := s.save(); - for _, x := range t { - if !s.eval(x, value, index) { - s.restore(mark); - return false; - } - } - return true; - - case literal: - // write separator, if any - if s.hasOutput { - // not the first literal - if s.separator != nil { - sep := s.separator; // save current separator - s.separator = nil; // and disable it (avoid recursion) - mark := s.save(); - if !s.eval(sep, value, index) { - s.restore(mark); - } - s.separator = sep; // enable it again - } - } - s.hasOutput = true; - // write literal segments - for _, lit := range t { - if len(lit) > 1 && lit[0] == '%' { - // segment contains a %-format at the beginning - if lit[1] == '%' { - // "%%" is printed as a single "%" - s.Write(lit[1 : len(lit)]); - } else { - // use s instead of s.output to get indentation right - fmt.Fprintf(s, string(lit), value.Interface()); - } - } else { - // segment contains no %-formats - s.Write(lit); - } - } - return true; // a literal never evaluates to nil - - case *field: - // determine field value - switch t.fieldName { - case "@": - // field value is current value - - case "*": - // indirection: operation is type-specific - switch v := value.(type) { - case reflect.ArrayValue: - if v.IsNil() || v.Len() <= index { - return false; - } - value = v.Elem(index); - - case reflect.MapValue: - s.error("reflection support for maps incomplete"); - - case reflect.PtrValue: - if v.IsNil() { - return false; - } - value = v.Sub(); - - case reflect.InterfaceValue: - if v.IsNil() { - return false; - } - value = v.Value(); - - case reflect.ChanValue: - s.error("reflection support for chans incomplete"); - - case reflect.FuncValue: - s.error("reflection support for funcs incomplete"); - - default: - s.error(fmt.Sprintf("error: * does not apply to `%s`", value.Type().Name())); - } - - default: - // value is value of named field - field, _ := getField(value, t.fieldName); - if field == nil { - // TODO consider just returning false in this case - s.error(fmt.Sprintf("error: no field `%s` in `%s`", t.fieldName, value.Type().Name())); - } - value = field; - } - - // determine rule - ruleName := t.ruleName; - if ruleName == "" { - // no alternate rule name, value type determines rule - ruleName = typename(value) - } - fexpr = s.getFormat(ruleName); - - mark := s.save(); - if !s.eval(fexpr, value, index) { - s.restore(mark); - return false; - } - return true; - - case *group: - // remember current indentation - indentLen := s.indent.Len(); - - // update current indentation - mark := s.save(); - s.eval(t.indent, value, index); - // if the indentation evaluates to nil, the state's output buffer - // didn't change - either way it's ok to append the difference to - // the current identation - s.indent.Write(s.output.Data()[mark.outputLen : s.output.Len()]); - s.restore(mark); - - // format group body - mark = s.save(); - b := true; - if !s.eval(t.body, value, index) { - s.restore(mark); - b = false; - } - - // reset indentation - s.indent.Truncate(indentLen); - return b; - - case *option: - // evaluate the body and append the result to the state's output - // buffer unless the result is nil - mark := s.save(); - if !s.eval(t.body, value, 0) { // TODO is 0 index correct? - s.restore(mark); - } - return true; // an option never evaluates to nil - - case *repetition: - // evaluate the body and append the result to the state's output - // buffer until a result is nil - for i := 0; ; i++ { - mark := s.save(); - // write separator, if any - if i > 0 && t.separator != nil { - // nil result from separator is ignored - mark := s.save(); - if !s.eval(t.separator, value, i) { - s.restore(mark); - } - } - if !s.eval(t.body, value, i) { - s.restore(mark); - break; - } - } - return true; // a repetition never evaluates to nil - - case *custom: - // invoke the custom formatter to obtain the result - mark := s.save(); - if !t.fun(s, value.Interface(), t.ruleName) { - s.restore(mark); - return false; - } - return true; - } - - panic("unreachable"); - return false; -} - - -// Eval formats each argument according to the format -// f and returns the resulting []byte and os.Error. If -// an error occured, the []byte contains the partially -// formatted result. An environment env may be passed -// in which is available in custom formatters through -// the state parameter. -// -func (f Format) Eval(env Environment, args ...) ([]byte, os.Error) { - if f == nil { - return nil, os.NewError("format is nil"); - } - - errors := make(chan os.Error); - s := newState(f, env, errors); - - go func() { - value := reflect.NewValue(args).(reflect.StructValue); - for i := 0; i < value.Len(); i++ { - fld := value.Field(i); - mark := s.save(); - if !s.eval(s.getFormat(typename(fld)), fld, 0) { // TODO is 0 index correct? - s.restore(mark); - } - } - errors <- nil; // no errors - }(); - - return s.output.Data(), <- errors; -} - - -// ---------------------------------------------------------------------------- -// Convenience functions - -// Fprint formats each argument according to the format f -// and writes to w. The result is the total number of bytes -// written and an os.Error, if any. -// -func (f Format) Fprint(w io.Writer, env Environment, args ...) (int, os.Error) { - data, err := f.Eval(env, args); - if err != nil { - // TODO should we print partial result in case of error? - return 0, err; - } - return w.Write(data); -} - - -// Print formats each argument according to the format f -// and writes to standard output. The result is the total -// number of bytes written and an os.Error, if any. -// -func (f Format) Print(args ...) (int, os.Error) { - return f.Fprint(os.Stdout, nil, args); -} - - -// Sprint formats each argument according to the format f -// and returns the resulting string. If an error occurs -// during formatting, the result string contains the -// partially formatted result followed by an error message. -// -func (f Format) Sprint(args ...) string { - var buf io.ByteBuffer; - n, err := f.Fprint(&buf, nil, args); - if err != nil { - fmt.Fprintf(&buf, "--- Sprint(%s) failed: %v", fmt.Sprint(args), err); - } - return string(buf.Data()); -} |