summaryrefslogtreecommitdiff
path: root/src/lib/datafmt/datafmt.go
diff options
context:
space:
mode:
authorRob Pike <r@golang.org>2009-06-09 09:53:44 -0700
committerRob Pike <r@golang.org>2009-06-09 09:53:44 -0700
commit7249ea4df2b4f12a4e7ed446f270cea87e4ffd34 (patch)
tree7032a11d0cac2ae4d3e90f7a189b575b5a50f848 /src/lib/datafmt/datafmt.go
parentacf6ef7a82b3fe61516a1bac4563706552bdf078 (diff)
downloadgolang-7249ea4df2b4f12a4e7ed446f270cea87e4ffd34.tar.gz
mv src/lib to src/pkg
tests: all.bash passes, gobuild still works, godoc still works. R=rsc OCL=30096 CL=30102
Diffstat (limited to 'src/lib/datafmt/datafmt.go')
-rw-r--r--src/lib/datafmt/datafmt.go789
1 files changed, 0 insertions, 789 deletions
diff --git a/src/lib/datafmt/datafmt.go b/src/lib/datafmt/datafmt.go
deleted file mode 100644
index 0aedbbbb0..000000000
--- a/src/lib/datafmt/datafmt.go
+++ /dev/null
@@ -1,789 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-/* The datafmt package implements syntax-directed, type-driven formatting
- of arbitrary data structures. Formatting a data structure consists of
- two phases: first, a parser reads a format specification and builds a
- "compiled" format. Then, the format can be applied repeatedly to
- arbitrary values. Applying a format to a value evaluates to a []byte
- containing the formatted value bytes, or nil.
-
- A format specification is a set of package declarations and format rules:
-
- Format = [ Entry { ";" Entry } [ ";" ] ] .
- Entry = PackageDecl | FormatRule .
-
- (The syntax of a format specification is presented in the same EBNF
- notation as used in the Go language specification. The syntax of white
- space, comments, identifiers, and string literals is the same as in Go.)
-
- A package declaration binds a package name (such as 'ast') to a
- package import path (such as '"go/ast"'). Each package used (in
- a type name, see below) must be declared once before use.
-
- PackageDecl = PackageName ImportPath .
- PackageName = identifier .
- ImportPath = string .
-
- A format rule binds a rule name to a format expression. A rule name
- may be a type name or one of the special names 'default' or '/'.
- A type name may be the name of a predeclared type (for example, 'int',
- 'float32', etc.), the package-qualified name of a user-defined type
- (for example, 'ast.MapType'), or an identifier indicating the structure
- of unnamed composite types ('array', 'chan', 'func', 'interface', 'map',
- or 'ptr'). Each rule must have a unique name; rules can be declared in
- any order.
-
- FormatRule = RuleName "=" Expression .
- RuleName = TypeName | "default" | "/" .
- TypeName = [ PackageName "." ] identifier .
-
- To format a value, the value's type name is used to select the format rule
- (there is an override mechanism, see below). The format expression of the
- selected rule specifies how the value is formatted. Each format expression,
- when applied to a value, evaluates to a byte sequence or nil.
-
- In its most general form, a format expression is a list of alternatives,
- each of which is a sequence of operands:
-
- Expression = [ Sequence ] { "|" [ Sequence ] } .
- Sequence = Operand { Operand } .
-
- The formatted result produced by an expression is the result of the first
- alternative sequence that evaluates to a non-nil result; if there is no
- such alternative, the expression evaluates to nil. The result produced by
- an operand sequence is the concatenation of the results of its operands.
- If any operand in the sequence evaluates to nil, the entire sequence
- evaluates to nil.
-
- There are five kinds of operands:
-
- Operand = Literal | Field | Group | Option | Repetition .
-
- Literals evaluate to themselves, with two substitutions. First,
- %-formats expand in the manner of fmt.Printf, with the current value
- passed as the parameter. Second, the current indentation (see below)
- is inserted after every newline or form feed character.
-
- Literal = string .
-
- This table shows string literals applied to the value 42 and the
- corresponding formatted result:
-
- "foo" foo
- "%x" 2a
- "x = %d" x = 42
- "%#x = %d" 0x2a = 42
-
- A field operand is a field name optionally followed by an alternate
- rule name. The field name may be an identifier or one of the special
- names @ or *.
-
- Field = FieldName [ ":" RuleName ] .
- FieldName = identifier | "@" | "*" .
-
- If the field name is an identifier, the current value must be a struct,
- and there must be a field with that name in the struct. The same lookup
- rules apply as in the Go language (for instance, the name of an anonymous
- field is the unqualified type name). The field name denotes the field
- value in the struct. If the field is not found, formatting is aborted
- and an error message is returned. (TODO consider changing the semantics
- such that if a field is not found, it evaluates to nil).
-
- The special name '@' denotes the current value.
-
- The meaning of the special name '*' depends on the type of the current
- value:
-
- array, slice types array, slice element (inside {} only, see below)
- interfaces value stored in interface
- pointers value pointed to by pointer
-
- (Implementation restriction: channel, function and map types are not
- supported due to missing reflection support).
-
- Fields are evaluated as follows: If the field value is nil, or an array
- or slice element does not exist, the result is nil (see below for details
- on array/slice elements). If the value is not nil the field value is
- formatted (recursively) using the rule corresponding to its type name,
- or the alternate rule name, if given.
-
- The following example shows a complete format specification for a
- struct 'myPackage.Point'. Assume the package
-
- package myPackage // in directory myDir/myPackage
- type Point struct {
- name string;
- x, y int;
- }
-
- Applying the format specification
-
- myPackage "myDir/myPackage";
- int = "%d";
- hexInt = "0x%x";
- string = "---%s---";
- myPackage.Point = name "{" x ", " y:hexInt "}";
-
- to the value myPackage.Point{"foo", 3, 15} results in
-
- ---foo---{3, 0xf}
-
- Finally, an operand may be a grouped, optional, or repeated expression.
- A grouped expression ("group") groups a more complex expression (body)
- so that it can be used in place of a single operand:
-
- Group = "(" [ Indentation ">>" ] Body ")" .
- Indentation = Expression .
- Body = Expression .
-
- A group body may be prefixed by an indentation expression followed by '>>'.
- The indentation expression is applied to the current value like any other
- expression and the result, if not nil, is appended to the current indentation
- during the evaluation of the body (see also formatting state, below).
-
- An optional expression ("option") is enclosed in '[]' brackets.
-
- Option = "[" Body "]" .
-
- An option evaluates to its body, except that if the body evaluates to nil,
- the option expression evaluates to an empty []byte. Thus an option's purpose
- is to protect the expression containing the option from a nil operand.
-
- A repeated expression ("repetition") is enclosed in '{}' braces.
-
- Repetition = "{" Body [ "/" Separator ] "}" .
- Separator = Expression .
-
- A repeated expression is evaluated as follows: The body is evaluated
- repeatedly and its results are concatenated until the body evaluates
- to nil. The result of the repetition is the (possibly empty) concatenation,
- but it is never nil. An implicit index is supplied for the evaluation of
- the body: that index is used to address elements of arrays or slices. If
- the corresponding elements do not exist, the field denoting the element
- evaluates to nil (which in turn may terminate the repetition).
-
- The body of a repetition may be followed by a '/' and a "separator"
- expression. If the separator is present, it is invoked between repetitions
- of the body.
-
- The following example shows a complete format specification for formatting
- a slice of unnamed type. Applying the specification
-
- int = "%b";
- array = { * / ", " }; // array is the type name for an unnamed slice
-
- to the value '[]int{2, 3, 5, 7}' results in
-
- 10, 11, 101, 111
-
- Default rule: If a format rule named 'default' is present, it is used for
- formatting a value if no other rule was found. A common default rule is
-
- default = "%v"
-
- to provide default formatting for basic types without having to specify
- a specific rule for each basic type.
-
- Global separator rule: If a format rule named '/' is present, it is
- invoked with the current value between literals. If the separator
- expression evaluates to nil, it is ignored.
-
- For instance, a global separator rule may be used to punctuate a sequence
- of values with commas. The rules:
-
- default = "%v";
- / = ", ";
-
- will format an argument list by printing each one in its default format,
- separated by a comma and a space.
-*/
-package datafmt
-
-import (
- "container/vector";
- "fmt";
- "go/token";
- "io";
- "os";
- "reflect";
- "runtime";
- "strconv";
- "strings";
-)
-
-
-// ----------------------------------------------------------------------------
-// Format representation
-
-type State struct
-
-// Custom formatters implement the Formatter function type.
-// A formatter is invoked with the current formatting state, the
-// value to format, and the rule name under which the formatter
-// was installed (the same formatter function may be installed
-// under different names). The formatter may access the current state
-// to guide formatting and use State.Write to append to the state's
-// output.
-//
-// A formatter must return a boolean value indicating if it evaluated
-// to a non-nil value (true), or a nil value (false).
-//
-type Formatter func(state *State, value interface{}, ruleName string) bool
-
-
-// A FormatterMap is a set of custom formatters.
-// It maps a rule name to a formatter function.
-//
-type FormatterMap map [string] Formatter;
-
-
-// A parsed format expression is built from the following nodes.
-//
-type (
- expr interface {};
-
- alternatives []expr; // x | y | z
-
- sequence []expr; // x y z
-
- literal [][]byte; // a list of string segments, possibly starting with '%'
-
- field struct {
- fieldName string; // including "@", "*"
- ruleName string; // "" if no rule name specified
- };
-
- group struct {
- indent, body expr; // (indent >> body)
- };
-
- option struct {
- body expr; // [body]
- };
-
- repetition struct {
- body, separator expr; // {body / separator}
- };
-
- custom struct {
- ruleName string;
- fun Formatter
- };
-)
-
-
-// A Format is the result of parsing a format specification.
-// The format may be applied repeatedly to format values.
-//
-type Format map [string] expr;
-
-
-// ----------------------------------------------------------------------------
-// Formatting
-
-// An application-specific environment may be provided to Format.Apply;
-// the environment is available inside custom formatters via State.Env().
-// Environments must implement copying; the Copy method must return an
-// complete copy of the receiver. This is necessary so that the formatter
-// can save and restore an environment (in case of an absent expression).
-//
-// If the Environment doesn't change during formatting (this is under
-// control of the custom formatters), the Copy function can simply return
-// the receiver, and thus can be very light-weight.
-//
-type Environment interface {
- Copy() Environment
-}
-
-
-// State represents the current formatting state.
-// It is provided as argument to custom formatters.
-//
-type State struct {
- fmt Format; // format in use
- env Environment; // user-supplied environment
- errors chan os.Error; // not chan *Error (errors <- nil would be wrong!)
- hasOutput bool; // true after the first literal has been written
- indent io.ByteBuffer; // current indentation
- output io.ByteBuffer; // format output
- linePos token.Position; // position of line beginning (Column == 0)
- default_ expr; // possibly nil
- separator expr; // possibly nil
-}
-
-
-func newState(fmt Format, env Environment, errors chan os.Error) *State {
- s := new(State);
- s.fmt = fmt;
- s.env = env;
- s.errors = errors;
- s.linePos = token.Position{Line: 1};
-
- // if we have a default rule, cache it's expression for fast access
- if x, found := fmt["default"]; found {
- s.default_ = x;
- }
-
- // if we have a global separator rule, cache it's expression for fast access
- if x, found := fmt["/"]; found {
- s.separator = x;
- }
-
- return s;
-}
-
-
-// Env returns the environment passed to Format.Apply.
-func (s *State) Env() interface{} {
- return s.env;
-}
-
-
-// LinePos returns the position of the current line beginning
-// in the state's output buffer. Line numbers start at 1.
-//
-func (s *State) LinePos() token.Position {
- return s.linePos;
-}
-
-
-// Pos returns the position of the next byte to be written to the
-// output buffer. Line numbers start at 1.
-//
-func (s *State) Pos() token.Position {
- offs := s.output.Len();
- return token.Position{Line: s.linePos.Line, Column: offs - s.linePos.Offset, Offset: offs};
-}
-
-
-// Write writes data to the output buffer, inserting the indentation
-// string after each newline or form feed character. It cannot return an error.
-//
-func (s *State) Write(data []byte) (int, os.Error) {
- n := 0;
- i0 := 0;
- for i, ch := range data {
- if ch == '\n' || ch == '\f' {
- // write text segment and indentation
- n1, _ := s.output.Write(data[i0 : i+1]);
- n2, _ := s.output.Write(s.indent.Data());
- n += n1 + n2;
- i0 = i + 1;
- s.linePos.Offset = s.output.Len();
- s.linePos.Line++;
- }
- }
- n3, _ := s.output.Write(data[i0 : len(data)]);
- return n + n3, nil;
-}
-
-
-type checkpoint struct {
- env Environment;
- hasOutput bool;
- outputLen int;
- linePos token.Position;
-}
-
-
-func (s *State) save() checkpoint {
- saved := checkpoint{nil, s.hasOutput, s.output.Len(), s.linePos};
- if s.env != nil {
- saved.env = s.env.Copy();
- }
- return saved;
-}
-
-
-func (s *State) restore(m checkpoint) {
- s.env = m.env;
- s.output.Truncate(m.outputLen);
-}
-
-
-func (s *State) error(msg string) {
- s.errors <- os.NewError(msg);
- runtime.Goexit();
-}
-
-
-// getField searches in val, which must be a struct, for a field
-// with the given name. It returns the value and the embedded depth
-// where it was found.
-//
-func getField(val reflect.Value, fieldname string) (reflect.Value, int) {
- // do we have a struct in the first place?
- if val.Kind() != reflect.StructKind {
- return nil, 0;
- }
-
- sval, styp := val.(reflect.StructValue), val.Type().(reflect.StructType);
-
- // look for field at the top level
- for i := 0; i < styp.Len(); i++ {
- name, typ, tag, offset := styp.Field(i);
- if name == fieldname || name == "" && strings.HasSuffix(typ.Name(), "." + fieldname) /* anonymous field */ {
- return sval.Field(i), 0;
- }
- }
-
- // look for field in anonymous fields
- var field reflect.Value;
- level := 1000; // infinity (no struct has that many levels)
- for i := 0; i < styp.Len(); i++ {
- name, typ, tag, offset := styp.Field(i);
- if name == "" {
- f, l := getField(sval.Field(i), fieldname);
- // keep the most shallow field
- if f != nil {
- switch {
- case l < level:
- field, level = f, l;
- case l == level:
- // more than one field at the same level,
- // possibly an error unless there is a more
- // shallow field found later
- field = nil;
- }
- }
- }
- }
-
- return field, level + 1;
-}
-
-
-// TODO At the moment, unnamed types are simply mapped to the default
-// names below. For instance, all unnamed arrays are mapped to
-// 'array' which is not really sufficient. Eventually one may want
-// to be able to specify rules for say an unnamed slice of T.
-//
-var defaultNames = map[int]string {
- reflect.ArrayKind: "array",
- reflect.BoolKind: "bool",
- reflect.ChanKind: "chan",
- reflect.DotDotDotKind: "ellipsis",
- reflect.FloatKind: "float",
- reflect.Float32Kind: "float32",
- reflect.Float64Kind: "float64",
- reflect.FuncKind: "func",
- reflect.IntKind: "int",
- reflect.Int16Kind: "int16",
- reflect.Int32Kind: "int32",
- reflect.Int64Kind: "int64",
- reflect.Int8Kind: "int8",
- reflect.InterfaceKind: "interface",
- reflect.MapKind: "map",
- reflect.PtrKind: "ptr",
- reflect.StringKind: "string",
- reflect.StructKind: "struct",
- reflect.UintKind: "uint",
- reflect.Uint16Kind: "uint16",
- reflect.Uint32Kind: "uint32",
- reflect.Uint64Kind: "uint64",
- reflect.Uint8Kind: "uint8",
- reflect.UintptrKind: "uintptr",
-}
-
-
-func typename(value reflect.Value) string {
- name := value.Type().Name();
- if name == "" {
- if defaultName, found := defaultNames[value.Kind()]; found {
- name = defaultName;
- }
- }
- return name;
-}
-
-
-func (s *State) getFormat(name string) expr {
- if fexpr, found := s.fmt[name]; found {
- return fexpr;
- }
-
- if s.default_ != nil {
- return s.default_;
- }
-
- s.error(fmt.Sprintf("no format rule for type: '%s'", name));
- return nil;
-}
-
-
-// eval applies a format expression fexpr to a value. If the expression
-// evaluates internally to a non-nil []byte, that slice is appended to
-// the state's output buffer and eval returns true. Otherwise, eval
-// returns false and the state remains unchanged.
-//
-func (s *State) eval(fexpr expr, value reflect.Value, index int) bool {
- // an empty format expression always evaluates
- // to a non-nil (but empty) []byte
- if fexpr == nil {
- return true;
- }
-
- switch t := fexpr.(type) {
- case alternatives:
- // append the result of the first alternative that evaluates to
- // a non-nil []byte to the state's output
- mark := s.save();
- for _, x := range t {
- if s.eval(x, value, index) {
- return true;
- }
- s.restore(mark);
- }
- return false;
-
- case sequence:
- // append the result of all operands to the state's output
- // unless a nil result is encountered
- mark := s.save();
- for _, x := range t {
- if !s.eval(x, value, index) {
- s.restore(mark);
- return false;
- }
- }
- return true;
-
- case literal:
- // write separator, if any
- if s.hasOutput {
- // not the first literal
- if s.separator != nil {
- sep := s.separator; // save current separator
- s.separator = nil; // and disable it (avoid recursion)
- mark := s.save();
- if !s.eval(sep, value, index) {
- s.restore(mark);
- }
- s.separator = sep; // enable it again
- }
- }
- s.hasOutput = true;
- // write literal segments
- for _, lit := range t {
- if len(lit) > 1 && lit[0] == '%' {
- // segment contains a %-format at the beginning
- if lit[1] == '%' {
- // "%%" is printed as a single "%"
- s.Write(lit[1 : len(lit)]);
- } else {
- // use s instead of s.output to get indentation right
- fmt.Fprintf(s, string(lit), value.Interface());
- }
- } else {
- // segment contains no %-formats
- s.Write(lit);
- }
- }
- return true; // a literal never evaluates to nil
-
- case *field:
- // determine field value
- switch t.fieldName {
- case "@":
- // field value is current value
-
- case "*":
- // indirection: operation is type-specific
- switch v := value.(type) {
- case reflect.ArrayValue:
- if v.IsNil() || v.Len() <= index {
- return false;
- }
- value = v.Elem(index);
-
- case reflect.MapValue:
- s.error("reflection support for maps incomplete");
-
- case reflect.PtrValue:
- if v.IsNil() {
- return false;
- }
- value = v.Sub();
-
- case reflect.InterfaceValue:
- if v.IsNil() {
- return false;
- }
- value = v.Value();
-
- case reflect.ChanValue:
- s.error("reflection support for chans incomplete");
-
- case reflect.FuncValue:
- s.error("reflection support for funcs incomplete");
-
- default:
- s.error(fmt.Sprintf("error: * does not apply to `%s`", value.Type().Name()));
- }
-
- default:
- // value is value of named field
- field, _ := getField(value, t.fieldName);
- if field == nil {
- // TODO consider just returning false in this case
- s.error(fmt.Sprintf("error: no field `%s` in `%s`", t.fieldName, value.Type().Name()));
- }
- value = field;
- }
-
- // determine rule
- ruleName := t.ruleName;
- if ruleName == "" {
- // no alternate rule name, value type determines rule
- ruleName = typename(value)
- }
- fexpr = s.getFormat(ruleName);
-
- mark := s.save();
- if !s.eval(fexpr, value, index) {
- s.restore(mark);
- return false;
- }
- return true;
-
- case *group:
- // remember current indentation
- indentLen := s.indent.Len();
-
- // update current indentation
- mark := s.save();
- s.eval(t.indent, value, index);
- // if the indentation evaluates to nil, the state's output buffer
- // didn't change - either way it's ok to append the difference to
- // the current identation
- s.indent.Write(s.output.Data()[mark.outputLen : s.output.Len()]);
- s.restore(mark);
-
- // format group body
- mark = s.save();
- b := true;
- if !s.eval(t.body, value, index) {
- s.restore(mark);
- b = false;
- }
-
- // reset indentation
- s.indent.Truncate(indentLen);
- return b;
-
- case *option:
- // evaluate the body and append the result to the state's output
- // buffer unless the result is nil
- mark := s.save();
- if !s.eval(t.body, value, 0) { // TODO is 0 index correct?
- s.restore(mark);
- }
- return true; // an option never evaluates to nil
-
- case *repetition:
- // evaluate the body and append the result to the state's output
- // buffer until a result is nil
- for i := 0; ; i++ {
- mark := s.save();
- // write separator, if any
- if i > 0 && t.separator != nil {
- // nil result from separator is ignored
- mark := s.save();
- if !s.eval(t.separator, value, i) {
- s.restore(mark);
- }
- }
- if !s.eval(t.body, value, i) {
- s.restore(mark);
- break;
- }
- }
- return true; // a repetition never evaluates to nil
-
- case *custom:
- // invoke the custom formatter to obtain the result
- mark := s.save();
- if !t.fun(s, value.Interface(), t.ruleName) {
- s.restore(mark);
- return false;
- }
- return true;
- }
-
- panic("unreachable");
- return false;
-}
-
-
-// Eval formats each argument according to the format
-// f and returns the resulting []byte and os.Error. If
-// an error occured, the []byte contains the partially
-// formatted result. An environment env may be passed
-// in which is available in custom formatters through
-// the state parameter.
-//
-func (f Format) Eval(env Environment, args ...) ([]byte, os.Error) {
- if f == nil {
- return nil, os.NewError("format is nil");
- }
-
- errors := make(chan os.Error);
- s := newState(f, env, errors);
-
- go func() {
- value := reflect.NewValue(args).(reflect.StructValue);
- for i := 0; i < value.Len(); i++ {
- fld := value.Field(i);
- mark := s.save();
- if !s.eval(s.getFormat(typename(fld)), fld, 0) { // TODO is 0 index correct?
- s.restore(mark);
- }
- }
- errors <- nil; // no errors
- }();
-
- return s.output.Data(), <- errors;
-}
-
-
-// ----------------------------------------------------------------------------
-// Convenience functions
-
-// Fprint formats each argument according to the format f
-// and writes to w. The result is the total number of bytes
-// written and an os.Error, if any.
-//
-func (f Format) Fprint(w io.Writer, env Environment, args ...) (int, os.Error) {
- data, err := f.Eval(env, args);
- if err != nil {
- // TODO should we print partial result in case of error?
- return 0, err;
- }
- return w.Write(data);
-}
-
-
-// Print formats each argument according to the format f
-// and writes to standard output. The result is the total
-// number of bytes written and an os.Error, if any.
-//
-func (f Format) Print(args ...) (int, os.Error) {
- return f.Fprint(os.Stdout, nil, args);
-}
-
-
-// Sprint formats each argument according to the format f
-// and returns the resulting string. If an error occurs
-// during formatting, the result string contains the
-// partially formatted result followed by an error message.
-//
-func (f Format) Sprint(args ...) string {
- var buf io.ByteBuffer;
- n, err := f.Fprint(&buf, nil, args);
- if err != nil {
- fmt.Fprintf(&buf, "--- Sprint(%s) failed: %v", fmt.Sprint(args), err);
- }
- return string(buf.Data());
-}