1 files changed, 786 insertions, 0 deletions
diff --git a/src/lib/datafmt/datafmt.go b/src/lib/datafmt/datafmt.go
new file mode 100644
index 000000000..baeb3ac41
--- /dev/null
+++ b/src/lib/datafmt/datafmt.go
@@ -0,0 +1,786 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+/*	The datafmt package implements syntax-directed, type-driven formatting
+	of arbitrary data structures. Formatting a data structure consists of
+	two phases: first, a parser reads a format specification and builds a
+	"compiled" format. Then, the format can be applied repeatedly to
+	arbitrary values. Applying a format to a value evaluates to a []byte
+	containing the formatted value bytes, or nil.
+
+	A format specification is a set of package declarations and format rules:
+
+		Format      = [ Entry { ";" Entry } [ ";" ] ] .
+		Entry       = PackageDecl | FormatRule . 
+
+	(The syntax of a format specification is presented in the same EBNF
+	notation as used in the Go language specification. The syntax of white
+	space, comments, identifiers, and string literals is the same as in Go.)
+
+	A package declaration binds a package name (such as 'ast') to a
+	package import path (such as '"go/ast"'). Each package used (in
+	a type name, see below) must be declared once before use.
+
+		PackageDecl = PackageName ImportPath .
+		PackageName = identifier .
+		ImportPath  = string .
+
+	A format rule binds a rule name to a format expression. A rule name
+	may be a type name or one of the special names 'default' or '/'.
+	A type name may be the name of a predeclared type (for example, 'int',
+	'float32', etc.), the package-qualified name of a user-defined type
+	(for example, 'ast.MapType'), or an identifier indicating the structure
+	of unnamed composite types ('array', 'chan', 'func', 'interface', 'map',
+	or 'ptr'). Each rule must have a unique name; rules can be declared in
+	any order.
+
+		FormatRule  = RuleName "=" Expression .
+		RuleName    = TypeName | "default" | "/" .
+		TypeName    = [ PackageName "." ] identifier .
+
+	To format a value, the value's type name is used to select the format rule
+	(there is an override mechanism, see below). The format expression of the
+	selected rule specifies how the value is formatted. Each format expression,
+	when applied to a value, evaluates to a byte sequence or nil.
+
+	In its most general form, a format expression is a list of alternatives,
+	each of which is a sequence of operands:
+
+		Expression  = [ Sequence ] { "|" [ Sequence ] } .
+		Sequence    = Operand { Operand } .
+
+	The formatted result produced by an expression is the result of the first
+	alternative sequence that evaluates to a non-nil result; if there is no
+	such alternative, the expression evaluates to nil. The result produced by
+	an operand sequence is the concatenation of the results of its operands.
+	If any operand in the sequence evaluates to nil, the entire sequence
+	evaluates to nil.
+
+	There are five kinds of operands:
+
+		Operand     = Literal | Field | Group | Option | Repetition .
+
+	Literals evaluate to themselves, with two substitutions. First,
+	%-formats expand in the manner of fmt.Printf, with the current value
+	passed as the parameter. Second, the current indentation (see below)
+	is inserted after every newline character.
+
+		Literal     = string .
+
+	This table shows string literals applied to the value 42 and the
+	corresponding formatted result:
+
+		"foo"       foo
+		"%x"        2a
+		"x = %d"    x = 42
+		"%#x = %d"  0x2a = 42
+
+	A field operand is a field name optionally followed by an alternate
+	rule name. The field name may be an identifier or one of the special
+	names ^ or *.
+
+		Field       = FieldName [ ":" RuleName ] .
+		FieldName   = identifier | "^" | "*" .
+
+	If the field name is an identifier, the current value must be a struct,
+	and there must be a field with that name in the struct. The same lookup
+	rules apply as in the Go language (for instance, the name of an anonymous
+	field is the unqualified type name). The field name denotes the field
+	value in the struct. If the field is not found, formatting is aborted
+	and an error message is returned. (TODO consider changing the semantics
+	such that if a field is not found, it evaluates to nil).
+
+	The special name '^' denotes the current value. (TODO see if ^ can
+	change to @ or be eliminated).
+
+	The meaning of the special name '*' depends on the type of the current
+	value:
+
+		array, slice types   array, slice element (inside {} only, see below)
+		interfaces           value stored in interface
+		pointers             value pointed to by pointer
+
+	(Implementation restriction: channel, function and map types are not
+	supported due to missing reflection support).
+
+	Fields are evaluated as follows: If the field value is nil, or an array
+	or slice element does not exist, the result is nil (see below for details
+	on array/slice elements). If the value is not nil, the field value is
+	formatted (recursively) using the rule corresponding to its type name,
+	or the alternate rule name, if given.
+
+	The following example shows a complete format specification for a
+	struct 'myPackage.Point'. Assume the package
+	
+		package myPackage  // in directory myDir/myPackage
+		type Point struct {
+			name string;
+			x, y int;
+		}
+
+	Applying the format specification
+
+		myPackage "myDir/myPackage";
+		int = "%d";
+		hexInt = "0x%x";
+		string = "---%s---";
+		myPackage.Point = name "{" x ", " y:hexInt "}";
+
+	to the value myPackage.Point{"foo", 3, 15} results in
+
+		---foo---{3, 0xf}
+
+	Finally, an operand may be a grouped, optional, or repeated expression.
+	A grouped expression ("group") groups a more complex expression (body)
+	so that it can be used in place of a single operand:
+
+		Group       = "(" [ Indentation ">>" ] Body ")" .
+		Indentation = Expression .
+		Body        = Expression .
+
+	A group body may be prefixed by an indentation expression followed by '>>'.
+	The indentation expression is applied to the current value like any other
+	expression and the result, if not nil, is appended to the current indentation
+	during the evaluation of the body (see also formatting state, below).
+
+	An optional expression ("option") is enclosed in '[]' brackets.
+
+		Option      = "[" Body "]" .
+
+	An option evaluates to its body, except that if the body evaluates to nil,
+	the option expression evaluates to an empty []byte. Thus an option's purpose
+	is to protect the expression containing the option from a nil operand.
+
+	A repeated expression ("repetition") is enclosed in '{}' braces.
+
+		Repetition  = "{" Body [ "/" Separator ] "}" .
+		Separator   = Expression .
+	
+	A repeated expression is evaluated as follows: The body is evaluated
+	repeatedly and its results are concatenated until the body evaluates
+	to nil. The result of the repetition is the (possibly empty) concatenation,
+	but it is never nil. An implicit index is supplied for the evaluation of
+	the body: that index is used to address elements of arrays or slices. If
+	the corresponding elements do not exist, the field denoting the element
+	evaluates to nil (which in turn may terminate the repetition).
+
+	The body of a repetition may be followed by a '/' and a "separator"
+	expression. If the separator is present, it is invoked between repetitions
+	of the body.
+
+	The following example shows a complete format specification for formatting
+	a slice of unnamed type. Applying the specification
+
+		int = "%b";
+		array = { * / ", " };  // array is the type name for an unnamed slice
+
+	to the value '[]int{2, 3, 5, 7}' results in
+
+		10, 11, 101, 111
+
+	Default rule: If a format rule named 'default' is present, it is used for
+	formatting a value if no other rule was found. A common default rule is
+
+		default = "%v"
+
+	to provide default formatting for basic types without having to specify
+	a specific rule for each basic type.
+
+	Global separator rule: If a format rule named '/' is present, it is
+	invoked with the current value between literals. If the separator
+	expression evaluates to nil, it is ignored.
+
+	For instance, a global separator rule may be used to punctuate a sequence
+	of values with commas. The rules:
+
+		default = "%v";
+		/ = ", ";
+
+	will format an argument list by printing each one in its default format,
+	separated by a comma and a space.
+*/
+package datafmt
+
+import (
+	"container/vector";
+	"fmt";
+	"go/token";
+	"io";
+	"os";
+	"reflect";
+	"runtime";
+	"strconv";
+	"strings";
+)
+
+
+// ----------------------------------------------------------------------------
+// Format representation
+
+// Forward declaration of State; the full definition appears further
+// down in this file.
+type State struct
+
+// Custom formatters implement the Formatter function type.
+// A formatter is invoked with the current formatting state, the
+// value to format, and the rule name under which the formatter
+// was installed (the same formatter function may be installed
+// under different names). The formatter may access the current state
+// to guide formatting and use State.Write to append to the state's
+// output.
+//
+// A formatter must return a boolean value indicating if it evaluated
+// to a non-nil value (true), or a nil value (false). If it returns
+// false, the caller rolls the state back to the checkpoint taken
+// before the formatter was invoked, which discards the output the
+// formatter has written.
+//
+type Formatter func(state *State, value interface{}, ruleName string) bool
+
+
+// A FormatterMap is a set of custom formatters.
+// It maps a rule name to the formatter function installed under
+// that name.
+//
+type FormatterMap map [string] Formatter;
+
+
+// A parsed format expression is built from the following nodes.
+// The struct nodes (field, group, option, repetition, custom) are
+// handled by State.eval in their pointer form; alternatives, sequence,
+// and literal are handled as plain values.
+//
+type (
+	// expr is any of the node types below; a nil expr stands for an
+	// empty expression, which always evaluates successfully.
+	expr interface {};
+
+	// The first alternative that evaluates to non-nil is the result.
+	alternatives []expr;  // x | y | z
+
+	// All operands must evaluate to non-nil for the sequence to do so.
+	sequence []expr;  // x y z
+
+	// A segment that starts with '%' and is longer than one byte is
+	// treated as a %-format; all other segments are written verbatim.
+	literal [][]byte;  // a list of string segments, possibly starting with '%'
+
+	// A field selects a (sub-)value of the current value and the rule
+	// used to format it.
+	field struct {
+		fieldName string;  // including "^", "*"
+		ruleName string;  // "" if no rule name specified
+	};
+
+	// The result of indent is appended to the current indentation
+	// while body is evaluated.
+	group struct {
+		indent, body expr;  // (indent >> body)
+	};
+
+	// An option never evaluates to nil, even if its body does.
+	option struct {
+		body expr;  // [body]
+	};
+
+	// The body is evaluated repeatedly until it evaluates to nil; the
+	// separator, if any, is evaluated between repetitions.
+	repetition struct {
+		body, separator expr;  // {body / separator}
+	};
+
+	// A custom node invokes the formatter fun, passing it ruleName.
+	custom struct {
+		ruleName string;
+		fun Formatter
+	};
+)
+
+
+// A Format is the result of parsing a format specification.
+// The format may be applied repeatedly to format values.
+// It maps each rule name to the parsed expression of that rule;
+// the entries named "default" and "/", if present, hold the default
+// rule and the global separator rule.
+//
+type Format map [string] expr;
+
+
+// ----------------------------------------------------------------------------
+// Formatting
+
+// An application-specific environment may be provided to Format.Eval;
+// the environment is available inside custom formatters via State.Env().
+// Environments must implement copying; the Copy method must return a
+// complete copy of the receiver. This is necessary so that the formatter
+// can save and restore an environment (in case of an absent expression).
+//
+// If the Environment doesn't change during formatting (this is under
+// control of the custom formatters), the Copy function can simply return
+// the receiver, and thus can be very light-weight.
+//
+type Environment interface {
+	Copy() Environment
+}
+
+
+// State represents the current formatting state.
+// It is provided as argument to custom formatters.
+// A State is created for a single evaluation of a format; it collects
+// the output and tracks the indentation and the position of the
+// current output line.
+//
+type State struct {
+	fmt Format;  // format in use
+	env Environment;  // user-supplied environment
+	errors chan os.Error;  // not chan *Error (errors <- nil would be wrong!)
+	hasOutput bool;  // true after the first literal has been written
+	indent io.ByteBuffer;  // current indentation
+	output io.ByteBuffer;  // format output
+	linePos token.Position;  // position of line beginning (Column == 0)
+	default_ expr;  // possibly nil
+	separator expr;  // possibly nil
+}
+
+
+// newState allocates the State used to apply the format fmt with the
+// user-supplied environment env. Errors detected during formatting are
+// sent on the errors channel. Output line numbers start at 1.
+//
+func newState(fmt Format, env Environment, errors chan os.Error) *State {
+	st := new(State);
+	st.linePos = token.Position{Line: 1};
+	st.errors = errors;
+	st.env = env;
+	st.fmt = fmt;
+
+	// cache the expression of the global separator rule, if there is
+	// one, so that it need not be looked up again for every literal
+	if sep, ok := fmt["/"]; ok {
+		st.separator = sep;
+	}
+
+	// likewise, cache the expression of the default rule
+	if def, ok := fmt["default"]; ok {
+		st.default_ = def;
+	}
+
+	return st;
+}
+
+
+// Env returns the environment passed to Format.Eval.
+// The result is the state's current Environment value (possibly nil),
+// returned as an interface{}.
+func (s *State) Env() interface{} {
+	return s.env;
+}
+
+
+// LinePos returns the position of the current line beginning
+// in the state's output buffer. Line numbers start at 1.
+// The position is updated by Write whenever a newline is written.
+//
+func (s *State) LinePos() token.Position {
+	return s.linePos;
+}
+
+
+// Pos returns the position of the next byte to be written to the
+// output buffer. Line numbers start at 1.
+//
+func (s *State) Pos() token.Position {
+	offs := s.output.Len();
+	return token.Position{Line: s.linePos.Line, Column: offs - s.linePos.Offset, Offset: offs};
+}
+
+
+// Write appends data to the output buffer. After every newline in data
+// the current indentation string is inserted and the line position is
+// advanced. The returned count is the number of bytes appended to the
+// output, including inserted indentation; the error is always nil.
+//
+func (s *State) Write(data []byte) (int, os.Error) {
+	total := 0;  // bytes appended to the output so far
+	start := 0;  // index of the first byte of data not yet written
+	for end := 0; end < len(data); end++ {
+		if data[end] != '\n' {
+			continue;
+		}
+		// flush the pending text up to and including the newline,
+		// then begin the new line with the current indentation
+		nText, _ := s.output.Write(data[start : end+1]);
+		nIndent, _ := s.output.Write(s.indent.Data());
+		total += nText + nIndent;
+		start = end + 1;
+		// the new line is considered to begin after its indentation
+		s.linePos.Line++;
+		s.linePos.Offset = s.output.Len();
+	}
+	// flush whatever follows the last newline
+	nRest, _ := s.output.Write(data[start : len(data)]);
+	return total + nRest, nil;
+}
+
+
+// A checkpoint records the parts of a State that are needed to undo
+// the effects of a failed evaluation; it is created by State.save.
+//
+type checkpoint struct {
+	env Environment;  // copy of the environment, or nil if there was none
+	hasOutput bool;  // value of State.hasOutput at the time of the save
+	outputLen int;  // length of the output buffer at the time of the save
+	linePos token.Position;  // value of State.linePos at the time of the save
+}
+
+
+func (s *State) save() checkpoint {
+	saved := checkpoint{nil, s.hasOutput, s.output.Len(), s.linePos};
+	if s.env != nil {
+		saved.env = s.env.Copy();
+	}
+	return saved;
+}
+
+
+// restore rolls the state back to the checkpoint m obtained from save:
+// the output written since the checkpoint is discarded and the
+// environment, the hasOutput flag, and the line position are reset to
+// their checkpointed values. Resetting the line position together with
+// the output keeps LinePos and Pos consistent with the truncated
+// buffer; resetting hasOutput makes sure that discarded literals do
+// not count as output when deciding whether to write the global
+// separator.
+//
+// NOTE(review): the environment is restored by reference, so a
+// checkpoint that is restored more than once shares its environment
+// with the state after the first restore - confirm whether a fresh
+// Copy is needed here.
+//
+func (s *State) restore(m checkpoint) {
+	s.env = m.env;
+	s.hasOutput = m.hasOutput;
+	s.output.Truncate(m.outputLen);
+	s.linePos = m.linePos;
+}
+
+
+// error reports a formatting error with the given message on the
+// state's errors channel and then terminates the calling goroutine;
+// it does not return to its caller.
+//
+func (s *State) error(msg string) {
+	err := os.NewError(msg);
+	s.errors <- err;
+	runtime.Goexit();
+}
+
+
+// getField looks up the field called fieldname in val. The fields
+// declared directly in the struct are considered first; if none
+// matches, the anonymous fields are searched recursively and the match
+// with the smallest embedding depth wins. The results are the field
+// value and the depth at which it was found. The value is nil if val
+// is not a struct, if there is no such field, or if there are several
+// matches at the smallest depth; the depth is meaningful only if the
+// value is not nil.
+//
+func getField(val reflect.Value, fieldname string) (reflect.Value, int) {
+	// only structs have fields
+	if val.Kind() != reflect.StructKind {
+		return nil, 0;
+	}
+
+	sval := val.(reflect.StructValue);
+	styp := val.Type().(reflect.StructType);
+
+	// pass 1: fields declared directly in the struct
+	for i := 0; i < styp.Len(); i++ {
+		name, typ, _, _ := styp.Field(i);
+		switch {
+		case name == fieldname:
+			return sval.Field(i), 0;
+		case name == "" && strings.HasSuffix(typ.Name(), "." + fieldname):
+			// anonymous field, denoted by its unqualified type name
+			return sval.Field(i), 0;
+		}
+	}
+
+	// pass 2: fields reachable through anonymous fields
+	var best reflect.Value;
+	depth := 1000;  // infinity (no struct has that many levels)
+	for i := 0; i < styp.Len(); i++ {
+		name, _, _, _ := styp.Field(i);
+		if name != "" {
+			continue;
+		}
+		found, d := getField(sval.Field(i), fieldname);
+		if found == nil {
+			continue;
+		}
+		// keep the most shallow field
+		if d < depth {
+			best, depth = found, d;
+		} else if d == depth {
+			// more than one field at the same depth, possibly
+			// an error unless a more shallow field is found
+			// later
+			best = nil;
+		}
+	}
+
+	return best, depth + 1;
+}
+
+
+// defaultNames maps the kind of a value (as reported by
+// reflect.Value.Kind) to the rule name that typename uses for values
+// whose type has no name of its own.
+//
+// TODO At the moment, unnamed types are simply mapped to the default
+//      names below. For instance, all unnamed arrays are mapped to
+//      'array' which is not really sufficient. Eventually one may want
+//      to be able to specify rules for say an unnamed slice of T.
+//
+var defaultNames = map[int]string {
+	reflect.ArrayKind: "array",
+	reflect.BoolKind: "bool",
+	reflect.ChanKind: "chan",
+	reflect.DotDotDotKind: "ellipsis",
+	reflect.FloatKind: "float",
+	reflect.Float32Kind: "float32",
+	reflect.Float64Kind: "float64",
+	reflect.FuncKind: "func",
+	reflect.IntKind: "int",
+	reflect.Int16Kind: "int16",
+	reflect.Int32Kind: "int32",
+	reflect.Int64Kind: "int64",
+	reflect.Int8Kind: "int8",
+	reflect.InterfaceKind: "interface",
+	reflect.MapKind: "map",
+	reflect.PtrKind: "ptr",
+	reflect.StringKind: "string",
+	reflect.StructKind: "struct",
+	reflect.UintKind: "uint",
+	reflect.Uint16Kind: "uint16",
+	reflect.Uint32Kind: "uint32",
+	reflect.Uint64Kind: "uint64",
+	reflect.Uint8Kind: "uint8",
+	reflect.UintptrKind: "uintptr",
+}
+
+
+// typename returns the rule name under which value is formatted by
+// default: the name of its type or, if the type has no name, the
+// default name registered for its kind in defaultNames. The result
+// is "" if neither exists.
+//
+func typename(value reflect.Value) string {
+	if name := value.Type().Name(); name != "" {
+		return name;
+	}
+	// unnamed type: fall back to the name for its kind
+	if kindName, ok := defaultNames[value.Kind()]; ok {
+		return kindName;
+	}
+	return "";
+}
+
+
+// getFormat returns the expression of the format rule called name.
+// If there is no such rule, the expression of the default rule is
+// returned instead. If there is no default rule either, an error is
+// reported via s.error, which terminates the calling goroutine.
+//
+func (s *State) getFormat(name string) expr {
+	rule, ok := s.fmt[name];
+	switch {
+	case ok:
+		return rule;
+	case s.default_ != nil:
+		return s.default_;
+	}
+
+	s.error(fmt.Sprintf("no format rule for type: '%s'", name));
+	return nil;
+}
+
+
+// eval applies a format expression fexpr to a value. If the expression
+// evaluates internally to a non-nil []byte, that slice is appended to
+// the state's output buffer and eval returns true. Otherwise, eval
+// returns false and the state is rolled back to the checkpoint taken
+// before the failed expression was evaluated.
+//
+// The index selects the array or slice element denoted by a '*' field.
+// It is supplied by the enclosing repetition; top-level evaluation and
+// option bodies use index 0.
+//
+// Errors (a missing format rule, a missing field, an unsupported '*'
+// operand) are reported via s.error, which terminates the goroutine
+// running eval instead of returning.
+//
+func (s *State) eval(fexpr expr, value reflect.Value, index int) bool {
+	// an empty format expression always evaluates
+	// to a non-nil (but empty) []byte
+	if fexpr == nil {
+		return true;
+	}
+
+	switch t := fexpr.(type) {
+	case alternatives:
+		// append the result of the first alternative that evaluates to
+		// a non-nil []byte to the state's output
+		mark := s.save();
+		for _, x := range t {
+			if s.eval(x, value, index) {
+				return true;
+			}
+			s.restore(mark);
+		}
+		return false;
+
+	case sequence:
+		// append the result of all operands to the state's output
+		// unless a nil result is encountered
+		mark := s.save();
+		for _, x := range t {
+			if !s.eval(x, value, index) {
+				s.restore(mark);
+				return false;
+			}
+		}
+		return true;
+
+	case literal:
+		// write separator, if any
+		if s.hasOutput {
+			// not the first literal
+			if s.separator != nil {
+				sep := s.separator;  // save current separator
+				s.separator = nil;  // and disable it (avoid recursion)
+				mark := s.save();
+				if !s.eval(sep, value, index) {
+					s.restore(mark);
+				}
+				s.separator = sep;  // enable it again
+			}
+		}
+		s.hasOutput = true;
+		// write literal segments
+		for _, lit := range t {
+			// check the length first so that an empty segment
+			// is never indexed
+			if len(lit) > 1 && lit[0] == '%' {
+				// segment contains a %-format at the beginning
+				if lit[1] == '%' {
+					// "%%" is printed as a single "%"
+					s.Write(lit[1 : len(lit)]);
+				} else {
+					// use s instead of s.output to get indentation right
+					fmt.Fprintf(s, string(lit), value.Interface());
+				}
+			} else {
+				// segment contains no %-formats
+				s.Write(lit);
+			}
+		}
+		return true;  // a literal never evaluates to nil
+
+	case *field:
+		// determine field value
+		switch t.fieldName {
+		case "^":
+			// field value is current value
+
+		case "*":
+			// indirection: operation is type-specific
+			switch v := value.(type) {
+			case reflect.ArrayValue:
+				// a missing element evaluates to nil; this is
+				// what terminates an enclosing repetition
+				if v.IsNil() || v.Len() <= index {
+					return false;
+				}
+				value = v.Elem(index);
+
+			case reflect.MapValue:
+				s.error("reflection support for maps incomplete");
+
+			case reflect.PtrValue:
+				if v.IsNil() {
+					return false;
+				}
+				value = v.Sub();
+
+			case reflect.InterfaceValue:
+				if v.IsNil() {
+					return false;
+				}
+				value = v.Value();
+
+			case reflect.ChanValue:
+				s.error("reflection support for chans incomplete");
+
+			case reflect.FuncValue:
+				s.error("reflection support for funcs incomplete");
+
+			default:
+				s.error(fmt.Sprintf("error: * does not apply to `%s`", value.Type().Name()));
+			}
+
+		default:
+			// value is value of named field
+			field, _ := getField(value, t.fieldName);
+			if field == nil {
+				// TODO consider just returning false in this case
+				s.error(fmt.Sprintf("error: no field `%s` in `%s`", t.fieldName, value.Type().Name()));
+			}
+			value = field;
+		}
+
+		// determine rule
+		ruleName := t.ruleName;
+		if ruleName == "" {
+			// no alternate rule name, value type determines rule
+			ruleName = typename(value)
+		}
+		fexpr = s.getFormat(ruleName);
+
+		mark := s.save();
+		if !s.eval(fexpr, value, index) {
+			s.restore(mark);
+			return false;
+		}
+		return true;
+
+	case *group:
+		// remember current indentation
+		indentLen := s.indent.Len();
+
+		// update current indentation: the indentation expression is
+		// evaluated into the output buffer, its result is copied to
+		// the indentation buffer, and then removed from the output
+		mark := s.save();
+		s.eval(t.indent, value, index);
+		// if the indentation evaluates to nil, the state's output buffer
+		// didn't change - either way it's ok to append the difference to
+		// the current indentation
+		s.indent.Write(s.output.Data()[mark.outputLen : s.output.Len()]);
+		s.restore(mark);
+
+		// format group body
+		mark = s.save();
+		b := true;
+		if !s.eval(t.body, value, index) {
+			s.restore(mark);
+			b = false;
+		}
+
+		// reset indentation
+		s.indent.Truncate(indentLen);
+		return b;
+
+	case *option:
+		// evaluate the body and append the result to the state's output
+		// buffer unless the result is nil
+		mark := s.save();
+		if !s.eval(t.body, value, 0) {  // TODO is 0 index correct?
+			s.restore(mark);
+		}
+		return true;  // an option never evaluates to nil
+
+	case *repetition:
+		// evaluate the body and append the result to the state's output
+		// buffer until a result is nil
+		for i := 0; ; i++ {
+			// the checkpoint is taken before the separator so that a
+			// failing body also removes the separator written for it
+			mark := s.save();
+			// write separator, if any
+			if i > 0 && t.separator != nil {
+				// nil result from separator is ignored
+				mark := s.save();
+				if !s.eval(t.separator, value, i) {
+					s.restore(mark);
+				}
+			}
+			if !s.eval(t.body, value, i) {
+				s.restore(mark);
+				break;
+			}
+		}
+		return true;  // a repetition never evaluates to nil
+
+	case *custom:
+		// invoke the custom formatter to obtain the result
+		mark := s.save();
+		if !t.fun(s, value.Interface(), t.ruleName) {
+			s.restore(mark);
+			return false;
+		}
+		return true;
+	}
+
+	panic("unreachable");
+	return false;
+}
+
+
+// Eval formats each argument according to the format
+// f and returns the resulting []byte and os.Error. If
+// an error occurred, the []byte contains the partially
+// formatted result. An environment env may be passed
+// in which is available in custom formatters through
+// the state parameter.
+//
+// Formatting runs in a separate goroutine so that an error
+// can abort it at any depth; the goroutine reports either
+// the error or, when it is done, nil on the errors channel.
+//
+func (f Format) Eval(env Environment, args ...) ([]byte, os.Error) {
+	errors := make(chan os.Error);
+	s := newState(f, env, errors);
+
+	go func() {
+		value := reflect.NewValue(args).(reflect.StructValue);
+		for i := 0; i < value.Len(); i++ {
+			fld := value.Field(i);
+			mark := s.save();
+			if !s.eval(s.getFormat(typename(fld)), fld, 0) {  // TODO is 0 index correct?
+				s.restore(mark);
+			}
+		}
+		errors <- nil;  // no errors
+	}();
+
+	// Wait for the formatting goroutine to finish (or to fail) before
+	// looking at the output buffer; reading the buffer first would
+	// return it before the goroutine has written the result.
+	err := <-errors;
+	return s.output.Data(), err;
+}
+
+
+// ----------------------------------------------------------------------------
+// Convenience functions
+
+// Fprint applies the format f to the arguments (see Eval) and writes
+// the formatted result to w. It returns the number of bytes written
+// and an os.Error, if any. If formatting fails, nothing is written
+// and the result is 0 and the formatting error.
+//
+func (f Format) Fprint(w io.Writer, env Environment, args ...) (int, os.Error) {
+	data, err := f.Eval(env, args);
+	if err == nil {
+		return w.Write(data);
+	}
+	// TODO should we print partial result in case of error?
+	return 0, err;
+}
+
+
+// Print formats each argument according to the format f
+// and writes to standard output. The result is the total
+// number of bytes written and an os.Error, if any.
+// Print is Fprint applied to os.Stdout with a nil environment.
+//
+func (f Format) Print(args ...) (int, os.Error) {
+	return f.Fprint(os.Stdout, nil, args);
+}
+
+
+// Sprint formats each argument according to the format f
+// and returns the resulting string. If an error occurs
+// during formatting, the result string contains the
+// partially formatted result followed by an error message.
+// The arguments are formatted with a nil environment.
+//
+func (f Format) Sprint(args ...) string {
+	var buf io.ByteBuffer;
+	// Use Eval rather than Fprint: Fprint writes nothing if formatting
+	// fails, but the partial result is wanted here.
+	data, err := f.Eval(nil, args);
+	// the result of the write to the in-memory buffer is not checked,
+	// as in State.Write
+	buf.Write(data);
+	if err != nil {
+		fmt.Fprintf(&buf, "--- Sprint(%s) failed: %v", fmt.Sprint(args), err);
+	}
+	return string(buf.Data());
+}