diff options
author | Robert Griesemer <gri@golang.org> | 2009-04-25 16:36:17 -0700 |
---|---|---|
committer | Robert Griesemer <gri@golang.org> | 2009-04-25 16:36:17 -0700 |
commit | 04e6a56fe22d888ee854cd43328fe2322ed86f18 (patch) | |
tree | 394c8acb4f21de8aca895591c343526ded32b963 | |
parent | f2f98b58e3b2fce70494c709421bc49dfd738642 (diff) | |
download | golang-04e6a56fe22d888ee854cd43328fe2322ed86f18.tar.gz |
daily snapshot:
- more work on template-driven ast formatting
- added preliminary test suite
- added documentation
TBR=r
OCL=27858
CL=27858
-rw-r--r-- | usr/gri/pretty/ast.txt | 22 | ||||
-rw-r--r-- | usr/gri/pretty/format.go | 461 | ||||
-rw-r--r-- | usr/gri/pretty/format_test.go | 95 | ||||
-rw-r--r-- | usr/gri/pretty/pretty.go | 2 |
4 files changed, 397 insertions, 183 deletions
diff --git a/usr/gri/pretty/ast.txt b/usr/gri/pretty/ast.txt index 998352cca..fd9d9302c 100644 --- a/usr/gri/pretty/ast.txt +++ b/usr/gri/pretty/ast.txt @@ -1,26 +1,26 @@ //string = -// "%s" ; +// "%s"; pointer = - ^ ; + *; array = - ^ ; + *; //token.Token = -// "token<%d>" ; // this should be a Go-installed formatter +// "token<%d>"; // this could be a Go-installed formatter ast ; Comments = - "comments\n" ; + "comments\n"; Ident = - Value ; + Value; Program = - "package " Name "\n\n" { Decls "\n\n" } ; + "package " Name "\n\n" {Decls "\n\n"}; GenDecl = Doc @@ -28,13 +28,13 @@ GenDecl = ")\n"; FuncType = - "(" { Params } ")" ; + "(" ")"; BlockStmt = - "{\n" "}\n" ; + "{\n" "}\n"; FuncDecl = - "func " Name Type [ " " Body ] ; + "func " Name Type [" " Body]; Decl = - ^ ;
\ No newline at end of file + ^;
\ No newline at end of file diff --git a/usr/gri/pretty/format.go b/usr/gri/pretty/format.go index d069ef4c8..39fe51935 100644 --- a/usr/gri/pretty/format.go +++ b/usr/gri/pretty/format.go @@ -2,6 +2,22 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +/* The format package implements syntax-directed formatting of arbitrary + data structures. + + A format specification consists of a set of named productions in EBNF. + The production names correspond to the type names of the data structure + to be printed. The production expressions consist of literal values + (strings), references to fields, and alternative, grouped, optional, + and repetitive sub-expressions. + + When printing a value, its type name is used to lookup the production + to be printed. Literal values are printed as is, field references are + resolved and the respective field value is printed instead (using its + type-specific production), and alternative, grouped, optional, and + repetitive sub-expressions are printed depending on whether they contain + "empty" fields or not. A field is empty if its value is nil. +*/ package format import ( @@ -9,22 +25,20 @@ import ( "go/scanner"; "go/token"; "io"; - "reflect"; "os"; + "reflect"; + "strconv"; ) -// ----------------------------------------------------------------------------- -// Format +// ---------------------------------------------------------------------------- +// Format representation // A production expression is built from the following nodes. // type ( expr interface { - implements_expr(); - }; - - empty struct { + String() string; }; alternative struct { @@ -37,7 +51,7 @@ type ( field struct { name string; // including "^", "*" - format expr; // nil if no format specified + fexpr expr; // nil if no fexpr specified }; literal struct { @@ -61,50 +75,118 @@ type ( ) -// These methods are used to enforce the "implements" relationship for -// better compile-time type checking. -// // TODO If we had a basic accessor mechanism in the language (a field // "f T" automatically implements a corresponding accessor "f() T", this // could be expressed more easily by simply providing the field. // -func (x *empty) implements_expr() {} -func (x *alternative) implements_expr() {} -func (x *sequence) implements_expr() {} -func (x *field) implements_expr() {} -func (x *literal) implements_expr() {} -func (x *option) implements_expr() {} -func (x *repetition) implements_expr() {} -func (x *custom) implements_expr() {} +func (x *alternative) String() string { + return fmt.Sprintf("(%v | %v)", x.x, x.y); +} + + +func (x *sequence) String() string { + return fmt.Sprintf("%v %v", x.x, x.y); +} + + +func (x *field) String() string { + if x.fexpr == nil { + return x.name; + } + return fmt.Sprintf("%s: (%v)", x.name, x.fexpr); +} + + +func (x *literal) String() string { + return strconv.Quote(string(x.value)); +} + + +func (x *option) String() string { + return fmt.Sprintf("[%v]", x.x); +} + + +func (x *repetition) String() string { + return fmt.Sprintf("{%v}", x.x); +} + + +func (x *custom) String() string { + return fmt.Sprintf("<custom %s>", x.name); +} + + +/* A Format is a set of production expressions. A new format is + created explicitly by calling Parse, or implicitly by one of + the Xprintf functions. + + Formatting rules are specified in the following syntax: + + Format = { Production } . + Production = Name [ "=" [ Expression ] ] ";" . + Name = identifier { "." identifier } . + Expression = Term { "|" Term } . + Term = Factor { Factor } . + Factor = string_literal | Field | Group | Option | Repetition . + Field = ( "^" | "*" | Name ) [ ":" Expression ] . + Group = "(" Expression ")" . + Option = "[" Expression "]" . + Repetition = "{" Expression "}" . + + The syntax of white space, comments, identifiers, and string literals is + the same as in Go. + + A production name corresponds to a Go type name of the form + + PackageName.TypeName + + (for instance format.Format). A production of the form + + Name; -// A Format is a set of production expressions. + specifies a package name which is prepended to all subsequent production + names: + + format; + Format = ... // this production matches the type format.Format + + The basic operands of productions are string literals, field names, and + designators. String literals are printed as is, unless they contain a + single %-style format specifier (such as "%d"). In that case, they are + used as the format for fmt.Printf, with the current value as argument. + + The designator "^" stands for the current value; a "*" denotes indirection + (pointers, arrays, maps, and interfaces). + + A field may contain a format specifier of the form + + : Expression + + which specifies the field format irrespective of the field type. + + Default formats are used for types without specific formating rules: + The "%v" format is used for values of all types expect pointer, array, + map, and interface types. They are using the "^" designator. + + TODO complete this description +*/ type Format map [string] expr; -// ----------------------------------------------------------------------------- +// ---------------------------------------------------------------------------- // Parsing /* TODO - - EBNF vs Kleene notation - - default formatters for basic types (may imply scopes so we can override) - installable custom formatters (like for template.go) - - format strings + - have a format to select type name, field tag, field offset? + - use field tag as default format for that field + - field format override (":") is not working as it should + (cannot refer to another production - syntactially not possible + at the moment) */ -/* Format = { Production } . - Production = Name [ "=" [ Expression ] ] ";" . - Name = identifier { "." identifier } . - Expression = Term { "|" Term } . - Term = Factor { Factor } . - Factor = string_literal | Field | Group | Option | Repetition . - Field = ( "^" | "*" | Name ) [ ":" Expression ] . - Group = "(" Expression ")" . - Option = "[" Expression "]" . - Repetition = "{" Expression "}" . -*/ - - type parser struct { scanner scanner.Scanner; @@ -181,52 +263,21 @@ func writeByte(buf *io.ByteBuffer, b byte) { } -// TODO make this complete -func escapeString(s []byte) []byte { - // the string syntax is correct since it comes from the scannner - var buf io.ByteBuffer; - i0 := 0; - for i := 0; i < len(s); { - if s[i] == '\\' { - buf.Write(s[i0 : i]); - i++; - var esc byte; - switch s[i] { - case 'n': esc = '\n'; - case 't': esc = '\t'; - default: panic("unhandled escape:", string(s[i])); - } - writeByte(&buf, esc); - i++; - i0 = i; - } else { - i++; - } - } - - if i0 == 0 { - // no escape sequences - return s; - } - - buf.Write(s[i0 : len(s)]); - return buf.Data(); -} - - func (p *parser) parseValue() []byte { if p.tok != token.STRING { p.expect(token.STRING); - return nil; + return nil; // TODO should return something else? } - s := p.lit[1 : len(p.lit)-1]; // strip quotes - if p.lit[0] == '"' { - s = escapeString(s); + // TODO get rid of back-and-forth conversions + // (change value to string?) + s, err := strconv.Unquote(string(p.lit)); + if err != nil { + panic("scanner error?"); } - + p.next(); - return s; + return io.StringBytes(s); } @@ -244,24 +295,21 @@ func (p *parser) parseField() expr { case token.IDENT: name = p.parseName(); default: - panic("unreachable"); + return nil; } - var format expr; + var fexpr expr; if p.tok == token.COLON { p.next(); - format = p.parseExpr(); + fexpr = p.parseExpr(); } - return &field{name, format}; + return &field{name, fexpr}; } func (p *parser) parseFactor() (x expr) { switch p.tok { - case token.XOR, token.MUL, token.IDENT: - x = p.parseField(); - case token.STRING: x = &literal{p.parseValue()}; @@ -281,8 +329,7 @@ func (p *parser) parseFactor() (x expr) { p.expect(token.RBRACE); default: - p.error_expected(p.pos, "factor"); - p.next(); // make progress + x = p.parseField(); } return x; @@ -291,16 +338,17 @@ func (p *parser) parseFactor() (x expr) { func (p *parser) parseTerm() expr { x := p.parseFactor(); + if x == nil { + p.error_expected(p.pos, "factor"); + p.next(); // make progress + return nil; + } - for p.tok == token.XOR || - p.tok == token.MUL || - p.tok == token.IDENT || - p.tok == token.STRING || - p.tok == token.LPAREN || - p.tok == token.LBRACK || - p.tok == token.LBRACE - { + for { y := p.parseFactor(); + if y == nil { + break; + } x = &sequence{x, y}; } @@ -321,51 +369,37 @@ func (p *parser) parseExpr() expr { } -func (p *parser) parseProduction() (string, expr) { - name := p.parseName(); - - var x expr; - if p.tok == token.ASSIGN { - p.next(); - if p.tok == token.SEMICOLON { - x = &empty{}; - } else { - x = p.parseExpr(); - } - } - - p.expect(token.SEMICOLON); - - return name, x; -} - - func (p *parser) parseFormat() Format { format := make(Format); - + prefix := ""; for p.tok != token.EOF { pos := p.pos; - name, x := p.parseProduction(); - if x == nil { - // prefix declaration - prefix = name + "."; - } else { - // production declaration - // add package prefix, if any - if prefix != "" { - name = prefix + name; + name := p.parseName(); + + if p.tok == token.ASSIGN { + // production + p.next(); + var x expr; + if p.tok != token.SEMICOLON { + x = p.parseExpr(); } // add production to format + name = prefix + name; if t, found := format[name]; !found { format[name] = x; } else { p.Error(pos, "production already declared: " + name); } + + } else { + // prefix only + prefix = name + "."; } + + p.expect(token.SEMICOLON); } - p.expect(token.EOF); - + return format; } @@ -401,6 +435,12 @@ func readSource(src interface{}, err scanner.ErrorHandler) []byte { } +// TODO do better error handling + +// Parse parses a set of format productions. The format src may be +// a string, a []byte, or implement io.Read. The result is a Format +// if no errors occured; otherwise Parse returns nil. +// func Parse(src interface{}) Format { // initialize parser var p parser; @@ -416,8 +456,8 @@ func Parse(src interface{}) Format { } -// ----------------------------------------------------------------------------- -// Application +// ---------------------------------------------------------------------------- +// Formatting func fieldIndex(v reflect.StructValue, fieldname string) int { t := v.Type().(reflect.StructType); @@ -479,14 +519,25 @@ func typename(value reflect.Value) string { } -var defaultFormat = &literal{io.StringBytes("%v")}; +var defaults = map [int] expr { + reflect.ArrayKind: &field{"*", nil}, + reflect.MapKind: &field{"*", nil}, + reflect.PtrKind: &field{"*", nil}, +} + +var catchAll = &literal{io.StringBytes("%v")}; func (f Format) getFormat(value reflect.Value) expr { - if format, found := f[typename(value)]; found { - return format; + if fexpr, found := f[typename(value)]; found { + return fexpr; + } + + // no fexpr found - return kind-specific default value, if any + if fexpr, found := defaults[value.Kind()]; found { + return fexpr; } - // no format found - return defaultFormat; + + return catchAll; } @@ -518,78 +569,121 @@ func printf(w io.Write, format []byte, value reflect.Value) { // Returns true if a non-empty field value was found. -func (f Format) print(w io.Write, format expr, value reflect.Value, index int) bool { - switch t := format.(type) { - case *empty: +func (f Format) print(w io.Write, fexpr expr, value reflect.Value, index int) bool { + debug := false; // enable for debugging + if debug { + fmt.Printf("print(%v, = %v, %v, %d)\n", w, fexpr, value.Interface(), index); + } + + if fexpr == nil { return true; + } + switch t := fexpr.(type) { case *alternative: - // print the contents of the first alternative with a non-empty field + // - print the contents of the first alternative with a non-empty field + // - result is true if there is at least one non-empty field + b := false; var buf io.ByteBuffer; - b := f.print(&buf, t.x, value, index); - if !b { - b = f.print(&buf, t.y, value, index); - } - if b { + if f.print(&buf, t.x, value, index) { w.Write(buf.Data()); + b = true; + } else { + buf.Reset(); + if f.print(&buf, t.y, value, 0) { + w.Write(buf.Data()); + b = true; + } } - return index < 0 || b; + return b; case *sequence: + // - print the contents of the sequence + // - result is true if there is no empty field + // TODO do we need to buffer here? why not? b1 := f.print(w, t.x, value, index); b2 := f.print(w, t.y, value, index); - return index < 0 || b1 && b2; + return b1 && b2; case *field: - var x reflect.Value; + // - print the contents of the field + // - format is either the field format or the type-specific format + // - TODO look at field tag for default format + // - result is true if the field is not empty switch t.name { case "^": - if v, is_ptr := value.(reflect.PtrValue); is_ptr { + // identity - value doesn't change + + case "*": + // indirect + switch v := value.(type) { + case reflect.PtrValue: if v.Get() == nil { return false; } - x = v.Sub(); - } else if v, is_array := value.(reflect.ArrayValue); is_array { + value = v.Sub(); + + case reflect.ArrayValue: if index < 0 || v.Len() <= index { return false; } - x = v.Elem(index); - } else if v, is_interface := value.(reflect.InterfaceValue); is_interface { + value = v.Elem(index); + + case reflect.MapValue: + panic("reflection support for maps incomplete"); + + case reflect.InterfaceValue: if v.Get() == nil { return false; } - x = v.Value(); - } else { - panic("not a ptr, array, or interface"); // TODO fix this + value = v.Value(); + + default: + panic("not a ptr, array, map, or interface"); // TODO fix this } - case "*": - x = value; + default: - if v, is_struct := value.(reflect.StructValue); is_struct { - x = getField(v, t.name); + // field + if s, is_struct := value.(reflect.StructValue); is_struct { + value = getField(s, t.name); } else { panic ("not a struct"); // TODO fix this } } - format = t.format; - if format == nil { - format = f.getFormat(x); + + // determine format + fexpr = t.fexpr; + if fexpr == nil { + // no field format - use type-specific format + fexpr = f.getFormat(value); + } + + return f.print(w, fexpr, value, index); + // BUG (6g?) crash with code below + /* + var buf io.ByteBuffer; + if f.print(&buf, fexpr, value, index) { + w.Write(buf.Data()); + return true; } - b := f.print(w, format, x, index); - return index < 0 || b; + return false; + */ case *literal: + // - print the literal + // - result is always true (literal is never empty) printf(w, t.value, value); return true; case *option: - // print the contents of the option if there is a non-empty field + // print the contents of the option if it contains a non-empty field + //var foobar bool; // BUG w/o this declaration the code works!!! var buf io.ByteBuffer; - b := f.print(&buf, t.x, value, -1); - if b { + if f.print(&buf, t.x, value, 0) { w.Write(buf.Data()); + return true; } - return index < 0 || b; + return false; case *repetition: // print the contents of the repetition while there is a non-empty field @@ -603,19 +697,44 @@ func (f Format) print(w io.Write, format expr, value reflect.Value, index int) b break; } } - return index < 0 || b; + return b; case *custom: - b := t.f(w, value.Interface(), t.name); - return index < 0 || b; + return t.f(w, value.Interface(), t.name); } - + panic("unreachable"); return false; } -func (f Format) Apply(w io.Write, data interface{}) { - value := reflect.NewValue(data); - f.print(w, f.getFormat(value), value, -1); +// TODO proper error reporting + +// Fprint formats each argument according to the format f +// and writes to w. +// +func (f Format) Fprint(w io.Write, args ...) { + value := reflect.NewValue(args).(reflect.StructValue); + for i := 0; i < value.Len(); i++ { + fld := value.Field(i); + f.print(w, f.getFormat(fld), fld, -1); + } +} + + +// Fprint formats each argument according to the format f +// and writes to standard output. +// +func (f Format) Print(args ...) { + f.Print(os.Stdout, args); +} + + +// Fprint formats each argument according to the format f +// and returns the resulting string. +// +func (f Format) Sprint(args ...) string { + var buf io.ByteBuffer; + f.Fprint(&buf, args); + return string(buf.Data()); } diff --git a/usr/gri/pretty/format_test.go b/usr/gri/pretty/format_test.go new file mode 100644 index 000000000..e2948cc34 --- /dev/null +++ b/usr/gri/pretty/format_test.go @@ -0,0 +1,95 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package format + +import ( + "format"; + "testing"; +) + + +func check(t *testing.T, form, expected string, args ...) { + result := format.Parse(form).Sprint(args); + if result != expected { + t.Errorf( + "format : %s\nresult : %s\nexpected: %s\n\n", + form, result, expected + ) + } +} + + +// ---------------------------------------------------------------------------- +// - formatting of basic type int + +const F0 = + `int = "0x%x";` + +func Test0(t *testing.T) { + check(t, F0, "0x2a", 42); +} + + +// ---------------------------------------------------------------------------- +// - default formatting of basic type int +// - formatting of a struct + +type T1 struct { + a int; +} + +const F1 = + `format.T1 = "<" a ">";` + +func Test1(t *testing.T) { + check(t, F1, "<42>", T1{42}); +} + + +// ---------------------------------------------------------------------------- +// - formatting of a struct with an optional field (pointer) +// - default formatting for pointers + +type T2 struct { + s string; + p *T1; +} + +const F2a = + F1 + + `pointer = *;` + `format.T2 = s ["-" p "-"];`; + +const F2b = + F1 + + `format.T2 = s ("-" p "-" | "empty");`; + +func Test2(t *testing.T) { + check(t, F2a, "foo", T2{"foo", nil}); + check(t, F2a, "bar-<17>-", T2{"bar", &T1{17}}); + check(t, F2b, "fooempty", T2{"foo", nil}); +} + + +// ---------------------------------------------------------------------------- +// - formatting of a struct with a repetitive field (slice) + +type T3 struct { + s string; + a []int; +} + +const F3a = + `format.T3 = s { " " a a "," };`; + +const F3b = + `format.T3 = [a:""] s | "nothing";`; // use 'a' to select alternative w/o printing a + +func Test3(t *testing.T) { + check(t, F3a, "foo", T3{"foo", nil}); + check(t, F3a, "foo 00, 11, 22,", T3{"foo", []int{0, 1, 2}}); + //check(t, F3b, "nothing", T3{"bar", nil}); // TODO fix this + check(t, F3b, "bar", T3{"bar", []int{0}}); +} diff --git a/usr/gri/pretty/pretty.go b/usr/gri/pretty/pretty.go index 83d0bf1b9..b37f80bfb 100644 --- a/usr/gri/pretty/pretty.go +++ b/usr/gri/pretty/pretty.go @@ -135,7 +135,7 @@ func main() { if ok && !*silent { tw := makeTabwriter(os.Stdout); if *formatter { - ast_format.Apply(tw, prog); + ast_format.Fprint(tw, prog); } else { var p astPrinter.Printer; p.Init(tw, nil, nil /*prog.Comments*/, false); |