// Copyright 2009 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. // Annotate Crefs in Prog with C types by parsing gcc debug output. // Conversion of debug output to Go types. package main import ( "bytes" "debug/dwarf" "debug/elf" "debug/macho" "fmt" "go/ast" "go/parser" "go/token" "os" "strconv" "strings" ) func (p *Prog) loadDebugInfo() { var b bytes.Buffer b.WriteString(p.Preamble) stdout := p.gccPostProc(b.Bytes()) defines := make(map[string]string) for _, line := range strings.Split(stdout, "\n", -1) { if len(line) < 9 || line[0:7] != "#define" { continue } line = strings.TrimSpace(line[8:]) var key, val string spaceIndex := strings.Index(line, " ") tabIndex := strings.Index(line, "\t") if spaceIndex == -1 && tabIndex == -1 { continue } else if tabIndex == -1 || (spaceIndex != -1 && spaceIndex < tabIndex) { key = line[0:spaceIndex] val = strings.TrimSpace(line[spaceIndex:]) } else { key = line[0:tabIndex] val = strings.TrimSpace(line[tabIndex:]) } // Only allow string, character, and numeric constants. Ignoring #defines for // symbols allows those symbols to be referenced in Go, as they will be // translated by gcc later. _, err := strconv.Atoi(string(val[0])) if err == nil || val[0] == '\'' || val[0] == '"' { defines[key] = val } } // Construct a slice of unique names from p.Crefs. m := make(map[string]int) for _, c := range p.Crefs { // If we've already found this name as a define, it is not a Cref. if val, ok := defines[c.Name]; ok { _, err := parser.ParseExpr("", val, nil) if err != nil { fmt.Fprintf(os.Stderr, "The value in C.%s does not parse as a Go expression; cannot use.\n", c.Name) os.Exit(2) } c.Context = "const" c.TypeName = false p.Constdef[c.Name] = val continue } m[c.Name] = -1 } names := make([]string, 0, len(m)) for name, _ := range m { i := len(names) names = names[0 : i+1] names[i] = name m[name] = i } // Coerce gcc into telling us whether each name is // a type, a value, or undeclared. We compile a function // containing the line: // name; // If name is a type, gcc will print: // x.c:2: warning: useless type name in empty declaration // If name is a value, gcc will print // x.c:2: warning: statement with no effect // If name is undeclared, gcc will print // x.c:2: error: 'name' undeclared (first use in this function) // A line number directive causes the line number to // correspond to the index in the names array. b.Reset() b.WriteString(p.Preamble) b.WriteString("void f(void) {\n") b.WriteString("#line 0 \"cgo-test\"\n") for _, n := range names { b.WriteString(n) b.WriteString(";\n") } b.WriteString("}\n") kind := make(map[string]string) _, stderr := p.gccDebug(b.Bytes()) if stderr == "" { fatal("gcc produced no output") } for _, line := range strings.Split(stderr, "\n", -1) { if len(line) < 9 || line[0:9] != "cgo-test:" { continue } line = line[9:] colon := strings.Index(line, ":") if colon < 0 { continue } i, err := strconv.Atoi(line[0:colon]) if err != nil { continue } what := "" switch { default: continue case strings.Index(line, ": useless type name in empty declaration") >= 0: what = "type" case strings.Index(line, ": statement with no effect") >= 0: what = "value" case strings.Index(line, "undeclared") >= 0: what = "error" } if old, ok := kind[names[i]]; ok && old != what { error(noPos, "inconsistent gcc output about C.%s", names[i]) } kind[names[i]] = what } for _, n := range names { if _, ok := kind[n]; !ok { error(noPos, "could not determine kind of name for C.%s", n) } } if nerrors > 0 { fatal("failed to interpret gcc output:\n%s", stderr) } // Extract the types from the DWARF section of an object // from a well-formed C program. Gcc only generates DWARF info // for symbols in the object file, so it is not enough to print the // preamble and hope the symbols we care about will be there. // Instead, emit // typeof(names[i]) *__cgo__i; // for each entry in names and then dereference the type we // learn for __cgo__i. b.Reset() b.WriteString(p.Preamble) for i, n := range names { fmt.Fprintf(&b, "typeof(%s) *__cgo__%d;\n", n, i) } d, stderr := p.gccDebug(b.Bytes()) if d == nil { fatal("gcc failed:\n%s\non input:\n%s", stderr, b.Bytes()) } // Scan DWARF info for top-level TagVariable entries with AttrName __cgo__i. types := make([]dwarf.Type, len(names)) enums := make([]dwarf.Offset, len(names)) r := d.Reader() for { e, err := r.Next() if err != nil { fatal("reading DWARF entry: %s", err) } if e == nil { break } switch e.Tag { case dwarf.TagEnumerationType: offset := e.Offset for { e, err := r.Next() if err != nil { fatal("reading DWARF entry: %s", err) } if e.Tag == 0 { break } if e.Tag == dwarf.TagEnumerator { entryName := e.Val(dwarf.AttrName).(string) i, ok := m[entryName] if ok { enums[i] = offset } } } case dwarf.TagVariable: name, _ := e.Val(dwarf.AttrName).(string) typOff, _ := e.Val(dwarf.AttrType).(dwarf.Offset) if name == "" || typOff == 0 { fatal("malformed DWARF TagVariable entry") } if !strings.HasPrefix(name, "__cgo__") { break } typ, err := d.Type(typOff) if err != nil { fatal("loading DWARF type: %s", err) } t, ok := typ.(*dwarf.PtrType) if !ok || t == nil { fatal("internal error: %s has non-pointer type", name) } i, err := strconv.Atoi(name[7:]) if err != nil { fatal("malformed __cgo__ name: %s", name) } if enums[i] != 0 { t, err := d.Type(enums[i]) if err != nil { fatal("loading DWARF type: %s", err) } types[i] = t } else { types[i] = t.Type } } if e.Tag != dwarf.TagCompileUnit { r.SkipChildren() } } // Record types and typedef information in Crefs. var conv typeConv conv.Init(p.PtrSize) for _, c := range p.Crefs { i, ok := m[c.Name] if !ok { if _, ok := p.Constdef[c.Name]; !ok { fatal("Cref %s is no longer around", c.Name) } continue } c.TypeName = kind[c.Name] == "type" f, fok := types[i].(*dwarf.FuncType) if c.Context == "call" && !c.TypeName && fok { c.FuncType = conv.FuncType(f) } else { c.Type = conv.Type(types[i]) } } p.Typedef = conv.typedef } func concat(a, b []string) []string { c := make([]string, len(a)+len(b)) for i, s := range a { c[i] = s } for i, s := range b { c[i+len(a)] = s } return c } // gccDebug runs gcc -gdwarf-2 over the C program stdin and // returns the corresponding DWARF data and any messages // printed to standard error. func (p *Prog) gccDebug(stdin []byte) (*dwarf.Data, string) { machine := "-m32" if p.PtrSize == 8 { machine = "-m64" } tmp := "_cgo_.o" base := []string{ "gcc", machine, "-Wall", // many warnings "-Werror", // warnings are errors "-o" + tmp, // write object to tmp "-gdwarf-2", // generate DWARF v2 debugging symbols "-fno-eliminate-unused-debug-types", // gets rid of e.g. untyped enum otherwise "-c", // do not link "-xc", // input language is C "-", // read input from standard input } _, stderr, ok := run(stdin, concat(base, p.GccOptions)) if !ok { return nil, string(stderr) } // Try to parse f as ELF and Mach-O and hope one works. var f interface { DWARF() (*dwarf.Data, os.Error) } var err os.Error if f, err = elf.Open(tmp); err != nil { if f, err = macho.Open(tmp); err != nil { fatal("cannot parse gcc output %s as ELF or Mach-O object", tmp) } } d, err := f.DWARF() if err != nil { fatal("cannot load DWARF debug information from %s: %s", tmp, err) } return d, "" } func (p *Prog) gccPostProc(stdin []byte) string { machine := "-m32" if p.PtrSize == 8 { machine = "-m64" } base := []string{"gcc", machine, "-E", "-dM", "-xc", "-"} stdout, stderr, ok := run(stdin, concat(base, p.GccOptions)) if !ok { return string(stderr) } return string(stdout) } // A typeConv is a translator from dwarf types to Go types // with equivalent memory layout. type typeConv struct { // Cache of already-translated or in-progress types. m map[dwarf.Type]*Type typedef map[string]ast.Expr // Predeclared types. bool ast.Expr byte ast.Expr // denotes padding int8, int16, int32, int64 ast.Expr uint8, uint16, uint32, uint64, uintptr ast.Expr float32, float64 ast.Expr void ast.Expr unsafePointer ast.Expr string ast.Expr ptrSize int64 tagGen int } func (c *typeConv) Init(ptrSize int64) { c.ptrSize = ptrSize c.m = make(map[dwarf.Type]*Type) c.typedef = make(map[string]ast.Expr) c.bool = c.Ident("bool") c.byte = c.Ident("byte") c.int8 = c.Ident("int8") c.int16 = c.Ident("int16") c.int32 = c.Ident("int32") c.int64 = c.Ident("int64") c.uint8 = c.Ident("uint8") c.uint16 = c.Ident("uint16") c.uint32 = c.Ident("uint32") c.uint64 = c.Ident("uint64") c.uintptr = c.Ident("uintptr") c.float32 = c.Ident("float32") c.float64 = c.Ident("float64") c.unsafePointer = c.Ident("unsafe.Pointer") c.void = c.Ident("void") c.string = c.Ident("string") } // base strips away qualifiers and typedefs to get the underlying type func base(dt dwarf.Type) dwarf.Type { for { if d, ok := dt.(*dwarf.QualType); ok { dt = d.Type continue } if d, ok := dt.(*dwarf.TypedefType); ok { dt = d.Type continue } break } return dt } // Map from dwarf text names to aliases we use in package "C". var cnameMap = map[string]string{ "long int": "long", "long unsigned int": "ulong", "unsigned int": "uint", "short unsigned int": "ushort", "short int": "short", "long long int": "longlong", "long long unsigned int": "ulonglong", "signed char": "schar", } // Type returns a *Type with the same memory layout as // dtype when used as the type of a variable or a struct field. func (c *typeConv) Type(dtype dwarf.Type) *Type { if t, ok := c.m[dtype]; ok { if t.Go == nil { fatal("type conversion loop at %s", dtype) } return t } t := new(Type) t.Size = dtype.Size() t.Align = -1 t.C = dtype.Common().Name t.EnumValues = nil c.m[dtype] = t if t.Size < 0 { // Unsized types are [0]byte t.Size = 0 t.Go = c.Opaque(0) if t.C == "" { t.C = "void" } return t } switch dt := dtype.(type) { default: fatal("unexpected type: %s", dtype) case *dwarf.AddrType: if t.Size != c.ptrSize { fatal("unexpected: %d-byte address type - %s", t.Size, dtype) } t.Go = c.uintptr t.Align = t.Size case *dwarf.ArrayType: if dt.StrideBitSize > 0 { // Cannot represent bit-sized elements in Go. t.Go = c.Opaque(t.Size) break } gt := &ast.ArrayType{ Len: c.intExpr(dt.Count), } t.Go = gt // publish before recursive call sub := c.Type(dt.Type) t.Align = sub.Align gt.Elt = sub.Go t.C = fmt.Sprintf("typeof(%s[%d])", sub.C, dt.Count) case *dwarf.BoolType: t.Go = c.bool t.Align = c.ptrSize case *dwarf.CharType: if t.Size != 1 { fatal("unexpected: %d-byte char type - %s", t.Size, dtype) } t.Go = c.int8 t.Align = 1 case *dwarf.EnumType: switch t.Size { default: fatal("unexpected: %d-byte enum type - %s", t.Size, dtype) case 1: t.Go = c.uint8 case 2: t.Go = c.uint16 case 4: t.Go = c.uint32 case 8: t.Go = c.uint64 } if t.Align = t.Size; t.Align >= c.ptrSize { t.Align = c.ptrSize } t.C = "enum " + dt.EnumName t.EnumValues = make(map[string]int64) for _, ev := range dt.Val { t.EnumValues[ev.Name] = ev.Val } case *dwarf.FloatType: switch t.Size { default: fatal("unexpected: %d-byte float type - %s", t.Size, dtype) case 4: t.Go = c.float32 case 8: t.Go = c.float64 } if t.Align = t.Size; t.Align >= c.ptrSize { t.Align = c.ptrSize } case *dwarf.FuncType: // No attempt at translation: would enable calls // directly between worlds, but we need to moderate those. t.Go = c.uintptr t.Align = c.ptrSize case *dwarf.IntType: if dt.BitSize > 0 { fatal("unexpected: %d-bit int type - %s", dt.BitSize, dtype) } switch t.Size { default: fatal("unexpected: %d-byte int type - %s", t.Size, dtype) case 1: t.Go = c.int8 case 2: t.Go = c.int16 case 4: t.Go = c.int32 case 8: t.Go = c.int64 } if t.Align = t.Size; t.Align >= c.ptrSize { t.Align = c.ptrSize } case *dwarf.PtrType: t.Align = c.ptrSize // Translate void* as unsafe.Pointer if _, ok := base(dt.Type).(*dwarf.VoidType); ok { t.Go = c.unsafePointer t.C = "void*" break } gt := &ast.StarExpr{} t.Go = gt // publish before recursive call sub := c.Type(dt.Type) gt.X = sub.Go t.C = sub.C + "*" case *dwarf.QualType: // Ignore qualifier. t = c.Type(dt.Type) c.m[dtype] = t return t case *dwarf.StructType: // Convert to Go struct, being careful about alignment. // Have to give it a name to simulate C "struct foo" references. tag := dt.StructName if tag == "" { tag = "__" + strconv.Itoa(c.tagGen) c.tagGen++ } else if t.C == "" { t.C = dt.Kind + " " + tag } name := c.Ident("_C" + dt.Kind + "_" + tag) t.Go = name // publish before recursive calls switch dt.Kind { case "union", "class": c.typedef[name.Name()] = c.Opaque(t.Size) if t.C == "" { t.C = fmt.Sprintf("typeof(unsigned char[%d])", t.Size) } case "struct": g, csyntax, align := c.Struct(dt) if t.C == "" { t.C = csyntax } t.Align = align c.typedef[name.Name()] = g } case *dwarf.TypedefType: // Record typedef for printing. if dt.Name == "_GoString_" { // Special C name for Go string type. // Knows string layout used by compilers: pointer plus length, // which rounds up to 2 pointers after alignment. t.Go = c.string t.Size = c.ptrSize * 2 t.Align = c.ptrSize break } name := c.Ident("_C_" + dt.Name) t.Go = name // publish before recursive call sub := c.Type(dt.Type) t.Size = sub.Size t.Align = sub.Align if _, ok := c.typedef[name.Name()]; !ok { c.typedef[name.Name()] = sub.Go } case *dwarf.UcharType: if t.Size != 1 { fatal("unexpected: %d-byte uchar type - %s", t.Size, dtype) } t.Go = c.uint8 t.Align = 1 case *dwarf.UintType: if dt.BitSize > 0 { fatal("unexpected: %d-bit uint type - %s", dt.BitSize, dtype) } switch t.Size { default: fatal("unexpected: %d-byte uint type - %s", t.Size, dtype) case 1: t.Go = c.uint8 case 2: t.Go = c.uint16 case 4: t.Go = c.uint32 case 8: t.Go = c.uint64 } if t.Align = t.Size; t.Align >= c.ptrSize { t.Align = c.ptrSize } case *dwarf.VoidType: t.Go = c.void t.C = "void" } switch dtype.(type) { case *dwarf.AddrType, *dwarf.BoolType, *dwarf.CharType, *dwarf.IntType, *dwarf.FloatType, *dwarf.UcharType, *dwarf.UintType: s := dtype.Common().Name if s != "" { if ss, ok := cnameMap[s]; ok { s = ss } s = strings.Join(strings.Split(s, " ", -1), "") // strip spaces name := c.Ident("_C_" + s) c.typedef[name.Name()] = t.Go t.Go = name } } if t.C == "" { fatal("internal error: did not create C name for %s", dtype) } return t } // FuncArg returns a Go type with the same memory layout as // dtype when used as the type of a C function argument. func (c *typeConv) FuncArg(dtype dwarf.Type) *Type { t := c.Type(dtype) switch dt := dtype.(type) { case *dwarf.ArrayType: // Arrays are passed implicitly as pointers in C. // In Go, we must be explicit. return &Type{ Size: c.ptrSize, Align: c.ptrSize, Go: &ast.StarExpr{X: t.Go}, C: t.C + "*", } case *dwarf.TypedefType: // C has much more relaxed rules than Go for // implicit type conversions. When the parameter // is type T defined as *X, simulate a little of the // laxness of C by making the argument *X instead of T. if ptr, ok := base(dt.Type).(*dwarf.PtrType); ok { // Unless the typedef happens to point to void* since // Go has special rules around using unsafe.Pointer. if _, void := base(ptr.Type).(*dwarf.VoidType); !void { return c.Type(ptr) } } } return t } // FuncType returns the Go type analogous to dtype. // There is no guarantee about matching memory layout. func (c *typeConv) FuncType(dtype *dwarf.FuncType) *FuncType { p := make([]*Type, len(dtype.ParamType)) gp := make([]*ast.Field, len(dtype.ParamType)) for i, f := range dtype.ParamType { // gcc's DWARF generator outputs a single DotDotDotType parameter for // function pointers that specify no parameters (e.g. void // (*__cgo_0)()). Treat this special case as void. This case is // invalid according to ISO C anyway (i.e. void (*__cgo_1)(...) is not // legal). if _, ok := f.(*dwarf.DotDotDotType); ok && i == 0 { p, gp = nil, nil break } p[i] = c.FuncArg(f) gp[i] = &ast.Field{Type: p[i].Go} } var r *Type var gr []*ast.Field if _, ok := dtype.ReturnType.(*dwarf.VoidType); !ok && dtype.ReturnType != nil { r = c.Type(dtype.ReturnType) gr = []*ast.Field{&ast.Field{Type: r.Go}} } return &FuncType{ Params: p, Result: r, Go: &ast.FuncType{ Params: &ast.FieldList{List: gp}, Results: &ast.FieldList{List: gr}, }, } } // Identifier func (c *typeConv) Ident(s string) *ast.Ident { return ast.NewIdent(s) } // Opaque type of n bytes. func (c *typeConv) Opaque(n int64) ast.Expr { return &ast.ArrayType{ Len: c.intExpr(n), Elt: c.byte, } } // Expr for integer n. func (c *typeConv) intExpr(n int64) ast.Expr { return &ast.BasicLit{ Kind: token.INT, Value: []byte(strconv.Itoa64(n)), } } // Add padding of given size to fld. func (c *typeConv) pad(fld []*ast.Field, size int64) []*ast.Field { n := len(fld) fld = fld[0 : n+1] fld[n] = &ast.Field{Names: []*ast.Ident{c.Ident("_")}, Type: c.Opaque(size)} return fld } // Struct conversion func (c *typeConv) Struct(dt *dwarf.StructType) (expr *ast.StructType, csyntax string, align int64) { csyntax = "struct { " fld := make([]*ast.Field, 0, 2*len(dt.Field)+1) // enough for padding around every field off := int64(0) // Mangle struct fields that happen to be named Go keywords into // _{keyword}. Create a map from C ident -> Go ident. The Go ident will // be mangled. Any existing identifier that already has the same name on // the C-side will cause the Go-mangled version to be prefixed with _. // (e.g. in a struct with fields '_type' and 'type', the latter would be // rendered as '__type' in Go). ident := make(map[string]string) used := make(map[string]bool) for _, f := range dt.Field { ident[f.Name] = f.Name used[f.Name] = true } for cid, goid := range ident { if token.Lookup([]byte(goid)).IsKeyword() { // Avoid keyword goid = "_" + goid // Also avoid existing fields for _, exist := used[goid]; exist; _, exist = used[goid] { goid = "_" + goid } used[goid] = true ident[cid] = goid } } for _, f := range dt.Field { if f.BitSize > 0 && f.BitSize != f.ByteSize*8 { continue } if f.ByteOffset > off { fld = c.pad(fld, f.ByteOffset-off) off = f.ByteOffset } t := c.Type(f.Type) n := len(fld) fld = fld[0 : n+1] fld[n] = &ast.Field{Names: []*ast.Ident{c.Ident(ident[f.Name])}, Type: t.Go} off += t.Size csyntax += t.C + " " + f.Name + "; " if t.Align > align { align = t.Align } } if off < dt.ByteSize { fld = c.pad(fld, dt.ByteSize-off) off = dt.ByteSize } if off != dt.ByteSize { fatal("struct size calculation error") } csyntax += "}" expr = &ast.StructType{Fields: &ast.FieldList{List: fld}} return }