3 files changed, 746 insertions, 0 deletions
diff --git a/src/cmd/cgo/Makefile b/src/cmd/cgo/Makefile
new file mode 100644
index 000000000..8d9e11e53
--- /dev/null
+++ b/src/cmd/cgo/Makefile
@@ -0,0 +1,11 @@
+# Copyright 2009 The Go Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+include $(GOROOT)/src/Make.$(GOARCH)
+
+TARG=cgo
+GOFILES=\
+	cgo.go\
+
+include $(GOROOT)/src/Make.cmd
diff --git a/src/cmd/cgo/cgo.go b/src/cmd/cgo/cgo.go
new file mode 100644
index 000000000..f174b7854
--- /dev/null
+++ b/src/cmd/cgo/cgo.go
@@ -0,0 +1,308 @@
+// Copyright 2009 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Cgo; see gmp.go for an overview.
+
+// TODO(rsc):
+//	Emit correct line number annotations.
+//	Make 6g understand the annotations.
+package main
+
+import (
+	"bufio";
+	"container/vector";
+	"debug/dwarf";
+	"debug/elf";
+	"flag";
+	"fmt";
+	"go/ast";
+	"go/doc";
+	"go/parser";
+	"go/scanner";
+	"go/token";
+	"io";
+	"os";
+)
+
+// Map of uses of C.xxx.  The key is the pointer
+// to the use (a pointer so it can be rewritten)
+// and the value is the context ("call", "expr", "type").
+type cmap map[*ast.Expr] string
+
+var noPos token.Position
+
+func usage() {
+	fmt.Fprint(os.Stderr, "usage: cgo [options] file.cgo\n");
+	flag.PrintDefaults();
+}
+
+func main() {
+	flag.Usage = usage;
+	flag.Parse();
+
+	args := flag.Args();
+	if len(args) != 1 {
+		flag.Usage();
+	}
+	filename := args[0];
+
+	prog, err := parser.ParsePkgFile("", filename, parser.ParseComments);
+	if err != nil {
+		fatal(err);
+	}
+
+	// Find the import "C" line and get any extra C preamble.
+	preamble := "";
+	found := false;
+	for _, d := range prog.Decls {
+		d, ok := d.(*ast.GenDecl);
+		if !ok {
+			continue;
+		}
+		for _, s := range d.Specs {
+			s, ok := s.(*ast.ImportSpec);
+			if !ok {
+				continue;
+			}
+			if len(s.Path) != 1 || string(s.Path[0].Value) != `"C"` {
+				continue;
+			}
+			found = true;
+			if s.Name != nil {
+				error(s.Path[0].Pos(), `cannot rename import "C"`);
+			}
+			if s.Doc != nil {
+				preamble += doc.CommentText(s.Doc) + "\n";
+			}
+			else if len(d.Specs) == 1 && d.Doc != nil {
+				preamble += doc.CommentText(d.Doc) + "\n";
+			}
+		}
+	}
+	if !found {
+		error(noPos, `cannot find import "C"`);
+	}
+
+	// Accumulate pointers to uses of C.x.
+	m := make(cmap);
+	walk(prog, m, "prog");
+
+	fmt.Print(preamble);
+	for p, context := range m {
+		sel := (*p).(*ast.SelectorExpr);
+		fmt.Printf("%s: %s as %s\n", sel.Pos(), sel.Sel.Value, context);
+	}
+}
+
+func walk(x interface{}, m cmap, context string) {
+	switch n := x.(type) {
+	case *ast.Expr:
+		if sel, ok := (*n).(*ast.SelectorExpr); ok {
+			// For now, assume that the only instance of capital C is
+			// when used as the imported package identifier.
+			// The parser should take care of scoping in the future,
+			// so that we will be able to distinguish a "top-level C"
+			// from a local C.
+			if l, ok := sel.X.(*ast.Ident); ok && l.Value == "C" {
+				m[n] = context;
+				break;
+			}
+		}
+		walk(*n, m, context);
+
+	// everything else just recurs
+	default:
+		error(noPos, "unexpected type %T in walk", x);
+		panic();
+
+	case nil:
+
+	// These are ordered and grouped to match ../../pkg/go/ast/ast.go
+	case *ast.Field:
+		walk(&n.Type, m, "type");
+	case *ast.BadExpr:
+	case *ast.Ident:
+	case *ast.Ellipsis:
+	case *ast.BasicLit:
+	case *ast.StringList:
+	case *ast.FuncLit:
+		walk(n.Type, m, "type");
+		walk(n.Body, m, "stmt");
+	case *ast.CompositeLit:
+		walk(&n.Type, m, "type");
+		walk(n.Elts, m, "expr");
+	case *ast.ParenExpr:
+		walk(&n.X, m, context);
+	case *ast.SelectorExpr:
+		walk(&n.X, m, "selector");
+	case *ast.IndexExpr:
+		walk(&n.X, m, "expr");
+		walk(&n.Index, m, "expr");
+		if n.End != nil {
+			walk(&n.End, m, "expr");
+		}
+	case *ast.TypeAssertExpr:
+		walk(&n.X, m, "expr");
+		walk(&n.Type, m, "type");
+	case *ast.CallExpr:
+		walk(&n.Fun, m, "call");
+		walk(n.Args, m, "expr");
+	case *ast.StarExpr:
+		walk(&n.X, m, context);
+	case *ast.UnaryExpr:
+		walk(&n.X, m, "expr");
+	case *ast.BinaryExpr:
+		walk(&n.X, m, "expr");
+		walk(&n.Y, m, "expr");
+	case *ast.KeyValueExpr:
+		walk(&n.Key, m, "expr");
+		walk(&n.Value, m, "expr");
+
+	case *ast.ArrayType:
+		walk(&n.Len, m, "expr");
+		walk(&n.Elt, m, "type");
+	case *ast.StructType:
+		walk(n.Fields, m, "field");
+	case *ast.FuncType:
+		walk(n.Params, m, "field");
+		walk(n.Results, m, "field");
+	case *ast.InterfaceType:
+		walk(n.Methods, m, "field");
+	case *ast.MapType:
+		walk(&n.Key, m, "type");
+		walk(&n.Value, m, "type");
+	case *ast.ChanType:
+		walk(&n.Value, m, "type");
+
+	case *ast.BadStmt:
+	case *ast.DeclStmt:
+		walk(n.Decl, m, "decl");
+	case *ast.EmptyStmt:
+	case *ast.LabeledStmt:
+		walk(n.Stmt, m, "stmt");
+	case *ast.ExprStmt:
+		walk(&n.X, m, "expr");
+	case *ast.IncDecStmt:
+		walk(&n.X, m, "expr");
+	case *ast.AssignStmt:
+		walk(n.Lhs, m, "expr");
+		walk(n.Rhs, m, "expr");
+	case *ast.GoStmt:
+		walk(&n.Call, m, "expr");
+	case *ast.DeferStmt:
+		walk(&n.Call, m, "expr");
+	case *ast.ReturnStmt:
+		walk(n.Results, m, "expr");
+	case *ast.BranchStmt:
+	case *ast.BlockStmt:
+		walk(n.List, m, "stmt");
+	case *ast.IfStmt:
+		walk(n.Init, m, "stmt");
+		walk(&n.Cond, m, "expr");
+		walk(n.Body, m, "stmt");
+		walk(n.Else, m, "stmt");
+	case *ast.CaseClause:
+		walk(n.Values, m, "expr");
+		walk(n.Body, m, "stmt");
+	case *ast.SwitchStmt:
+		walk(n.Init, m, "stmt");
+		walk(&n.Tag, m, "expr");
+		walk(n.Body, m, "stmt");
+	case *ast.TypeCaseClause:
+		walk(n.Types, m, "type");
+		walk(n.Body, m, "stmt");
+	case *ast.TypeSwitchStmt:
+		walk(n.Init, m, "stmt");
+		walk(n.Assign, m, "stmt");
+		walk(n.Body, m, "stmt");
+	case *ast.CommClause:
+		walk(n.Lhs, m, "expr");
+		walk(n.Rhs, m, "expr");
+		walk(n.Body, m, "stmt");
+	case *ast.SelectStmt:
+		walk(n.Body, m, "stmt");
+	case *ast.ForStmt:
+		walk(n.Init, m, "stmt");
+		walk(&n.Cond, m, "expr");
+		walk(n.Post, m, "stmt");
+		walk(n.Body, m, "stmt");
+	case *ast.RangeStmt:
+		walk(&n.Key, m, "expr");
+		walk(&n.Value, m, "expr");
+		walk(&n.X, m, "expr");
+		walk(n.Body, m, "stmt");
+
+	case *ast.ImportSpec:
+	case *ast.ValueSpec:
+		walk(&n.Type, m, "type");
+		walk(n.Values, m, "expr");
+	case *ast.TypeSpec:
+		walk(&n.Type, m, "type");
+
+	case *ast.BadDecl:
+	case *ast.GenDecl:
+		walk(n.Specs, m, "spec");
+	case *ast.FuncDecl:
+		if n.Recv != nil {
+			walk(n.Recv, m, "field");
+		}
+		walk(n.Type, m, "type");
+		walk(n.Body, m, "stmt");
+
+	case *ast.File:
+		walk(n.Decls, m, "decl");
+
+	case *ast.Package:
+		for _, f := range n.Files {
+			walk(f, m, "file");
+		}
+
+	case []ast.Decl:
+		for _, d := range n {
+			walk(d, m, context);
+		}
+	case []ast.Expr:
+		for i := range n {
+			walk(&n[i], m, context);
+		}
+	case []*ast.Field:
+		for _, f := range n {
+			walk(f, m, context);
+		}
+	case []ast.Stmt:
+		for _, s := range n {
+			walk(s, m, context);
+		}
+	case []ast.Spec:
+		for _, s := range n {
+			walk(s, m, context);
+		}
+	}
+}
+
+func fatal(err os.Error) {
+	// If err is a scanner.ErrorList, its String will print just
+	// the first error and then (+n more errors).
+	// Instead, turn it into a new Error that will return
+	// details for all the errors.
+	if list, ok := err.(scanner.ErrorList); ok {
+		for _, e := range list {
+			fmt.Fprintln(os.Stderr, e);
+		}
+	} else {
+		fmt.Fprintln(os.Stderr, err);
+	}
+	os.Exit(2);
+}
+
+var nerrors int
+
+func error(pos token.Position, msg string, args ...) {
+	nerrors++;
+	if pos.IsValid() {
+		fmt.Fprintf(os.Stderr, "%s: ", pos);
+	}
+	fmt.Fprintf(os.Stderr, msg, args);
+	fmt.Fprintf(os.Stderr, "\n");
+}
diff --git a/src/cmd/cgo/gmp.go b/src/cmd/cgo/gmp.go
new file mode 100644
index 000000000..82e4b98a1
--- /dev/null
+++ b/src/cmd/cgo/gmp.go
@@ -0,0 +1,427 @@
+// Copyright 2009 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+/*
+An example of wrapping a C library in Go. This is the GNU
+multiprecision library gmp's integer type mpz_t wrapped to look like
+the Go package big's integer type Int.
+
+This is a syntactically valid Go program—it can be parsed with the Go
+parser and processed by godoc—but it is not compiled directly by 6g.
+Instead, a separate tool, cgo, processes it to produce three output
+files.  The first two, 6g.go and 6c.c, are a Go source file for 6g and
+a C source file for 6c; both compile as part of the named package
+(gmp, in this example).  The third, gcc.c, is a C source file for gcc;
+it compiles into a shared object (.so) that is dynamically linked into
+any 6.out that imports the first two files.
+
+The stanza
+
+	// #include <gmp.h>
+	import "C"
+
+is a signal to cgo.  The doc comment on the import of "C" provides
+additional context for the C file.  Here it is just a single #include
+but it could contain arbitrary C definitions to be imported and used.
+
+Cgo recognizes any use of a qualified identifier C.xxx and uses gcc to
+find the definition of xxx.  If xxx is a type, cgo replaces C.xxx with
+a Go translation.  C arithmetic types translate to precisely-sized Go
+arithmetic types.  A C struct translates to a Go struct, field by
+field; unrepresentable fields are replaced with opaque byte arrays.  A
+C union translates into a struct containing the first union member and
+perhaps additional padding.  C arrays become Go arrays.  C pointers
+become Go pointers.  C function pointers and void pointers become Go's
+*byte.
+
+For example, mpz_t is defined in <gmp.h> as:
+
+	typedef unsigned long int mp_limb_t;
+
+	typedef struct
+	{
+		int _mp_alloc;
+		int _mp_size;
+		mp_limb_t *_mp_d;
+	} __mpz_struct;
+
+	typedef __mpz_struct mpz_t[1];
+
+Cgo generates:
+
+	type _C_int int32
+	type _C_mp_limb_t uint64
+	type _C___mpz_struct struct {
+		_mp_alloc _C_int;
+		_mp_size _C_int;
+		_mp_d *_C_mp_limb_t;
+	}
+	type _C_mpz_t [1]_C___mpz_struct
+
+and then replaces each occurrence of a type C.xxx with _C_xxx.
+
+If xxx is data, cgo arranges for C.xxx to refer to the C variable,
+with the type translated as described above.  To do this, cgo must
+introduce a Go variable that points at the C variable (the linker can
+be told to initialize this pointer).  For example, if the gmp library
+provided
+
+	mpz_t zero;
+
+then cgo would rewrite a reference to C.zero by introducing
+
+	var _C_zero *C.mpz_t
+
+and then replacing all instances of C.zero with (*_C_zero).
+
+Cgo's most interesting translation is for functions.  If xxx is a C
+function, then cgo rewrites C.xxx into a new function _C_xxx that
+calls the C xxx in a standard pthread.  The new function translates
+its arguments, calls xxx, and translates the return value.
+
+Translation of parameters and the return value follows the type
+translation above with one extension: a function expecting a char*
+will change to expect a string, and a function returning a char* will
+change to return a string.  The wrapper that cgo generates for the
+first case allocates a new C string, passes that pointer to the C
+function, and then frees the string when the function returns.  The
+wrapper for the second case assumes the char* being returned is
+pointer that must be freed.  It makes a Go string with a copy of the
+contents and then frees the pointer.  The char* conventions are a
+useful heuristic; there should be some way to override them but isn't
+yet.  One can also imagine wrapping Go functions being passed into C
+functions so that C can call them.
+
+Garbage collection is the big problem.  It is fine for the Go world to
+have pointers into the C world and to free those pointers when they
+are no longer needed.  To help, the garbage collector calls an
+object's destroy() method prior to collecting it.  C pointers can be
+wrapped by Go objects with appropriate destroy methods.
+
+It is much more difficult for the C world to have pointers into the Go
+world, because the Go garbage collector is unaware of the memory
+allocated by C. I think the most important consideration is not to
+constrain future implementations, so the rule is basically that Go
+code can hand a Go pointer to C code but must separately arrange for
+Go to hang on to a reference to the pointer until C is done with it.
+
+Note: the sketches assume that the char* <-> string conversions described
+above have been thrown away.  Otherwise one can't pass nil as the first
+argument to mpz_get_str.
+
+Sketch of 6c.c:
+
+	// NOTE: Maybe cgo is smart enough to figure out that
+	// mpz_init's real C name is __gmpz_init and use that instead.
+
+	// Tell dynamic linker to initialize _cgo_mpz_init in this file
+	// to point at the function of the same name in gcc.c.
+	#pragma dynld _cgo_mpz_init _cgo_mpz_init "gmp.so"
+	#pragma dynld _cgo_mpz_get_str _cgo_mpz_get_str "gmp.so"
+
+	void (*_cgo_mpz_init)(void*);
+	void (*_cgo_mpz_get_str)(void*);
+
+	// implementation of Go function called as C.mpz_init below.
+	void
+	gmp·_C_mpz_init(struct { char x[8]; } p)	// dummy struct, same size as 6g parameter frame
+	{
+		cgocall(_cgo_mpz_init, &p);
+	}
+
+	void
+	gmp·_C_mpz_get_str(struct { char x[32]; } p)
+	{
+		cgocall(_cgo_mpz_get_str, &p);
+	}
+
+Sketch of 6g.go:
+
+	// Type declarations from above, omitted.
+
+	// Extern declarations for 6c.c functions
+	func _C_mpz_init(*_C_mpz_t)
+	func _C_mpz_get_str(*_C_char, int32, *_C_mpz_t) *_C_char
+
+	// Original Go source with C.xxx replaced by _C_xxx
+	// as described above.
+
+Sketch of gcc.c:
+
+	void
+	_cgo_mpz_init(void *v)
+	{
+		struct {
+			__mpz_struct *p1;	// not mpz_t because of C array passing rule
+		} *a = v;
+		mpz_init(a->p1);
+	}
+
+	void
+	_cgo_mpz_get_str(void *v)
+	{
+		struct {
+			char *p1;
+			int32 p2;
+			in32 _pad1;
+			__mpz_struct *p3;
+			char *p4;
+		} *a = v;
+		a->p4 = mpz_get_str(a->p1, a->p2, a->p3);
+	}
+
+Gmp defines mpz_t as __mpz_struct[1], meaning that if you
+declare one it takes up a struct worth of space, but when you
+pass one to a function, it passes a pointer to the space instead
+of copying it.  This can't be modeled directly in Go or in C structs
+so some rewriting happens in the generated files.  In Go,
+the functions take *_C_mpz_t instead of _C_mpz_t, and in the
+GCC structs, the parameters are __mpz_struct* instead of mpz_t.
+
+*/
+
+package gmp
+
+// #include <gmp.h>
+import "C"
+
+
+/*
+ * one of a kind
+ */
+
+// An Int represents a signed multi-precision integer.
+// The zero value for an Int represents the value 0.
+type Int struct {
+	i C.mpz_t;
+	init bool;
+}
+
+// NewInt returns a new Int initialized to x.
+func NewInt(x int64) *Int {
+	z := new(Int);
+	z.init = true;
+	C.mpz_init(&z.i);
+	C.mpz_set(&z.i, x);
+	return z;
+}
+
+// Int promises that the zero value is a 0, but in gmp
+// the zero value is a crash.  To bridge the gap, the
+// init bool says whether this is a valid gmp value.
+// doinit initializes z.i if it needs it.  This is not inherent
+// to FFI, just a mismatch between Go's convention of
+// making zero values useful and gmp's decision not to.
+func (z *Int) doinit() {
+	if z.init {
+		return;
+	}
+	z.init = true;
+	C.mpz_init(&z.i);
+}
+
+// Bytes returns z's representation as a big-endian byte array.
+func (z *Int) Bytes() []byte {
+	b := make([]byte, (z.Len() + 7) / 8);
+	n := C.size_t(len(b));
+	C.mpz_export(&b[0], &n, 1, 1, 1, 0, &z.i);
+	return b[0:n];
+}
+
+// Len returns the length of z in bits.  0 is considered to have length 1.
+func (z *Int) Len() int {
+	z.doinit();
+	return int(C.mpz_sizeinbase(&z.i, 2));
+}
+
+// Set sets z = x and returns z.
+func (z *Int) Set(x *Int) *Int {
+	z.doinit();
+	C.mpz_set(&z.i, x);
+	return z;
+}
+
+// SetBytes interprets b as the bytes of a big-endian integer
+// and sets z to that value.
+func (z *Int) SetBytes(b []byte) *Int {
+	z.doinit();
+	if len(b) == 0 {
+		z.SetInt64(0);
+	} else {
+		C.mpz_import(&z.i, len(b), 1, 1, 1, 0, &b[0]);
+	}
+	return z;
+}
+
+// SetInt64 sets z = x and returns z.
+func (z *Int) SetInt64(x int64) *Int {
+	z.doinit();
+	// TODO(rsc): more work on 32-bit platforms
+	C.mpz_set_si(z, x);
+	return z;
+}
+
+// SetString interprets s as a number in the given base
+// and sets z to that value.  The base must be in the range [2,36].
+// SetString returns an error if s cannot be parsed or the base is invalid.
+func (z *Int) SetString(s string, base int) os.Error {
+	z.doinit();
+	if base < 2 || base > 36 {
+		return os.EINVAL;
+	}
+	if C.mpz_set_str(&z.i, s, base) < 0 {
+		return os.EINVAL;
+	}
+	return z;
+}
+
+// String returns the decimal representation of z.
+func (z *Int) String() string {
+	z.doinit();
+	return C.mpz_get_str(nil, 10, &z.i);
+}
+
+func (z *Int) destroy() {
+	if z.init {
+		C.mpz_clear(z);
+	}
+	z.init = false;
+}
+
+
+/*
+ * arithmetic
+ */
+
+// Add sets z = x + y and returns z.
+func (z *Int) Add(x, y *Int) *Int {
+	x.doinit();
+	y.doinit();
+	z.doinit();
+	C.mpz_add(&z.i, &x.i, &y.i);
+	return z;
+}
+
+// Sub sets z = x - y and returns z.
+func (z *Int) Sub(x, y *Int) *Int {
+	x.doinit();
+	y.doinit();
+	z.doinit();
+	C.mpz_sub(&z.i, &x.i, &y.i);
+	return z;
+}
+
+// Mul sets z = x * y and returns z.
+func (z *Int) Mul(x, y *Int) *Int {
+	x.doinit();
+	y.doinit();
+	z.doinit();
+	C.mpz_mul(&z.i, &x.i, &y.i);
+	return z;
+}
+
+// Div sets z = x / y, rounding toward zero, and returns z.
+func (z *Int) Div(x, y *Int) *Int {
+	x.doinit();
+	y.doinit();
+	z.doinit();
+	C.mpz_tdiv_q(&z.i, &x.i, &y.i);
+	return z;
+}
+
+// Mod sets z = x % y and returns z.
+// XXX Unlike in Go, the result is always positive.
+func (z *Int) Mod(x, y *Int) *Int {
+	x.doinit();
+	y.doinit();
+	z.doinit();
+	C.mpz_tdiv_r(&z.i, &x.i, &y.i);
+	return z;
+}
+
+// Lsh sets z = x << s and returns z.
+func (z *Int) Lsh(x *Int, s uint) *Int {
+	x.doinit();
+	y.doinit();
+	z.doinit();
+	C.mpz_mul_2exp(&z.i, &x.i, s);
+}
+
+// Rsh sets z = x >> s and returns z.
+func (z *Int) Rsh(x *int, s uint) *Int {
+	x.doinit();
+	y.doinit();
+	z.doinit();
+	C.mpz_div_2exp(&z.i, &x.i, s);
+}
+
+// Exp sets z = x^y % m and returns z.
+// If m == nil, Exp sets z = x^y.
+func (z *Int) Exp(x, y, m *Int) *Int {
+	m.doinit();
+	x.doinit();
+	y.doinit();
+	z.doinit();
+	if m == nil {
+		C.mpz_pow(&z.i, &x.i, &y.i);
+	} else {
+		C.mpz_powm(&z.i, &x.i, &y.i, &m.i);
+	}
+	return z;
+}
+
+// Neg sets z = -x and returns z.
+func (z *Int) Neg(x *Int) *Int {
+	x.doinit();
+	z.doinit();
+	C.mpz_neg(&z.i, &x.i);
+	return z;
+}
+
+// Abs sets z to the absolute value of x and returns z.
+func (z *Int) Abs(x *Int) *Int {
+	x.doinit();
+	z.doinit();
+	C.mpz_abs(&z.i, &x.i);
+	return z;
+}
+
+
+/*
+ * functions without a clear receiver
+ */
+
+// CmpInt compares x and y. The result is
+//
+//   -1 if x <  y
+//    0 if x == y
+//   +1 if x >  y
+//
+func CmpInt(x, y *Int) int {
+	x.doinit();
+	y.doinit();
+	return C.mpz_cmp(&x.i, &y.i);
+}
+
+// DivModInt sets q = x / y and r = x % y.
+func DivModInt(q, r, x, y *Int) {
+	q.doinit();
+	r.doinit();
+	x.doinit();
+	y.doinit();
+	C.mpz_tdiv_qr(&q.i, &r.i, &x.i, &y.i);
+}
+
+// GcdInt sets d to the greatest common divisor of a and b,
+// which must be positive numbers.
+// If x and y are not nil, GcdInt sets x and y such that d = a*x + b*y.
+// If either a or b is not positive, GcdInt sets d = x = y = 0.
+func GcdInt(d, x, y, a, b *Int) {
+	d.doinit();
+	x.doinit();
+	y.doinit();
+	a.doinit();
+	b.doinit();
+	C.mpz_gcdext(&d.i, &x.i, &y.i, &a.i, &b.i);
+}