summaryrefslogtreecommitdiff
path: root/src/pkg/gob/encoder.go
diff options
context:
space:
mode:
Diffstat (limited to 'src/pkg/gob/encoder.go')
-rw-r--r--src/pkg/gob/encoder.go198
1 files changed, 190 insertions, 8 deletions
diff --git a/src/pkg/gob/encoder.go b/src/pkg/gob/encoder.go
index f75eccd95..1182a70c4 100644
--- a/src/pkg/gob/encoder.go
+++ b/src/pkg/gob/encoder.go
@@ -2,6 +2,183 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
+/*
+ The gob package manages streams of gobs - binary values exchanged between an
+ Encoder (transmitter) and a Decoder (receiver). A typical use is transporting
+ arguments and results of remote procedure calls (RPCs) such as those provided by
+ package "rpc".
+
+ A stream of gobs is self-describing. Each data item in the stream is preceded by
+ a specification of its type, expressed in terms of a small set of predefined
+ types. Pointers are not transmitted, but the things they point to are
+ transmitted; that is, the values are flattened. Recursive types work fine, but
+ recursive values (data with cycles) are problematic. This may change.
+
+ To use gobs, create an Encoder and present it with a series of data items as
+ values or addresses that can be dereferenced to values. (At the moment, these
+ items must be structs (struct, *struct, **struct etc.), but this may change.) The
+ Encoder makes sure all type information is sent before it is needed. At the
+ receive side, a Decoder retrieves values from the encoded stream and unpacks them
+ into local variables.
+
+ The source and destination values/types need not correspond exactly. For structs,
+ fields (identified by name) that are in the source but absent from the receiving
+ variable will be ignored. Fields that are in the receiving variable but missing
+ from the transmitted type or value will be ignored in the destination. If a field
+ with the same name is present in both, their types must be compatible. Both the
+ receiver and transmitter will do all necessary indirection and dereferencing to
+ convert between gobs and actual Go values. For instance, a gob type that is
+ schematically,
+
+ struct { a, b int }
+
+ can be sent from or received into any of these Go types:
+
+ struct { a, b int } // the same
+ *struct { a, b int } // extra indirection of the struct
+ struct { *a, **b int } // extra indirection of the fields
+ struct { a, b int64 } // different concrete value type; see below
+
+ It may also be received into any of these:
+
+ struct { a, b int } // the same
+ struct { b, a int } // ordering doesn't matter; matching is by name
+ struct { a, b, c int } // extra field (c) ignored
+ struct { b int } // missing field (a) ignored; data will be dropped
+ struct { b, c int } // missing field (a) ignored; extra field (c) ignored.
+
+ Attempting to receive into these types will draw a decode error:
+
+ struct { a int; b uint } // change of signedness for b
+ struct { a int; b float } // change of type for b
+ struct { } // no field names in common
+ struct { c, d int } // no field names in common
+
+ Integers are transmitted two ways: arbitrary precision signed integers or
+ arbitrary precision unsigned integers. There is no int8, int16 etc.
+ discrimination in the gob format; there are only signed and unsigned integers. As
+ described below, the transmitter sends the value in a variable-length encoding;
+ the receiver accepts the value and stores it in the destination variable.
+ Floating-point numbers are always sent using IEEE-754 64-bit precision (see
+ below).
+
+ Signed integers may be received into any signed integer variable: int, int16, etc.;
+ unsigned integers may be received into any unsigned integer variable; and floating
+ point values may be received into any floating point variable. However,
+ the destination variable must be able to represent the value or the decode
+ operation will fail. (TODO(r): enforce this.)
+
+ Structs, arrays and slices are also supported. Strings and arrays of bytes are
+ supported with a special, efficient representation (see below).
+
+ Maps are not supported yet, but they will be. Interfaces, functions, and channels
+ cannot be sent in a gob. Attempting to encode a value that contains one will
+ fail. (TODO(r): fix this - it panics now.)
+
+ The rest of this comment documents the encoding, details that are not important
+ for most users. Details are presented bottom-up.
+
+ An unsigned integer is encoded as an arbitrary-precision, variable-length sequence
+ of bytes. It is sent in little-endian order (low bits first), with seven bits per
+ byte. The high bit of each byte is zero, except that the high bit of the final
+ (highest precision) byte of the encoding will be set. Thus 0 is transmitted as
+ (80), 7 is transmitted as (87) and 256=2*128 is transmitted as (00 82).
+
+ A boolean is encoded within an unsigned integer: 0 for false, 1 for true.
+
+ A signed integer, i, is encoded within an unsigned integer, u. Within u, bits 1
+ upward contain the value; bit 0 says whether they should be complemented upon
+ receipt. The encode algorithm looks like this:
+
+ var u uint;
+ if i < 0 {
+ u = (^i << 1) | 1 // complement i, bit 0 is 1
+ } else {
+ u = (i << 1) // do not complement i, bit 0 is 0
+ }
+ encodeUnsigned(u)
+
+ The low bit is therefore analogous to a sign bit, but making it the complement bit
+ instead guarantees that the largest negative integer is not a special case. For
+ example, -129=^128=(^256>>1) encodes as (01 82).
+
+ Floating-point numbers are always sent as a representation of a float64 value.
+ That value is converted to a uint64 using math.Float64bits. The uint64 is then
+ byte-reversed and sent as a regular unsigned integer. The byte-reversal means the
+ exponent and high-precision part of the mantissa go first. Since the low bits are
+ often zero, this can save encoding bytes. For instance, 17.0 is encoded in only
+ two bytes (40 e2).
+
+ Strings and slices of bytes are sent as an unsigned count followed by that many
+ uninterpreted bytes of the value.
+
+ All other slices and arrays are sent as an unsigned count followed by that many
+ elements using the standard gob encoding for their type, recursively.
+
+ Structs are sent as a sequence of (field number, field value) pairs. The field
+ value is sent using the standard gob encoding for its type, recursively. If a
+ field has the zero value for its type, it is omitted from the transmission. The
+ field number is defined by the type of the encoded struct: the first field of the
+ encoded type is field 0, the second is field 1, etc. When encoding a value, the
+ field numbers are delta encoded for efficiency and the fields are always sent in
+ order of increasing field number; the deltas are therefore unsigned. The
+ initialization for the delta encoding sets the field number to -1, so an unsigned
+ integer field 0 with value 7 is transmitted as unsigned delta = 1, unsigned value
+ = 7 or (81 87). Finally, after all the fields have been sent a terminating mark
+ denotes the end of the struct. That mark is a delta=0 value, which has
+ representation (80).
+
+ The representation of types is described below. When a type is defined on a given
+ connection between an Encoder and Decoder, it is assigned a signed integer type
+ id. When Encoder.Encode(v) is called, it makes sure there is an id assigned for
+ the type of v and all its elements and then it sends the pair (typeid, encoded-v)
+ where typeid is the type id of the encoded type of v and encoded-v is the gob
+ encoding of the value v.
+
+ To define a type, the encoder chooses an unused, positive type id and sends the
+ pair (-type id, encoded-type) where encoded-type is the gob encoding of a wireType
+ description, constructed from these types:
+
+ type wireType struct {
+ s structType;
+ }
+ type fieldType struct {
+ name string; // the name of the field.
+ id int; // the type id of the field, which must be already defined
+ }
+ type commonType struct {
+ name string; // the name of the struct type
+ id int; // the id of the type, repeated so it's inside the type
+ }
+ type structType struct {
+ commonType;
+ field []fieldType; // the fields of the struct.
+ }
+
+ If there are nested type ids, the types for all inner type ids must be defined
+ before the top-level type id is used to describe an encoded-v.
+
+ For simplicity in setup, the connection is defined to understand these types a
+ priori, as well as the basic gob types int, uint, etc. Their ids are:
+
+ bool 1
+ int 2
+ uint 3
+ float 4
+ []byte 5
+ string 6
+ wireType 7
+ structType 8
+ commonType 9
+ fieldType 10
+
+ In summary, a gob stream looks like
+
+ ((-type id, encoding of a wireType)* (type id, encoding of a value))*
+
+ where * signifies zero or more repetitions and the type id of a value must
+ be predefined or be defined before the value in the stream.
+*/
package gob
import (
@@ -13,19 +190,22 @@ import (
"sync";
)
+// An Encoder manages the transmission of type and data information to the
+// other side of a connection.
type Encoder struct {
- sync.Mutex; // each item must be sent atomically
+ mutex sync.Mutex; // each item must be sent atomically
w io.Writer; // where to send the data
- sent map[reflect.Type] TypeId; // which types we've already sent
+ sent map[reflect.Type] typeId; // which types we've already sent
state *encoderState; // so we can encode integers, strings directly
countState *encoderState; // stage for writing counts
buf []byte; // for collecting the output.
}
+// NewEncoder returns a new encoder that will transmit on the io.Writer.
func NewEncoder(w io.Writer) *Encoder {
enc := new(Encoder);
enc.w = w;
- enc.sent = make(map[reflect.Type] TypeId);
+ enc.sent = make(map[reflect.Type] typeId);
enc.state = new(encoderState);
enc.state.b = new(bytes.Buffer); // the rest isn't important; all we need is buffer and writer
enc.countState = new(encoderState);
@@ -91,15 +271,15 @@ func (enc *Encoder) sendType(origt reflect.Type) {
typeLock.Unlock();
// Send the pair (-id, type)
// Id:
- encodeInt(enc.state, -int64(info.typeId));
+ encodeInt(enc.state, -int64(info.id));
// Type:
encode(enc.state.b, info.wire);
enc.send();
// Remember we've sent this type.
- enc.sent[rt] = info.typeId;
+ enc.sent[rt] = info.id;
// Remember we've sent the top-level, possibly indirect type too.
- enc.sent[origt] = info.typeId;
+ enc.sent[origt] = info.id;
// Now send the inner types
st := rt.(*reflect.StructType);
for i := 0; i < st.NumField(); i++ {
@@ -107,6 +287,8 @@ func (enc *Encoder) sendType(origt reflect.Type) {
}
}
+// Encode transmits the data item represented by the empty interface value,
+// guaranteeing that all necessary type information has been transmitted first.
func (enc *Encoder) Encode(e interface{}) os.Error {
if enc.state.b.Len() > 0 || enc.countState.b.Len() > 0 {
panicln("Encoder: buffer not empty")
@@ -114,8 +296,8 @@ func (enc *Encoder) Encode(e interface{}) os.Error {
rt, indir := indirect(reflect.Typeof(e));
// Make sure we're single-threaded through here.
- enc.Lock();
- defer enc.Unlock();
+ enc.mutex.Lock();
+ defer enc.mutex.Unlock();
// Make sure the type is known to the other side.
// First, have we already sent this type?