diff options
Diffstat (limited to 'src/pkg/gob/encoder.go')
| -rw-r--r-- | src/pkg/gob/encoder.go | 198 |
1 files changed, 190 insertions, 8 deletions
diff --git a/src/pkg/gob/encoder.go b/src/pkg/gob/encoder.go index f75eccd95..1182a70c4 100644 --- a/src/pkg/gob/encoder.go +++ b/src/pkg/gob/encoder.go @@ -2,6 +2,183 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +/* + The gob package manages streams of gobs - binary values exchanged between an + Encoder (transmitter) and a Decoder (receiver). A typical use is transporting + arguments and results of remote procedure calls (RPCs) such as those provided by + package "rpc". + + A stream of gobs is self-describing. Each data item in the stream is preceded by + a specification of its type, expressed in terms of a small set of predefined + types. Pointers are not transmitted, but the things they point to are + transmitted; that is, the values are flattened. Recursive types work fine, but + recursive values (data with cycles) are problematic. This may change. + + To use gobs, create an Encoder and present it with a series of data items as + values or addresses that can be dereferenced to values. (At the moment, these + items must be structs (struct, *struct, **struct etc.), but this may change.) The + Encoder makes sure all type information is sent before it is needed. At the + receive side, a Decoder retrieves values from the encoded stream and unpacks them + into local variables. + + The source and destination values/types need not correspond exactly. For structs, + fields (identified by name) that are in the source but absent from the receiving + variable will be ignored. Fields that are in the receiving variable but missing + from the transmitted type or value will be ignored in the destination. If a field + with the same name is present in both, their types must be compatible. Both the + receiver and transmitter will do all necessary indirection and dereferencing to + convert between gobs and actual Go values. 
For instance, a gob type that is + schematically, + + struct { a, b int } + + can be sent from or received into any of these Go types: + + struct { a, b int } // the same + *struct { a, b int } // extra indirection of the struct + struct { *a, **b int } // extra indirection of the fields + struct { a, b int64 } // different concrete value type; see below + + It may also be received into any of these: + + struct { a, b int } // the same + struct { b, a int } // ordering doesn't matter; matching is by name + struct { a, b, c int } // extra field (c) ignored + struct { b int } // missing field (a) ignored; data will be dropped + struct { b, c int } // missing field (a) ignored; extra field (c) ignored. + + Attempting to receive into these types will draw a decode error: + + struct { a int; b uint } // change of signedness for b + struct { a int; b float } // change of type for b + struct { } // no field names in common + struct { c, d int } // no field names in common + + Integers are transmitted two ways: arbitrary precision signed integers or + arbitrary precision unsigned integers. There is no int8, int16 etc. + discrimination in the gob format; there are only signed and unsigned integers. As + described below, the transmitter sends the value in a variable-length encoding; + the receiver accepts the value and stores it in the destination variable. + Floating-point numbers are always sent using IEEE-754 64-bit precision (see + below). + + Signed integers may be received into any signed integer variable: int, int16, etc.; + unsigned integers may be received into any unsigned integer variable; and floating + point values may be received into any floating point variable. However, + the destination variable must be able to represent the value or the decode + operation will fail. (TODO(r): enforce this.) + + Structs, arrays and slices are also supported. Strings and arrays of bytes are + supported with a special, efficient representation (see below). 
+ + Maps are not supported yet, but they will be. Interfaces, functions, and channels + cannot be sent in a gob. Attempting to encode a value that contains one will + fail. (TODO(r): fix this - it panics now.) + + The rest of this comment documents the encoding, details that are not important + for most users. Details are presented bottom-up. + + An unsigned integer is encoded as an arbitrary-precision, variable-length sequence + of bytes. It is sent in little-endian order (low bits first), with seven bits per + byte. The high bit of each byte is zero, except that the high bit of the final + (highest precision) byte of the encoding will be set. Thus 0 is transmitted as + (80), 7 is transmitted as (87) and 256=2*128 is transmitted as (00 82). + + A boolean is encoded within an unsigned integer: 0 for false, 1 for true. + + A signed integer, i, is encoded within an unsigned integer, u. Within u, bits 1 + upward contain the value; bit 0 says whether they should be complemented upon + receipt. The encode algorithm looks like this: + + uint u; + if i < 0 { + u = (^i << 1) | 1 // complement i, bit 0 is 1 + } else { + u = (i << 1) // do not complement i, bit 0 is 0 + } + encodeUnsigned(u) + + The low bit is therefore analogous to a sign bit, but making it the complement bit + instead guarantees that the largest negative integer is not a special case. For + example, -129=^128=(^256>>1) encodes as (01 82). + + Floating-point numbers are always sent as a representation of a float64 value. + That value is converted to a uint64 using math.Float64bits. The uint64 is then + byte-reversed and sent as a regular unsigned integer. The byte-reversal means the + exponent and high-precision part of the mantissa go first. Since the low bits are + often zero, this can save encoding bytes. For instance, 17.0 is encoded in only + two bytes (40 e2). + + Strings and slices of bytes are sent as an unsigned count followed by that many + uninterpreted bytes of the value. 
+ + All other slices and arrays are sent as an unsigned count followed by that many + elements using the standard gob encoding for their type, recursively. + + Structs are sent as a sequence of (field number, field value) pairs. The field + value is sent using the standard gob encoding for its type, recursively. If a + field has the zero value for its type, it is omitted from the transmission. The + field number is defined by the type of the encoded struct: the first field of the + encoded type is field 0, the second is field 1, etc. When encoding a value, the + field numbers are delta encoded for efficiency and the fields are always sent in + order of increasing field number; the deltas are therefore unsigned. The + initialization for the delta encoding sets the field number to -1, so an unsigned + integer field 0 with value 7 is transmitted as unsigned delta = 1, unsigned value + = 7 or (81 87). Finally, after all the fields have been sent a terminating mark + denotes the end of the struct. That mark is a delta=0 value, which has + representation (80). + + The representation of types is described below. When a type is defined on a given + connection between an Encoder and Decoder, it is assigned a signed integer type + id. When Encoder.Encode(v) is called, it makes sure there is an id assigned for + the type of v and all its elements and then it sends the pair (typeid, encoded-v) + where typeid is the type id of the encoded type of v and encoded-v is the gob + encoding of the value v. + + To define a type, the encoder chooses an unused, positive type id and sends the + pair (-type id, encoded-type) where encoded-type is the gob encoding of a wireType + description, constructed from these types: + + type wireType struct { + s structType; + } + type fieldType struct { + name string; // the name of the field. 
+ id int; // the type id of the field, which must be already defined + } + type commonType struct { + name string; // the name of the struct type + id int; // the id of the type, repeated so it's inside the type + } + type structType struct { + commonType; + field []fieldType; // the fields of the struct. + } + + If there are nested type ids, the types for all inner type ids must be defined + before the top-level type id is used to describe an encoded-v. + + For simplicity in setup, the connection is defined to understand these types a + priori, as well as the basic gob types int, uint, etc. Their ids are: + + bool 1 + int 2 + uint 3 + float 4 + []byte 5 + string 6 + wireType 7 + structType 8 + commonType 9 + fieldType 10 + + In summary, a gob stream looks like + + ((-type id, encoding of a wireType)* (type id, encoding of a value))* + + where * signifies zero or more repetitions and the type id of a value must + be predefined or be defined before the value in the stream. +*/ package gob import ( @@ -13,19 +190,22 @@ import ( "sync"; ) +// An Encoder manages the transmission of type and data information to the +// other side of a connection. type Encoder struct { - sync.Mutex; // each item must be sent atomically + mutex sync.Mutex; // each item must be sent atomically w io.Writer; // where to send the data - sent map[reflect.Type] TypeId; // which types we've already sent + sent map[reflect.Type] typeId; // which types we've already sent state *encoderState; // so we can encode integers, strings directly countState *encoderState; // stage for writing counts buf []byte; // for collecting the output. } +// NewEncoder returns a new encoder that will transmit on the io.Writer. 
func NewEncoder(w io.Writer) *Encoder { enc := new(Encoder); enc.w = w; - enc.sent = make(map[reflect.Type] TypeId); + enc.sent = make(map[reflect.Type] typeId); enc.state = new(encoderState); enc.state.b = new(bytes.Buffer); // the rest isn't important; all we need is buffer and writer enc.countState = new(encoderState); @@ -91,15 +271,15 @@ func (enc *Encoder) sendType(origt reflect.Type) { typeLock.Unlock(); // Send the pair (-id, type) // Id: - encodeInt(enc.state, -int64(info.typeId)); + encodeInt(enc.state, -int64(info.id)); // Type: encode(enc.state.b, info.wire); enc.send(); // Remember we've sent this type. - enc.sent[rt] = info.typeId; + enc.sent[rt] = info.id; // Remember we've sent the top-level, possibly indirect type too. - enc.sent[origt] = info.typeId; + enc.sent[origt] = info.id; // Now send the inner types st := rt.(*reflect.StructType); for i := 0; i < st.NumField(); i++ { @@ -107,6 +287,8 @@ func (enc *Encoder) sendType(origt reflect.Type) { } } +// Encode transmits the data item represented by the empty interface value, +// guaranteeing that all necessary type information has been transmitted first. func (enc *Encoder) Encode(e interface{}) os.Error { if enc.state.b.Len() > 0 || enc.countState.b.Len() > 0 { panicln("Encoder: buffer not empty") @@ -114,8 +296,8 @@ func (enc *Encoder) Encode(e interface{}) os.Error { rt, indir := indirect(reflect.Typeof(e)); // Make sure we're single-threaded through here. - enc.Lock(); - defer enc.Unlock(); + enc.mutex.Lock(); + defer enc.mutex.Unlock(); // Make sure the type is known to the other side. // First, have we already sent this type? |
