diff options
Diffstat (limited to 'src/pkg/runtime/hashmap.h')
-rw-r--r-- | src/pkg/runtime/hashmap.h | 147 |
1 files changed, 147 insertions, 0 deletions
diff --git a/src/pkg/runtime/hashmap.h b/src/pkg/runtime/hashmap.h new file mode 100644 index 000000000..522d1ad01 --- /dev/null +++ b/src/pkg/runtime/hashmap.h @@ -0,0 +1,147 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This file contains the implementation of Go's map type. +// +// The map is just a hash table. The data is arranged +// into an array of buckets. Each bucket contains up to +// 8 key/value pairs. The low-order bits of the hash are +// used to select a bucket. Each bucket contains a few +// high-order bits of each hash to distinguish the entries +// within a single bucket. +// +// If more than 8 keys hash to a bucket, we chain on +// extra buckets. +// +// When the hashtable grows, we allocate a new array +// of buckets twice as big. Buckets are incrementally +// copied from the old bucket array to the new bucket array. +// +// Map iterators walk through the array of buckets and +// return the keys in walk order (bucket #, then overflow +// chain order, then bucket index). To maintain iteration +// semantics, we never move keys within their bucket (if +// we did, keys might be returned 0 or 2 times). When +// growing the table, iterators remain iterating through the +// old table and must check the new table if the bucket +// they are iterating through has been moved ("evacuated") +// to the new table. + +// Maximum number of key/value pairs a bucket can hold. +#define BUCKETSIZE 8 + +// Maximum average load of a bucket that triggers growth. +#define LOAD 6.5 + +// Picking LOAD: too large and we have lots of overflow +// buckets, too small and we waste a lot of space. I wrote +// a simple program to check some stats for different loads: +// (64-bit, 8 byte keys and values) +// LOAD %overflow bytes/entry hitprobe missprobe +// 4.00 2.13 20.77 3.00 4.00 +// 4.50 4.05 17.30 3.25 4.50 +// 5.00 6.85 14.77 3.50 5.00 +// 5.50 10.55 12.94 3.75 5.50 +// 6.00 15.27 11.67 4.00 6.00 +// 6.50 20.90 10.79 4.25 6.50 +// 7.00 27.14 10.15 4.50 7.00 +// 7.50 34.03 9.73 4.75 7.50 +// 8.00 41.10 9.40 5.00 8.00 +// +// %overflow = percentage of buckets which have an overflow bucket +// bytes/entry = overhead bytes used per key/value pair +// hitprobe = # of entries to check when looking up a present key +// missprobe = # of entries to check when looking up an absent key +// +// Keep in mind this data is for maximally loaded tables, i.e. just +// before the table grows. Typical tables will be somewhat less loaded. + +// Maximum key or value size to keep inline (instead of mallocing per element). +// Must fit in a uint8. +// Fast versions cannot handle big values - the cutoff size for +// fast versions in ../../cmd/gc/walk.c must be at most this value. +#define MAXKEYSIZE 128 +#define MAXVALUESIZE 128 + +typedef struct Bucket Bucket; +struct Bucket +{ + // Note: the format of the Bucket is encoded in ../../cmd/gc/reflect.c and + // ../reflect/type.go. Don't change this structure without also changing that code! + uint8 tophash[BUCKETSIZE]; // top 8 bits of hash of each entry (or special mark below) + Bucket *overflow; // overflow bucket, if any + uint64 data[1]; // BUCKETSIZE keys followed by BUCKETSIZE values +}; +// NOTE: packing all the keys together and then all the values together makes the +// code a bit more complicated than alternating key/value/key/value/... but it allows +// us to eliminate padding which would be needed for, e.g., map[int64]int8. + +// tophash values. We reserve a few possibilities for special marks. +// Each bucket (including its overflow buckets, if any) will have either all or none of its +// entries in the Evacuated* states (except during the evacuate() method, which only happens +// during map writes and thus no one else can observe the map during that time). +enum +{ + Empty = 0, // cell is empty + EvacuatedEmpty = 1, // cell is empty, bucket is evacuated. + EvacuatedX = 2, // key/value is valid. Entry has been evacuated to first half of larger table. + EvacuatedY = 3, // same as above, but evacuated to second half of larger table. + MinTopHash = 4, // minimum tophash for a normal filled cell. +}; +#define evacuated(b) ((b)->tophash[0] > Empty && (b)->tophash[0] < MinTopHash) + +struct Hmap +{ + // Note: the format of the Hmap is encoded in ../../cmd/gc/reflect.c and + // ../reflect/type.go. Don't change this structure without also changing that code! + uintgo count; // # live cells == size of map. Must be first (used by len() builtin) + uint32 flags; + uint32 hash0; // hash seed + uint8 B; // log_2 of # of buckets (can hold up to LOAD * 2^B items) + uint8 keysize; // key size in bytes + uint8 valuesize; // value size in bytes + uint16 bucketsize; // bucket size in bytes + + byte *buckets; // array of 2^B Buckets. may be nil if count==0. + byte *oldbuckets; // previous bucket array of half the size, non-nil only when growing + uintptr nevacuate; // progress counter for evacuation (buckets less than this have been evacuated) +}; + +// possible flags +enum +{ + IndirectKey = 1, // storing pointers to keys + IndirectValue = 2, // storing pointers to values + Iterator = 4, // there may be an iterator using buckets + OldIterator = 8, // there may be an iterator using oldbuckets +}; + +// Macros for dereferencing indirect keys +#define IK(h, p) (((h)->flags & IndirectKey) != 0 ? *(byte**)(p) : (p)) +#define IV(h, p) (((h)->flags & IndirectValue) != 0 ? *(byte**)(p) : (p)) + +// If you modify Hiter, also change cmd/gc/reflect.c to indicate +// the layout of this structure. +struct Hiter +{ + uint8* key; // Must be in first position. Write nil to indicate iteration end (see cmd/gc/range.c). + uint8* value; // Must be in second position (see cmd/gc/range.c). + + MapType *t; + Hmap *h; + byte *buckets; // bucket ptr at hash_iter initialization time + struct Bucket *bptr; // current bucket + + uint8 offset; // intra-bucket offset to start from during iteration (should be big enough to hold BUCKETSIZE-1) + bool done; + + // state of table at time iterator is initialized + uint8 B; + + // iter state + uintptr bucket; + uintptr i; + intptr check_bucket; +}; + |