runtime: faster hashmap implementation.

Hashtable is arranged as an array of 8-entry buckets with chained overflow. Each bucket has 8 extra hash bits per key to provide quick lookup within a bucket. Table is grown incrementally. Update #3885 Go time drops from 0.51s to 0.34s. R=r, rsc, m3b, dave, bradfitz, khr, ugorji, remyoudompheng CC=golang-dev https://golang.org/cl/7504044
author: Keith Randall <khr@golang.org> 2013-03-20 13:51:29 -0700
committer: Keith Randall <khr@golang.org> 2013-03-20 13:51:29 -0700
commit: 00224a356a5be3c134230bfa8fe11e0af2977aaf (patch)
tree: 215e0ad8bd4fe89b437bf1ef572a0c4f94af489d /src/pkg
parent: 2001f0c28ef4f2b7b907d060901a6fad2f1e9eb0 (diff)
download: go-00224a356a5be3c134230bfa8fe11e0af2977aaf.tar.xz
7 files changed, 1351 insertions, 926 deletions
diff --git a/src/pkg/runtime/hashmap.c b/src/pkg/runtime/hashmap.c
index dc5dfb82f5..e6871fd8f2 100644
--- a/src/pkg/runtime/hashmap.c
+++ b/src/pkg/runtime/hashmap.c
@@ -8,704 +8,794 @@
 #include "hashmap.h"
 #include "type.h"
 #include "race.h"
+#include "typekind.h" // TODO: remove
 
-/* Hmap flag values */
-#define IndirectVal  (1<<0)	/* storing pointers to values */
-#define IndirectKey (1<<1)	/* storing pointers to keys */
-#define CanFreeTable (1<<2)	/* okay to free subtables */
-#define CanFreeKey (1<<3)	/* okay to free pointers to keys */
-
-struct Hmap {	   /* a hash table; initialize with hash_init() */
-	uintgo count;	  /* elements in table - must be first */
-	uint8 datasize;   /* amount of data to store in entry */
-	uint8 flag;
-	uint8 valoff;	/* offset of value in key+value data block */
-	int32 changes;	      /* inc'ed whenever a subtable is created/grown */
-	uintptr hash0;      /* hash seed */
-	struct hash_subtable *st;    /* first-level table */
-};
+// This file contains the implementation of Go's map type.
+//
+// The map is just a hash table.  The data is arranged
+// into an array of buckets.  Each bucket contains up to
+// 8 key/value pairs.  The low-order bits of the hash are
+// used to select a bucket.  Each bucket contains a few
+// high-order bits of each hash to distinguish the entries
+// within a single bucket.
+//
+// If more than 8 keys hash to a bucket, we chain on
+// extra buckets.
+//
+// When the hashtable grows, we allocate a new array
+// of buckets twice as big.  Buckets are incrementally
+// copied from the old bucket array to the new bucket array.
+//
+// Map iterators walk through the array of buckets and
+// return the keys in walk order (bucket #, then overflow
+// chain order, then bucket index).  To maintain iteration
+// semantics, we never move keys within their bucket (if
+// we did, keys might be returned 0 or 2 times).  When
+// growing the table, iterators remain iterating through the
+// old table and must check the new table if the bucket
+// they are iterating through has been moved ("evacuated")
+// to the new table.
 
-#define MaxData 255
+// Maximum number of key/value pairs a bucket can hold.
+#define BUCKETSIZE 8
 
-struct hash_entry {
-	hash_hash_t hash;     /* hash value of data */
-	byte data[1];	 /* user data has "datasize" bytes */
-};
+// Maximum average load of a bucket that triggers growth.
+#define LOAD 6.5
 
-struct hash_subtable {
-	uint8 power;	 /* bits used to index this table */
-	uint8 used;	  /* bits in hash used before reaching this table */
-	uint8 datasize;      /* bytes of client data in an entry */
-	uint8 max_probes;    /* max number of probes when searching */
-	int16 limit_bytes;	   /* max_probes * (datasize+sizeof (hash_hash_t)) */
-	struct hash_entry *last;      /* points to last element of entry[] */
-	struct hash_entry entry[1];  /* 2**power+max_probes-1 elements of elemsize bytes */
-};
-
-#define HASH_DATA_EQ(eq, t, h,x,y) ((eq)=0, (*t->key->alg->equal) (&(eq), t->key->size, (x), (y)), (eq))
+// Picking LOAD: too large and we have lots of overflow
+// buckets, too small and we waste a lot of space.  I wrote
+// a simple program to check some stats for different loads:
+// (64-bit, 8 byte keys and values)
+//        LOAD    %overflow  bytes/entry     hitprobe    missprobe
+//        4.00         2.13        20.77         3.00         4.00
+//        4.50         4.05        17.30         3.25         4.50
+//        5.00         6.85        14.77         3.50         5.00
+//        5.50        10.55        12.94         3.75         5.50
+//        6.00        15.27        11.67         4.00         6.00
+//        6.50        20.90        10.79         4.25         6.50
+//        7.00        27.14        10.15         4.50         7.00
+//        7.50        34.03         9.73         4.75         7.50
+//        8.00        41.10         9.40         5.00         8.00
+//
+// %overflow   = percentage of buckets which have an overflow bucket
+// bytes/entry = overhead bytes used per key/value pair
+// hitprobe    = # of entries to check when looking up a present key
+// missprobe   = # of entries to check when looking up an absent key
+//
+// Keep in mind this data is for maximally loaded tables, i.e. just
+// before the table grows.  Typical tables will be somewhat less loaded.
 
-#define HASH_REHASH 0x2       /* an internal flag */
-/* the number of bits used is stored in the flags word too */
-#define HASH_USED(x)      ((x) >> 2)
-#define HASH_MAKE_USED(x) ((x) << 2)
+// Maximum key or value size to keep inline (instead of mallocing per element).
+// Must fit in a uint8.
+// Fast versions cannot handle big values - the cutoff size for
+// fast versions in ../../cmd/gc/walk.c must be at most this value.
+#define MAXKEYSIZE 128
+#define MAXVALUESIZE 128
 
-#define HASH_LOW	6
-#define HASH_ONE	(((hash_hash_t)1) << HASH_LOW)
-#define HASH_MASK       (HASH_ONE - 1)
-#define HASH_ADJUST(x)  (((x) < HASH_ONE) << HASH_LOW)
+typedef struct Bucket Bucket;
+struct Bucket
+{
+	uint8 tophash[BUCKETSIZE];  // top 8 bits of hash of each entry (0 = empty)
+	Bucket *overflow;           // overflow bucket, if any
+	byte data[1];               // BUCKETSIZE keys followed by BUCKETSIZE values
+};
+// NOTE: packing all the keys together and then all the values together makes the
+// code a bit more complicated than alternating key/value/key/value/... but it allows
+// us to eliminate padding which would be needed for, e.g., map[int64]int8.
 
-#define HASH_BITS       (sizeof (hash_hash_t) * 8)
+// Low-order bit of overflow field is used to mark a bucket as already evacuated
+// without destroying the overflow pointer.
+// Only buckets in oldbuckets will be marked as evacuated.
+// Evacuated bit will be set identically on the base bucket and any overflow buckets.
+#define evacuated(b) (((uintptr)(b)->overflow & 1) != 0)
+#define overflowptr(b) ((Bucket*)((uintptr)(b)->overflow & ~(uintptr)1))
 
-#define HASH_SUBHASH    HASH_MASK
-#define HASH_NIL	0
-#define HASH_NIL_MEMSET 0
+// Initialize bucket to the empty state.  This only works if BUCKETSIZE==8!
+#define clearbucket(b) { *(uint64*)((b)->tophash) = 0; (b)->overflow = nil; }
 
-#define HASH_OFFSET(base, byte_offset) \
-	  ((struct hash_entry *) (((byte *) (base)) + (byte_offset)))
+struct Hmap
+{
+	uintgo  count;        // # live cells == size of map.  Must be first (used by len() builtin)
+	uint8   B;            // log_2 of # of buckets (can hold up to LOAD * 2^B items)
+	uint8   flags;
+	uint8   keysize;      // key size in bytes
+	uint8   valuesize;    // value size in bytes
+	uint16  bucketsize;   // bucket size in bytes
 
-#define HASH_MAX_PROBES	15 /* max entries to probe before rehashing */
-#define HASH_MAX_POWER	12 /* max power of 2 to create sub-tables */
+	uintptr hash0;        // hash seed
+	byte    *buckets;     // array of 2^B Buckets
+	byte    *oldbuckets;  // previous bucket array of half the size, non-nil only when growing
+	uintptr nevacuate;    // progress counter for evacuation (buckets less than this have been evacuated)
+};
 
-/* return a hash layer with 2**power empty entries */
-static struct hash_subtable *
-hash_subtable_new (Hmap *h, int32 power, int32 used)
+// possible flags
+enum
 {
-	int32 elemsize = h->datasize + offsetof (struct hash_entry, data[0]);
-	int32 bytes = elemsize << power;
-	struct hash_subtable *st;
-	int32 limit_bytes = HASH_MAX_PROBES * elemsize;
-	int32 max_probes = HASH_MAX_PROBES;
+	IndirectKey = 1,    // storing pointers to keys
+	IndirectValue = 2,  // storing pointers to values
+	Iterator = 4,       // there may be an iterator using buckets  TODO: use
+	OldIterator = 8,    // there may be an iterator using oldbuckets  TODO: use
+	CanFreeBucket = 16, // ok to free buckets  TODO: use
+	CanFreeKey = 32,    // ok to free pointers to keys  TODO: use
+};
 
-	if (bytes < limit_bytes) {
-		limit_bytes = bytes;
-		max_probes = 1 << power;
-	}
-	bytes += limit_bytes - elemsize;
-	st = runtime·mallocgc(offsetof (struct hash_subtable, entry[0]) + bytes, UseSpanType ? FlagNoPointers : 0, 1, 1);
-	st->power = power;
-	st->used = used;
-	st->datasize = h->datasize;
-	st->max_probes = max_probes;
-	st->limit_bytes = limit_bytes;
-	st->last = HASH_OFFSET (st->entry, bytes) - 1;
-	memset (st->entry, HASH_NIL_MEMSET, bytes);
-	return (st);
-}
+// Macros for dereferencing indirect keys
+#define IK(h, p) (((h)->flags & IndirectKey) != 0 ? *(byte**)(p) : (p))
+#define IV(h, p) (((h)->flags & IndirectValue) != 0 ? *(byte**)(p) : (p))
 
+enum
+{
+	docheck = 0,  // check invariants before and after every op.  Slow!!!
+	debug = 0,    // print every operation
+};
 static void
-init_sizes (int64 hint, int32 *init_power)
+check(MapType *t, Hmap *h)
 {
-	int32 log = 0;
-	int32 i;
+	uintptr bucket, oldbucket;
+	Bucket *b;
+	uintptr i;
+	uintptr hash;
+	uintgo cnt;
+	uint8 top;
+	bool eq;
+	byte *k, *v;
+
+	cnt = 0;
 
-	for (i = 32; i != 0; i >>= 1) {
-		if ((hint >> (log + i)) != 0) {
-			log += i;
+	// check buckets
+	for(bucket = 0; bucket < (uintptr)1 << h->B; bucket++) {
+		if(h->oldbuckets != nil) {
+			oldbucket = bucket & (((uintptr)1 << (h->B - 1)) - 1);
+			b = (Bucket*)(h->oldbuckets + oldbucket * h->bucketsize);
+			if(!evacuated(b))
+				continue; // b is still uninitialized
+		}
+		for(b = (Bucket*)(h->buckets + bucket * h->bucketsize); b != nil; b = b->overflow) {
+			for(i = 0, k = b->data, v = k + h->keysize * BUCKETSIZE; i < BUCKETSIZE; i++, k += h->keysize, v += h->valuesize) {
+				if(b->tophash[i] == 0)
+					continue;
+				cnt++;
+				t->key->alg->equal(&eq, t->key->size, IK(h, k), IK(h, k));
+				if(!eq)
+					continue; // NaN!
+				hash = h->hash0;
+				t->key->alg->hash(&hash, t->key->size, IK(h, k));
+				top = hash >> (8*sizeof(uintptr) - 8);
+				if(top == 0)
+					top = 1;
+				if(top != b->tophash[i])
+					runtime·throw("bad hash");
+			}
 		}
 	}
-	log += 1 + (((hint << 3) >> log) >= 11);  /* round up for utilization */
-	if (log <= 14) {
-		*init_power = log;
-	} else {
-		*init_power = 12;
+
+	// check oldbuckets
+	if(h->oldbuckets != nil) {
+		for(oldbucket = 0; oldbucket < (uintptr)1 << (h->B - 1); oldbucket++) {
+			b = (Bucket*)(h->oldbuckets + oldbucket * h->bucketsize);
+			if(evacuated(b))
+				continue;
+			if(oldbucket < h->nevacuate)
+				runtime·throw("bucket became unevacuated");
+			for(; b != nil; b = overflowptr(b)) {
+				for(i = 0, k = b->data, v = k + h->keysize * BUCKETSIZE; i < BUCKETSIZE; i++, k += h->keysize, v += h->valuesize) {
+					if(b->tophash[i] == 0)
+						continue;
+					cnt++;
+					t->key->alg->equal(&eq, t->key->size, IK(h, k), IK(h, k));
+					if(!eq)
+						continue; // NaN!
+					hash = h->hash0;
+					t->key->alg->hash(&hash, t->key->size, IK(h, k));
+					top = hash >> (8*sizeof(uintptr) - 8);
+					if(top == 0)
+						top = 1;
+					if(top != b->tophash[i])
+						runtime·throw("bad hash (old)");
+				}
+			}
+		}
+	}
+
+	if(cnt != h->count) {
+		runtime·printf("%D %D\n", (uint64)cnt, (uint64)h->count);
+		runtime·throw("entries missing");
 	}
 }
 
 static void
-hash_init (Hmap *h, int32 datasize, int64 hint)
+hash_init(MapType *t, Hmap *h, uint32 hint)
 {
-	int32 init_power;
+	uint8 B;
+	byte *buckets;
+	uintptr i;
+	uintptr keysize, valuesize, bucketsize;
+	uint8 flags;
+	Bucket *b;
+
+	flags = CanFreeBucket | CanFreeKey;
+
+	// figure out how big we have to make everything
+	keysize = t->key->size;
+	if(keysize > MAXKEYSIZE) {
+		flags |= IndirectKey;
+		keysize = sizeof(byte*);
+	}
+	valuesize = t->elem->size;
+	if(valuesize >= MAXVALUESIZE) {
+		flags |= IndirectValue;
+		valuesize = sizeof(byte*);
+	}
+	bucketsize = offsetof(Bucket, data[0]) + (keysize + valuesize) * BUCKETSIZE;
+
+	// invariants we depend on.  We should probably check these at compile time
+	// somewhere, but for now we'll do it here.
+	if(t->key->align > BUCKETSIZE)
+		runtime·throw("key align too big");
+	if(t->elem->align > BUCKETSIZE)
+		runtime·throw("value align too big");
+	if(t->key->size % t->key->align != 0)
+		runtime·throw("key size not a multiple of key align");
+	if(t->elem->size % t->elem->align != 0)
+		runtime·throw("value size not a multiple of value align");
+	if(BUCKETSIZE < 8)
+		runtime·throw("bucketsize too small for proper alignment");
+	if(BUCKETSIZE != 8)
+		runtime·throw("must redo clearbucket");
+	if(sizeof(void*) == 4 && t->key->align > 4)
+		runtime·throw("need padding in bucket (key)");
+	if(sizeof(void*) == 4 && t->elem->align > 4)
+		runtime·throw("need padding in bucket (value)");
+
+	// find size parameter which will hold the requested # of elements
+	B = 0;
+	while(hint > BUCKETSIZE && hint > LOAD * ((uintptr)1 << B))
+		B++;
 
-	if(datasize < sizeof (void *))
-		datasize = sizeof (void *);
-	datasize = ROUND(datasize, sizeof (void *));
-	init_sizes (hint, &init_power);
-	h->datasize = datasize;
-	assert (h->datasize == datasize);
-	assert (sizeof (void *) <= h->datasize);
+	// allocate initial hash table
+	// If hint is large zeroing this memory could take a while.
+	buckets = runtime·mallocgc(bucketsize << B, 0, 1, 0);
+	for(i = 0; i < (uintptr)1 << B; i++) {
+		b = (Bucket*)(buckets + i * bucketsize);
+		clearbucket(b);
+	}
+
+	// initialize Hmap
+	// Note: we save all these stores to the end so gciter doesn't see
+	// a partially initialized map.
 	h->count = 0;
-	h->changes = 0;
-	h->st = hash_subtable_new (h, init_power, 0);
+	h->B = B;
+	h->flags = flags;
+	h->keysize = keysize;
+	h->valuesize = valuesize;
+	h->bucketsize = bucketsize;
 	h->hash0 = runtime·fastrand1();
+	h->buckets = buckets;
+	h->oldbuckets = nil;
+	h->nevacuate = 0;
+	if(docheck)
+		check(t, h);
 }
 
+// Moves entries in oldbuckets[i] to buckets[i] and buckets[i+2^k].
+// We leave the original bucket intact, except for the evacuated marks, so that
+// iterators can still iterate through the old buckets.
 static void
-hash_remove_n (struct hash_subtable *st, struct hash_entry *dst_e, int32 n)
+evacuate(MapType *t, Hmap *h, uintptr oldbucket)
 {
-	int32 elemsize = st->datasize + offsetof (struct hash_entry, data[0]);
-	struct hash_entry *src_e = HASH_OFFSET (dst_e, n * elemsize);
-	struct hash_entry *last_e = st->last;
-	int32 shift = HASH_BITS - (st->power + st->used);
-	int32 index_mask = (((hash_hash_t)1) << st->power) - 1;
-	int32 dst_i = (((byte *) dst_e) - ((byte *) st->entry)) / elemsize;
-	int32 src_i = dst_i + n;
-	hash_hash_t hash;
-	int32 skip;
-	int32 bytes;
+	Bucket *b;
+	Bucket *nextb;
+	Bucket *x, *y;
+	Bucket *newx, *newy;
+	uintptr xi, yi;
+	uintptr newbit;
+	uintptr hash;
+	uintptr i;
+	byte *k, *v;
+	byte *xk, *yk, *xv, *yv;
 
-	while (dst_e != src_e) {
-		if (src_e <= last_e) {
-			struct hash_entry *cp_e = src_e;
-			int32 save_dst_i = dst_i;
-			while (cp_e <= last_e && (hash = cp_e->hash) != HASH_NIL &&
-			     ((hash >> shift) & index_mask) <= dst_i) {
-				cp_e = HASH_OFFSET (cp_e, elemsize);
-				dst_i++;
-			}
-			bytes = ((byte *) cp_e) - (byte *) src_e;
-			memmove (dst_e, src_e, bytes);
-			dst_e = HASH_OFFSET (dst_e, bytes);
-			src_e = cp_e;
-			src_i += dst_i - save_dst_i;
-			if (src_e <= last_e && (hash = src_e->hash) != HASH_NIL) {
-				skip = ((hash >> shift) & index_mask) - dst_i;
-			} else {
-				skip = src_i - dst_i;
+	b = (Bucket*)(h->oldbuckets + oldbucket * h->bucketsize);
+	newbit = (uintptr)1 << (h->B - 1);
+
+	if(!evacuated(b)) {
+		// TODO: reuse overflow buckets instead of using new ones, if there
+		// is no iterator using the old buckets.  (If CanFreeBuckets and !OldIterator.)
+
+		x = (Bucket*)(h->buckets + oldbucket * h->bucketsize);
+		y = (Bucket*)(h->buckets + (oldbucket + newbit) * h->bucketsize);
+		clearbucket(x);
+		clearbucket(y);
+		xi = 0;
+		yi = 0;
+		xk = x->data;
+		yk = y->data;
+		xv = xk + h->keysize * BUCKETSIZE;
+		yv = yk + h->keysize * BUCKETSIZE;
+		do {
+			for(i = 0, k = b->data, v = k + h->keysize * BUCKETSIZE; i < BUCKETSIZE; i++, k += h->keysize, v += h->valuesize) {
+				if(b->tophash[i] == 0)
+					continue;
+				hash = h->hash0;
+				t->key->alg->hash(&hash, t->key->size, IK(h, k));
+				// NOTE: if key != key, then this hash could be (and probably will be)
+				// entirely different from the old hash.  We effectively only update
+				// the B'th bit of the hash in this case.
+				if((hash & newbit) == 0) {
+					if(xi == BUCKETSIZE) {
+						newx = runtime·mallocgc(h->bucketsize, 0, 1, 0);
+						clearbucket(newx);
+						x->overflow = newx;
+						x = newx;
+						xi = 0;
+						xk = x->data;
+						xv = xk + h->keysize * BUCKETSIZE;
+					}
+					x->tophash[xi] = b->tophash[i];
+					if((h->flags & IndirectKey) != 0) {
+						*(byte**)xk = *(byte**)k;               // copy pointer
+					} else {
+						t->key->alg->copy(t->key->size, xk, k); // copy value
+					}
+					if((h->flags & IndirectValue) != 0) {
+						*(byte**)xv = *(byte**)v;
+					} else {
+						t->elem->alg->copy(t->elem->size, xv, v);
+					}
+					xi++;
+					xk += h->keysize;
+					xv += h->valuesize;
+				} else {
+					if(yi == BUCKETSIZE) {
+						newy = runtime·mallocgc(h->bucketsize, 0, 1, 0);
+						clearbucket(newy);
+						y->overflow = newy;
+						y = newy;
+						yi = 0;
+						yk = y->data;
+						yv = yk + h->keysize * BUCKETSIZE;
+					}
+					y->tophash[yi] = b->tophash[i];
+					if((h->flags & IndirectKey) != 0) {
+						*(byte**)yk = *(byte**)k;
+					} else {
+						t->key->alg->copy(t->key->size, yk, k);
+					}
+					if((h->flags & IndirectValue) != 0) {
+						*(byte**)yv = *(byte**)v;
+					} else {
+						t->elem->alg->copy(t->elem->size, yv, v);
+					}
+					yi++;
+					yk += h->keysize;
+					yv += h->valuesize;
+				}
 			}
-		} else {
-			skip = src_i - dst_i;
+
+			// mark as evacuated so we don't do it again.
+			// this also tells any iterators that this data isn't golden anymore.
+			nextb = b->overflow;
+			b->overflow = (Bucket*)((uintptr)nextb + 1);
+
+			b = nextb;
+		} while(b != nil);
+	}
+
+	// advance evacuation mark
+	if(oldbucket == h->nevacuate) {
+		h->nevacuate = oldbucket + 1;
+		if(oldbucket + 1 == newbit) { // newbit == # of oldbuckets
+			h->oldbuckets = nil;
 		}
-		bytes = skip * elemsize;
-		memset (dst_e, HASH_NIL_MEMSET, bytes);
-		dst_e = HASH_OFFSET (dst_e, bytes);
-		dst_i += skip;
 	}
+	if(docheck)
+		check(t, h);
 }
 
-static int32
-hash_insert_internal (MapType*, struct hash_subtable **pst, int32 flags, hash_hash_t hash,
-		Hmap *h, void *data, void **pres);
-
 static void
-hash_conv (MapType *t, Hmap *h,
-		struct hash_subtable *st, int32 flags,
-		hash_hash_t hash,
-		struct hash_entry *e)
+grow_work(MapType *t, Hmap *h, uintptr bucket)
 {
-	int32 new_flags = (flags + HASH_MAKE_USED (st->power)) | HASH_REHASH;
-	int32 shift = HASH_BITS - HASH_USED (new_flags);
-	hash_hash_t prefix_mask = (-(hash_hash_t)1) << shift;
-	int32 elemsize = h->datasize + offsetof (struct hash_entry, data[0]);
-	void *dummy_result;
-	struct hash_entry *de;
-	int32 index_mask = (1 << st->power) - 1;
-	hash_hash_t e_hash;
-	struct hash_entry *pe = HASH_OFFSET (e, -elemsize);
+	uintptr noldbuckets;
 
-	while (e != st->entry && (e_hash = pe->hash) != HASH_NIL && (e_hash & HASH_MASK) != HASH_SUBHASH) {
-		e = pe;
-		pe = HASH_OFFSET (pe, -elemsize);
-	}
+	noldbuckets = (uintptr)1 << (h->B - 1);
 
-	de = e;
-	while (e <= st->last &&
-	    (e_hash = e->hash) != HASH_NIL &&
-	    (e_hash & HASH_MASK) != HASH_SUBHASH) {
-		struct hash_entry *target_e = HASH_OFFSET (st->entry, ((e_hash >> shift) & index_mask) * elemsize);
-		struct hash_entry *ne = HASH_OFFSET (e, elemsize);
-		hash_hash_t current = e_hash & prefix_mask;
-		if (de < target_e) {
-			memset (de, HASH_NIL_MEMSET, ((byte *) target_e) - (byte *) de);
-			de = target_e;
-		}
-		if ((hash & prefix_mask) == current ||
-		   (ne <= st->last && (e_hash = ne->hash) != HASH_NIL &&
-		   (e_hash & prefix_mask) == current)) {
-			struct hash_subtable *new_st = hash_subtable_new (h, 1, HASH_USED (new_flags));
-			int32 rc = hash_insert_internal (t, &new_st, new_flags, e->hash, h, e->data, &dummy_result);
-			assert (rc == 0);
-			memcpy(dummy_result, e->data, h->datasize);
-			e = ne;
-			while (e <= st->last && (e_hash = e->hash) != HASH_NIL && (e_hash & prefix_mask) == current) {
-				assert ((e_hash & HASH_MASK) != HASH_SUBHASH);
-				rc = hash_insert_internal (t, &new_st, new_flags, e_hash, h, e->data, &dummy_result);
-				assert (rc == 0);
-				memcpy(dummy_result, e->data, h->datasize);
-				e = HASH_OFFSET (e, elemsize);
-			}
-			memset (de->data, HASH_NIL_MEMSET, h->datasize);
-			*(struct hash_subtable **)de->data = new_st;
-			de->hash = current | HASH_SUBHASH;
-		} else {
-			if (e != de) {
-				memcpy (de, e, elemsize);
-			}
-			e = HASH_OFFSET (e, elemsize);
-		}
-		de = HASH_OFFSET (de, elemsize);
-	}
-	if (e != de) {
-		hash_remove_n (st, de, (((byte *) e) - (byte *) de) / elemsize);
-	}
+	// make sure we evacuate the oldbucket corresponding
+	// to the bucket we're about to use
+	evacuate(t, h, bucket & (noldbuckets - 1));
+
+	// evacuate one more oldbucket to make progress on growing
+	if(h->oldbuckets != nil)
+		evacuate(t, h, h->nevacuate);
 }
 
 static void
-hash_grow (MapType *t, Hmap *h, struct hash_subtable **pst, int32 flags)
+hash_grow(MapType *t, Hmap *h)
 {
-	struct hash_subtable *old_st = *pst;
-	int32 elemsize = h->datasize + offsetof (struct hash_entry, data[0]);
-	*pst = hash_subtable_new (h, old_st->power + 1, HASH_USED (flags));
-	struct hash_entry *last_e = old_st->last;
-	struct hash_entry *e;
-	void *dummy_result;
-	int32 used = 0;
+	byte *old_buckets;
+	byte *new_buckets;
+	uint8 flags;
 
-	flags |= HASH_REHASH;
-	for (e = old_st->entry; e <= last_e; e = HASH_OFFSET (e, elemsize)) {
-		hash_hash_t hash = e->hash;
-		if (hash != HASH_NIL) {
-			int32 rc = hash_insert_internal (t, pst, flags, e->hash, h, e->data, &dummy_result);
-			assert (rc == 0);
-			memcpy(dummy_result, e->data, h->datasize);
-			used++;
-		}
-	}
-	if (h->flag & CanFreeTable)
-		free (old_st);
+	// allocate a bigger hash table
+	if(h->oldbuckets != nil)
+		runtime·throw("evacuation not done in time");
+	old_buckets = h->buckets;
+	// NOTE: this could be a big malloc, but since we don't need zeroing it is probably fast.
+	new_buckets = runtime·mallocgc(h->bucketsize << (h->B + 1), 0, 1, 0);
+	flags = (h->flags & ~(Iterator | OldIterator));
+	if((h->flags & Iterator) != 0)
+		flags |= OldIterator;
+
+	// commit the grow (atomic wrt gc)
+	h->B++;
+	h->flags = flags;
+	h->oldbuckets = old_buckets;
+	h->buckets = new_buckets;
+	h->nevacuate = 0;
+
+	// the actual copying of the hash table data is done incrementally
+	// by grow_work() and evacuate().
+	if(docheck)
+		check(t, h);
 }
 
-static int32
-hash_lookup (MapType *t, Hmap *h, void *data, void **pres)
+// returns ptr to value associated with key *keyp, or nil if none.
+// if it returns non-nil, updates *keyp to point to the currently stored key.
+static byte*
+hash_lookup(MapType *t, Hmap *h, byte **keyp)
 {
-	int32 elemsize = h->datasize + offsetof (struct hash_entry, data[0]);
-	hash_hash_t hash;
-	struct hash_subtable *st = h->st;
-	int32 used = 0;
-	hash_hash_t e_hash;
-	struct hash_entry *e;
-	struct hash_entry *end_e;
 	void *key;
+	uintptr hash;
+	uintptr bucket;
+	Bucket *b;
+	uint8 top;
+	uintptr i;
 	bool eq;
+	byte *k, *k2, *v;
 
+	key = *keyp;
+	if(docheck)
+		check(t, h);
 	hash = h->hash0;
-	(*t->key->alg->hash) (&hash, t->key->size, data);
-	hash &= ~HASH_MASK;
-	hash += HASH_ADJUST (hash);
-	for (;;) {
-		int32 shift = HASH_BITS - (st->power + used);
-		int32 index_mask = (1 << st->power) - 1;
-		int32 i = (hash >> shift) & index_mask;	   /* i is the natural position of hash */
-
-		e = HASH_OFFSET (st->entry, i * elemsize); /* e points to element i */
-		e_hash = e->hash;
-		if ((e_hash & HASH_MASK) != HASH_SUBHASH) {      /* a subtable */
-			break;
-		}
-		used += st->power;
-		st = *(struct hash_subtable **)e->data;
-	}
-	end_e = HASH_OFFSET (e, st->limit_bytes);
-	while (e != end_e && (e_hash = e->hash) != HASH_NIL && e_hash < hash) {
-		e = HASH_OFFSET (e, elemsize);
-	}
-	while (e != end_e && ((e_hash = e->hash) ^ hash) < HASH_SUBHASH) {
-		key = e->data;
-		if (h->flag & IndirectKey)
-			key = *(void**)e->data;
-		if (HASH_DATA_EQ (eq, t, h, data, key)) {    /* a match */
-			*pres = e->data;
-			return (1);
+	t->key->alg->hash(&hash, t->key->size, key);
+	bucket = hash & (((uintptr)1 << h->B) - 1);
+	if(h->oldbuckets != nil)
+		grow_work(t, h, bucket);
+	b = (Bucket*)(h->buckets + bucket * h->bucketsize);
+	top = hash >> (sizeof(uintptr)*8 - 8);
+	if(top == 0)
+		top = 1;
+	do {
+		for(i = 0, k = b->data, v = k + h->keysize * BUCKETSIZE; i < BUCKETSIZE; i++, k += h->keysize, v += h->valuesize) {
+			if(b->tophash[i] == top) {
+				k2 = IK(h, k);
+				t->key->alg->equal(&eq, t->key->size, key, k2);
+				if(eq) {
+					*keyp = k2;
+					return IV(h, v);
+				}
+			}
 		}
-		e = HASH_OFFSET (e, elemsize);
-	}
-	USED(e_hash);
-	*pres = 0;
-	return (0);
+		b = b->overflow;
+	} while(b != nil);
+	return nil;
 }
 
-static int32
-hash_remove (MapType *t, Hmap *h, void *data)
-{
-	int32 elemsize = h->datasize + offsetof (struct hash_entry, data[0]);
-	hash_hash_t hash;
-	struct hash_subtable *st = h->st;
-	int32 used = 0;
-	hash_hash_t e_hash;
-	struct hash_entry *e;
-	struct hash_entry *end_e;
-	bool eq;
-	void *key;
+// When an item is not found, fast versions return a pointer to this zeroed memory.
+static uint8 empty_value[MAXVALUESIZE];
 
-	hash = h->hash0;
-	(*t->key->alg->hash) (&hash, t->key->size, data);
-	hash &= ~HASH_MASK;
-	hash += HASH_ADJUST (hash);
-	for (;;) {
-		int32 shift = HASH_BITS - (st->power + used);
-		int32 index_mask = (1 << st->power) - 1;
-		int32 i = (hash >> shift) & index_mask;	   /* i is the natural position of hash */
+// Specialized versions of mapaccess1 for specific types.
+// See ./hashmap_fast and ../../cmd/gc/walk.c.
+#define HASH_LOOKUP1 runtime·mapaccess1_fast32
+#define HASH_LOOKUP2 runtime·mapaccess2_fast32
+#define KEYTYPE uint32
+#define HASHFUNC runtime·algarray[AMEM32].hash
+#define EQFUNC(x,y) ((x) == (y))
+#define QUICKEQ(x) true
+#include "hashmap_fast.c"
 
-		e = HASH_OFFSET (st->entry, i * elemsize); /* e points to element i */
-		e_hash = e->hash;
-		if ((e_hash & HASH_MASK) != HASH_SUBHASH) {      /* a subtable */
-			break;
-		}
-		used += st->power;
-		st = *(struct hash_subtable **)e->data;
-	}
-	end_e = HASH_OFFSET (e, st->limit_bytes);
-	while (e != end_e && (e_hash = e->hash) != HASH_NIL && e_hash < hash) {
-		e = HASH_OFFSET (e, elemsize);
-	}
-	while (e != end_e && ((e_hash = e->hash) ^ hash) < HASH_SUBHASH) {
-		key = e->data;
-		if (h->flag & IndirectKey)
-			key = *(void**)e->data;
-		if (HASH_DATA_EQ (eq, t, h, data, key)) {    /* a match */
-			// Free key if indirect, but only if reflect can't be
-			// holding a pointer to it.  Deletions are rare,
-			// indirect (large) keys are rare, reflect on maps
-			// is rare.  So in the rare, rare, rare case of deleting
-			// an indirect key from a map that has been reflected on,
-			// we leave the key for garbage collection instead of
-			// freeing it here.
-			if (h->flag & CanFreeKey)
-				free (key);
-			if (h->flag & IndirectVal)
-				free (*(void**)((byte*)e->data + h->valoff));
-			hash_remove_n (st, e, 1);
-			h->count--;
-			return (1);
-		}
-		e = HASH_OFFSET (e, elemsize);
-	}
-	USED(e_hash);
-	return (0);
-}
+#undef HASH_LOOKUP1
+#undef HASH_LOOKUP2
+#undef KEYTYPE
+#undef HASHFUNC
+#undef EQFUNC
+#undef QUICKEQ
+
+#define HASH_LOOKUP1 runtime·mapaccess1_fast64
+#define HASH_LOOKUP2 runtime·mapaccess2_fast64
+#define KEYTYPE uint64
+#define HASHFUNC runtime·algarray[AMEM64].hash
+#define EQFUNC(x,y) ((x) == (y))
+#define QUICKEQ(x) true
+#include "hashmap_fast.c"
+
+#undef HASH_LOOKUP1
+#undef HASH_LOOKUP2
+#undef KEYTYPE
+#undef HASHFUNC
+#undef EQFUNC
+#undef QUICKEQ
 
-static int32
-hash_insert_internal (MapType *t, struct hash_subtable **pst, int32 flags, hash_hash_t hash,
-				 Hmap *h, void *data, void **pres)
+#define HASH_LOOKUP1 runtime·mapaccess1_faststr
+#define HASH_LOOKUP2 runtime·mapaccess2_faststr
+#define KEYTYPE String
+#define HASHFUNC runtime·algarray[ASTRING].hash
+#define EQFUNC(x,y) ((x).len == (y).len && ((x).str == (y).str || runtime·mcmp((x).str, (y).str, (x).len) == 0))
+#define QUICKEQ(x) ((x).len < 32)
+#include "hashmap_fast.c"
+
+static void
+hash_insert(MapType *t, Hmap *h, void *key, void *value)
 {
-	int32 elemsize = h->datasize + offsetof (struct hash_entry, data[0]);
+	uintptr hash;
+	uintptr bucket;
+	uintptr i;
 	bool eq;
+	Bucket *b;
+	Bucket *newb;
+	uint8 *inserti;
+	byte *insertk, *insertv;
+	uint8 top;
+	byte *k, *v;
+	byte *kmem, *vmem;
 
-	if ((flags & HASH_REHASH) == 0) {
-		hash += HASH_ADJUST (hash);
-		hash &= ~HASH_MASK;
-	}
-	for (;;) {
-		struct hash_subtable *st = *pst;
-		int32 shift = HASH_BITS - (st->power + HASH_USED (flags));
-		int32 index_mask = (1 << st->power) - 1;
-		int32 i = (hash >> shift) & index_mask;	   /* i is the natural position of hash */
-		struct hash_entry *start_e =
-			HASH_OFFSET (st->entry, i * elemsize);    /* start_e is the pointer to element i */
-		struct hash_entry *e = start_e;		   /* e is going to range over [start_e, end_e) */
-		struct hash_entry *end_e;
-		hash_hash_t e_hash = e->hash;
-
-		if ((e_hash & HASH_MASK) == HASH_SUBHASH) {      /* a subtable */
-			pst = (struct hash_subtable **) e->data;
-			flags += HASH_MAKE_USED (st->power);
-			continue;
-		}
-		end_e = HASH_OFFSET (start_e, st->limit_bytes);
-		while (e != end_e && (e_hash = e->hash) != HASH_NIL && e_hash < hash) {
-			e = HASH_OFFSET (e, elemsize);
-			i++;
-		}
-		if (e != end_e && e_hash != HASH_NIL) {
-			/* ins_e ranges over the elements that may match */
-			struct hash_entry *ins_e = e;
-			int32 ins_i = i;
-			hash_hash_t ins_e_hash;
-			void *key;
-			while (ins_e != end_e && ((e_hash = ins_e->hash) ^ hash) < HASH_SUBHASH) {
-				key = ins_e->data;
-				if (h->flag & IndirectKey)
-					key = *(void**)key;
-				if (HASH_DATA_EQ (eq, t, h, data, key)) {    /* a match */
-					*pres = ins_e->data;
-					return (1);
-				}
-				if (e_hash == hash) {	   /* adjust hash if it collides */
-					assert ((flags & HASH_REHASH) == 0);
-					hash++;
-					if ((hash & HASH_MASK) == HASH_SUBHASH)
-						runtime·throw("runtime: map hash collision overflow");
-				}
-				ins_e = HASH_OFFSET (ins_e, elemsize);
-				ins_i++;
-				if (e_hash <= hash) {	       /* set e to insertion point */
-					e = ins_e;
-					i = ins_i;
+	if(docheck)
+		check(t, h);
+	hash = h->hash0;
+	t->key->alg->hash(&hash, t->key->size, key);
+ again:
+	bucket = hash & (((uintptr)1 << h->B) - 1);
+	if(h->oldbuckets != nil)
+		grow_work(t, h, bucket);
+	b = (Bucket*)(h->buckets + bucket * h->bucketsize);
+	top = hash >> (sizeof(uintptr)*8 - 8);
+	if(top == 0)
+		top = 1;
+	inserti = 0;
+	insertk = nil;
+	insertv = nil;
+	while(true) {
+		for(i = 0, k = b->data, v = k + h->keysize * BUCKETSIZE; i < BUCKETSIZE; i++, k += h->keysize, v += h->valuesize) {
+			if(b->tophash[i] != top) {
+				if(b->tophash[i] == 0 && inserti == nil) {
+					inserti = &b->tophash[i];
+					insertk = k;
+					insertv = v;
 				}
+				continue;
 			}
-			/* set ins_e to the insertion point for the new element */
-			ins_e = e;
-			ins_i = i;
-			ins_e_hash = 0;
-			/* move ins_e to point at the end of the contiguous block, but
-			   stop if any element can't be moved by one up */
-			while (ins_e <= st->last && (ins_e_hash = ins_e->hash) != HASH_NIL &&
-			       ins_i + 1 - ((ins_e_hash >> shift) & index_mask) < st->max_probes &&
-			       (ins_e_hash & HASH_MASK) != HASH_SUBHASH) {
-				ins_e = HASH_OFFSET (ins_e, elemsize);
-				ins_i++;
-			}
-			if (e == end_e || ins_e > st->last || ins_e_hash != HASH_NIL) {
-				e = end_e;    /* can't insert; must grow or convert to subtable */
-			} else {	      /* make space for element */
-				memmove (HASH_OFFSET (e, elemsize), e, ((byte *) ins_e) - (byte *) e);
-			}
-		}
-		if (e != end_e) {
-			e->hash = hash;
-			*pres = e->data;
-			return (0);
-		}
-		h->changes++;
-		if (st->power < HASH_MAX_POWER) {
-			hash_grow (t, h, pst, flags);
-		} else {
-			hash_conv (t, h, st, flags, hash, start_e);
+			t->key->alg->equal(&eq, t->key->size, key, IK(h, k));
+			if(!eq)
+				continue;
+			// already have a mapping for key.  Update it.
+			t->key->alg->copy(t->key->size, IK(h, k), key); // Need to update key for keys which are distinct but equal (e.g. +0.0 and -0.0)
+			t->elem->alg->copy(t->elem->size, IV(h, v), value);
+			if(docheck)
+				check(t, h);
+			return;
 		}
+		if(b->overflow == nil)
+			break;
+		b = b->overflow;
 	}
-}
 
-static int32
-hash_insert (MapType *t, Hmap *h, void *data, void **pres)
-{
-	uintptr hash;
-	int32 rc;
-
-	hash = h->hash0;
-	(*t->key->alg->hash) (&hash, t->key->size, data);
-	rc = hash_insert_internal (t, &h->st, 0, hash, h, data, pres);
+	// did not find mapping for key.  Allocate new cell & add entry.
+	if(h->count >= LOAD * ((uintptr)1 << h->B) && h->count >= BUCKETSIZE) {
+		hash_grow(t, h);
+		goto again; // Growing the table invalidates everything, so try again
+	}
 
-	h->count += (rc == 0);    /* increment count if element didn't previously exist */
-	return (rc);
-}
+	if(inserti == nil) {
+		// all current buckets are full, allocate a new one.
+		newb = runtime·mallocgc(h->bucketsize, 0, 1, 0);
+		clearbucket(newb);
+		b->overflow = newb;
+		inserti = newb->tophash;
+		insertk = newb->data;
+		insertv = insertk + h->keysize * BUCKETSIZE;
+	}
 
-static uint32
-hash_count (Hmap *h)
-{
-	return (h->count);
+	// store new key/value at insert position
+	if((h->flags & IndirectKey) != 0) {
+		kmem = runtime·mallocgc(t->key->size, 0, 1, 0);
+		*(byte**)insertk = kmem;
+		insertk = kmem;
+	}
+	if((h->flags & IndirectValue) != 0) {
+		vmem = runtime·mallocgc(t->elem->size, 0, 1, 0);
+		*(byte**)insertv = vmem;
+		insertv = vmem;
+	}
+	t->key->alg->copy(t->key->size, insertk, key);
+	t->elem->alg->copy(t->elem->size, insertv, value);
+	*inserti = top;
+	h->count++;
+	if(docheck)
+		check(t, h);
 }
 
 static void
-iter_restart (struct hash_iter *it, struct hash_subtable *st, int32 used)
+hash_remove(MapType *t, Hmap *h, void *key)
 {
-	int32 elemsize = it->elemsize;
-	hash_hash_t last_hash = it->last_hash;
-	struct hash_entry *e;
-	hash_hash_t e_hash;
-	struct hash_iter_sub *sub = &it->subtable_state[it->i];
-	struct hash_entry *last;
-
-	for (;;) {
-		int32 shift = HASH_BITS - (st->power + used);
-		int32 index_mask = (1 << st->power) - 1;
-		int32 i = (last_hash >> shift) & index_mask;
-
-		last = st->last;
-		e = HASH_OFFSET (st->entry, i * elemsize);
-		sub->start = st->entry;
-		sub->last = last;
+	uintptr hash;
+	uintptr bucket;
+	Bucket *b;
+	uint8 top;
+	uintptr i;
+	byte *k, *v;
+	bool eq;
+	
+	if(docheck)
+		check(t, h);
+	hash = h->hash0;
+	t->key->alg->hash(&hash, t->key->size, key);
+	bucket = hash & (((uintptr)1 << h->B) - 1);
+	if(h->oldbuckets != nil)
+		grow_work(t, h, bucket);
+	b = (Bucket*)(h->buckets + bucket * h->bucketsize);
+	top = hash >> (sizeof(uintptr)*8 - 8);
+	if(top == 0)
+		top = 1;
+	do {
+		for(i = 0, k = b->data, v = k + h->keysize * BUCKETSIZE; i < BUCKETSIZE; i++, k += h->keysize, v += h->valuesize) {
+			if(b->tophash[i] != top)
+				continue;
+			t->key->alg->equal(&eq, t->key->size, key, IK(h, k));
+			if(!eq)
+				continue;
 
-		if ((e->hash & HASH_MASK) != HASH_SUBHASH) {
-			break;
+			b->tophash[i] = 0;
+			h->count--;
+			// TODO: free key if indirect.  Can't do it if
+			// there's any iterator ever, as indirect keys are aliased across
+			// buckets.
+			// TODO: consolidate buckets if they are mostly empty
+			// can only consolidate if there are no live iterators at this size.
+			if(docheck)
+				check(t, h);
+			return;
 		}
-		sub->e = HASH_OFFSET (e, elemsize);
-		sub = &it->subtable_state[++(it->i)];
-		used += st->power;
-		st = *(struct hash_subtable **)e->data;
-	}
-	while (e <= last && ((e_hash = e->hash) == HASH_NIL || e_hash <= last_hash)) {
-		e = HASH_OFFSET (e, elemsize);
-	}
-	sub->e = e;
+		b = b->overflow;
+	} while(b != nil);
 }
 
-static void *
-hash_next (struct hash_iter *it)
+// TODO: shrink the map, the same way we grow it.
+
+// If you modify hash_iter, also change cmd/gc/range.c to indicate
+// the size of this structure.
+struct hash_iter
 {
-	int32 elemsize;
-	struct hash_iter_sub *sub;
-	struct hash_entry *e;
-	struct hash_entry *last;
-	hash_hash_t e_hash;
+	uint8* key; // Must be in first position.  Write nil to indicate iteration end (see cmd/gc/range.c).
+	uint8* value;
 
-	if (it->changes != it->h->changes) {    /* hash table's structure changed; recompute */
-		if (~it->last_hash == 0)
-			return (0);
-		it->changes = it->h->changes;
-		it->i = 0;
-		iter_restart (it, it->h->st, 0);
-	}
-	elemsize = it->elemsize;
+	MapType *t;
+	Hmap *h;
 
-Again:
-	e_hash = 0;
-	sub = &it->subtable_state[it->i];
-	e = sub->e;
-	last = sub->last;
+	// end point for iteration
+	uintptr endbucket;
+	bool wrapped;
 
-	if (e != sub->start && it->last_hash != HASH_OFFSET (e, -elemsize)->hash) {
-		struct hash_entry *start = HASH_OFFSET (e, -(elemsize * HASH_MAX_PROBES));
-		struct hash_entry *pe = HASH_OFFSET (e, -elemsize);
-		hash_hash_t last_hash = it->last_hash;
-		if (start < sub->start) {
-			start = sub->start;
-		}
-		while (e != start && ((e_hash = pe->hash) == HASH_NIL || last_hash < e_hash)) {
-			e = pe;
-			pe = HASH_OFFSET (pe, -elemsize);
-		}
-		while (e <= last && ((e_hash = e->hash) == HASH_NIL || e_hash <= last_hash)) {
-			e = HASH_OFFSET (e, elemsize);
-		}
-	}
+	// state of table at time iterator is initialized
+	uint8 B;
+	byte *buckets;
 
-	for (;;) {
-		while (e <= last && (e_hash = e->hash) == HASH_NIL) {
-			e = HASH_OFFSET (e, elemsize);
-		}
-		if (e > last) {
-			if (it->i == 0) {
-				if(!it->cycled) {
-					// Wrap to zero and iterate up until it->cycle.
-					it->cycled = true;
-					it->last_hash = 0;
-					it->subtable_state[0].e = it->h->st->entry;
-					it->subtable_state[0].start = it->h->st->entry;
-					it->subtable_state[0].last = it->h->st->last;
-					goto Again;
-				}
-				// Set last_hash to impossible value and
-				// break it->changes, so that check at top of
-				// hash_next will be used if we get called again.
-				it->last_hash = ~(uintptr_t)0;
-				it->changes--;
-				return (0);
-			} else {
-				it->i--;
-				sub = &it->subtable_state[it->i];
-				e = sub->e;
-				last = sub->last;
-			}
-		} else if ((e_hash & HASH_MASK) != HASH_SUBHASH) {
-			if(it->cycled && e->hash > it->cycle) {
-				// Already returned this.
-				// Set last_hash to impossible value and
-				// break it->changes, so that check at top of
-				// hash_next will be used if we get called again.
-				it->last_hash = ~(uintptr_t)0;
-				it->changes--;
-				return (0);
-			}
-			it->last_hash = e->hash;
-			sub->e = HASH_OFFSET (e, elemsize);
-			return (e->data);
-		} else {
-			struct hash_subtable *st =
-				*(struct hash_subtable **)e->data;
-			sub->e = HASH_OFFSET (e, elemsize);
-			it->i++;
-			assert (it->i < sizeof (it->subtable_state) /
-					sizeof (it->subtable_state[0]));
-			sub = &it->subtable_state[it->i];
-			sub->e = e = st->entry;
-			sub->start = st->entry;
-			sub->last = last = st->last;
-		}
-	}
-}
+	// iter state
+	uintptr bucket;
+	struct Bucket *bptr;
+	uintptr i;
+};
 
+// iterator state:
+// bucket: the current bucket ID
+// b: the current Bucket in the chain
+// i: the next offset to check in the current bucket
 static void
-hash_iter_init (MapType *t, Hmap *h, struct hash_iter *it)
+hash_iter_init(MapType *t, Hmap *h, struct hash_iter *it)
 {
-	it->elemsize = h->datasize + offsetof (struct hash_entry, data[0]);
-	it->changes = h->changes;
-	it->i = 0;
-	it->h = h;
 	it->t = t;
-	it->last_hash = 0;
-	it->subtable_state[0].e = h->st->entry;
-	it->subtable_state[0].start = h->st->entry;
-	it->subtable_state[0].last = h->st->last;
+	it->h = h;
 
-	// fastrand1 returns 31 useful bits.
-	// We don't care about not having a bottom bit but we
-	// do want top bits.
-	if(sizeof(void*) == 8)
-		it->cycle = (uint64)runtime·fastrand1()<<33 | (uint64)runtime·fastrand1()<<2;
-	else
-		it->cycle = runtime·fastrand1()<<1;
-	it->cycled = false;
-	it->last_hash = it->cycle;
-	iter_restart(it, it->h->st, 0);
-}
+	// grab snapshot of bucket state
+	it->B = h->B;
+	it->buckets = h->buckets;
 
-static void
-clean_st (Hmap *h, struct hash_subtable *st, int32 *slots, int32 *used)
-{
-	int32 elemsize = st->datasize + offsetof (struct hash_entry, data[0]);
-	struct hash_entry *e = st->entry;
-	struct hash_entry *last = st->last;
-	int32 lslots = (((byte *) (last+1)) - (byte *) e) / elemsize;
-	int32 lused = 0;
+	// iterator state
+	it->bucket = it->endbucket = runtime·fastrand1() & (((uintptr)1 << h->B) - 1);
+	it->wrapped = false;
+	it->bptr = nil;
 
-	while (e <= last) {
-		hash_hash_t hash = e->hash;
-		if ((hash & HASH_MASK) == HASH_SUBHASH) {
-			clean_st (h, *(struct hash_subtable **)e->data, slots, used);
-		} else {
-			lused += (hash != HASH_NIL);
-		}
-		e = HASH_OFFSET (e, elemsize);
-	}
-	if (h->flag & CanFreeTable)
-		free (st);
-	*slots += lslots;
-	*used += lused;
+	h->flags |= Iterator;
 }
 
+// initializes it->key and it->value to the next key/value pair
+// in the iteration, or nil if we've reached the end.
 static void
-hash_destroy (Hmap *h)
+hash_next(struct hash_iter *it)
 {
-	int32 slots = 0;
-	int32 used = 0;
-
-	clean_st (h, h->st, &slots, &used);
-	free (h);
-}
+	Hmap *h;
+	MapType *t;
+	uintptr bucket;
+	Bucket *b;
+	uintptr i;
+	bool eq;
+	byte *k, *v;
+	byte *rk, *rv;
 
-static void
-hash_visit_internal (struct hash_subtable *st,
-		int32 used, int32 level,
-		void (*data_visit) (void *arg, int32 level, void *data),
-		void *arg)
-{
-	int32 elemsize = st->datasize + offsetof (struct hash_entry, data[0]);
-	struct hash_entry *e = st->entry;
-	int32 shift = HASH_BITS - (used + st->power);
-	int32 i = 0;
+	h = it->h;
+	t = it->t;
+	bucket = it->bucket;
+	b = it->bptr;
+	i = it->i;
 
-	while (e <= st->last) {
-		int32 index = ((e->hash >> (shift - 1)) >> 1) & ((1 << st->power) - 1);
-		if ((e->hash & HASH_MASK) == HASH_SUBHASH) {
-			  (*data_visit) (arg, level, e->data);
-			  hash_visit_internal (*(struct hash_subtable **)e->data,
-				used + st->power, level + 1, data_visit, arg);
-		} else {
-			  (*data_visit) (arg, level, e->data);
+next:
+	if(b == nil) {
+		if(bucket == it->endbucket && it->wrapped) {
+			// end of iteration
+			it->key = nil;
+			it->value = nil;
+			return;
+		}
+		if(h->oldbuckets != nil && it->B == h->B) {
+			// Iterator was started in the middle of a grow, and the grow isn't done yet.
+			// Make sure the bucket we're about to read is valid.
+			grow_work(t, h, bucket);
 		}
-		if (e->hash != HASH_NIL) {
-			  assert (i < index + st->max_probes);
-			  assert (index <= i);
+		b = (Bucket*)(it->buckets + bucket * h->bucketsize);
+		bucket++;
+		if(bucket == ((uintptr)1 << it->B)) {
+			bucket = 0;
+			it->wrapped = true;
 		}
-		e = HASH_OFFSET (e, elemsize);
-		i++;
+		i = 0;
 	}
+	k = b->data + h->keysize * i;
+	v = b->data + h->keysize * BUCKETSIZE + h->valuesize * i;
+	for(; i < BUCKETSIZE; i++, k += h->keysize, v += h->valuesize) {
+		if(b->tophash[i] != 0) {
+			if(!evacuated(b)) {
+				// this is the golden data, we can return it.
+				it->key = IK(h, k);
+				it->value = IV(h, v);
+			} else {
+				// The hash table has grown since the iterator was started.
+				// The golden data for this key is now somewhere else.
+				t->key->alg->equal(&eq, t->key->size, IK(h, k), IK(h, k));
+				if(eq) {
+					// Check the current hash table for the data.
+					// This code handles the case where the key
+					// has been deleted, updated, or deleted and reinserted.
+					// NOTE: we need to regrab the key as it has potentially been
+					// updated to an equal() but not identical key (e.g. +0.0 vs -0.0).
+					rk = IK(h, k);
+					rv = hash_lookup(t, it->h, &rk);
+					if(rv == nil)
+						continue; // key has been deleted
+					it->key = rk;
+					it->value = rv;
+				} else {
+					// if key!=key then the entry can't be deleted or
+					// updated, so we can just return it.  That's lucky for
+					// us because when key!=key we can't look it up
+					// successfully in the current table.
+					it->key = IK(h, k);
+					it->value = IV(h, v);
+				}
+			}
+			it->bucket = bucket;
+			it->bptr = b;
+			it->i = i + 1;
+			return;
+		}
+	}
+	b = overflowptr(b);
+	i = 0;
+	goto next;
 }
 
-static void
-hash_visit (Hmap *h, void (*data_visit) (void *arg, int32 level, void *data), void *arg)
-{
-	hash_visit_internal (h->st, 0, 0, data_visit, arg);
-}
+
+#define PHASE_BUCKETS      0
+#define PHASE_OLD_BUCKETS  1
+#define PHASE_TABLE        2
+#define PHASE_OLD_TABLE    3
+#define PHASE_DONE         4
 
 // Initialize the iterator.
 // Returns false if Hmap contains no pointers (in which case the iterator is not initialized).
@@ -713,73 +803,141 @@ bool
 hash_gciter_init (Hmap *h, struct hash_gciter *it)
 {
 	// GC during map initialization
-	if(h->st == nil)
+	if(h->buckets == nil)
 		return false;
 
-	it->elemsize = h->datasize + offsetof (struct hash_entry, data[0]);
-	it->flag = h->flag;
-	it->valoff = h->valoff;
-	it->i = 0;
-	it->st = h->st;
-	it->subtable_state[it->i].e = h->st->entry;
-	it->subtable_state[it->i].last = h->st->last;
+	it->h = h;
+	it->phase = PHASE_BUCKETS;
+	it->bucket = 0;
+	it->b = nil;
+
+	// TODO: finish evacuating oldbuckets so that we can collect
+	// oldbuckets?  We don't want to keep a partially evacuated
+	// table around forever, so each gc could make at least some
+	// evacuation progress.  Need to be careful about concurrent
+	// access if we do concurrent gc.  Even if not, we don't want
+	// to make the gc pause any longer than it has to be.
+
 	return true;
 }
 
-// Returns true and fills *data with subtable/key/value data,
+// Returns true and fills *data with internal structure/key/value data,
 // or returns false if the iterator has terminated.
+// Ugh, this interface is really annoying.  I want a callback fn!
 bool
-hash_gciter_next (struct hash_gciter *it, struct hash_gciter_data *data)
+hash_gciter_next(struct hash_gciter *it, struct hash_gciter_data *data)
 {
-	struct hash_entry *e;
-	struct hash_gciter_sub *sub;
+	Hmap *h;
+	uintptr bucket, oldbucket;
+	Bucket *b, *oldb;
+	uintptr i;
+	byte *k, *v;
+
+	h = it->h;
+	bucket = it->bucket;
+	b = it->b;
+	i = it->i;
 
 	data->st = nil;
 	data->key_data = nil;
 	data->val_data = nil;
+	data->indirectkey = (h->flags & IndirectKey) != 0;
+	data->indirectval = (h->flags & IndirectValue) != 0;
 
-	// pointer to the first-level table
-	if(it->st != nil) {
-		data->st = it->st;
-		it->st = nil;
-		return true;
-	}
-
-popped:
-	sub = &it->subtable_state[it->i];
-	e = sub->e;
-	while (e <= sub->last) {
-		if ((e->hash & HASH_MASK) == HASH_SUBHASH) {
-			struct hash_subtable *st = *(struct hash_subtable **)e->data;
-			data->st = st;
-			sub->e = HASH_OFFSET (e, it->elemsize);
-
-			// push
-			it->i++;
-			assert (it->i < nelem(it->subtable_state));
-			sub++;
-			sub->e = st->entry;
-			sub->last = st->last;
-
-			return true;
+next:
+	switch (it->phase) {
+	case PHASE_BUCKETS:
+		if(b != nil) {
+			k = b->data + h->keysize * i;
+			v = b->data + h->keysize * BUCKETSIZE + h->valuesize * i;
+			for(; i < BUCKETSIZE; i++, k += h->keysize, v += h->valuesize) {
+				if(b->tophash[i] != 0) {
+					data->key_data = k;
+					data->val_data = v;
+					it->bucket = bucket;
+					it->b = b;
+					it->i = i + 1;
+					return true;
+				}
+			}
+			b = b->overflow;
+			if(b != nil) {
+				data->st = (byte*)b;
+				it->bucket = bucket;
+				it->b = b;
+				it->i = 0;
+				return true;
+			}
 		}
-		if(e->hash != HASH_NIL) {
-			void *key_data = e->data;
-			void *val_data = (byte*)e->data + it->valoff;
-			data->key_data = key_data;
-			data->val_data = val_data;
-			data->indirectkey = (it->flag & IndirectKey) != 0;
-			data->indirectval = (it->flag & IndirectVal) != 0;
-			sub->e = HASH_OFFSET (e, it->elemsize);
+		while(bucket < ((uintptr)1 << h->B)) {
+			if(h->oldbuckets != nil) {
+				oldbucket = bucket & (((uintptr)1 << (h->B - 1)) - 1);
+				oldb = (Bucket*)(h->oldbuckets + oldbucket * h->bucketsize);
+				if(!evacuated(oldb)) {
+					// new bucket isn't valid yet
+					bucket++;
+					continue;
+				}
+			}
+			b = (Bucket*)(h->buckets + bucket * h->bucketsize);
+			i = 0;
+			bucket++;
+			goto next;
+		}
+		it->phase = PHASE_OLD_BUCKETS;
+		bucket = 0;
+		b = nil;
+		goto next;
+	case PHASE_OLD_BUCKETS:
+		if(h->oldbuckets == nil) {
+			it->phase = PHASE_TABLE;
+			goto next;
+		}
+		if(b != nil) {
+			k = b->data + h->keysize * i;
+			v = b->data + h->keysize * BUCKETSIZE + h->valuesize * i;
+			for(; i < BUCKETSIZE; i++, k += h->keysize, v += h->valuesize) {
+				if(b->tophash[i] != 0) {
+					data->key_data = k;
+					data->val_data = v;
+					it->bucket = bucket;
+					it->b = b;
+					it->i = i + 1;
+					return true;
+				}
+			}
+			b = overflowptr(b);
+			if(b != nil) {
+				data->st = (byte*)b;
+				it->bucket = bucket;
+				it->b = b;
+				it->i = 0;
+				return true;
+			}
+		}
+		if(bucket < ((uintptr)1 << (h->B - 1))) {
+			b = (Bucket*)(h->oldbuckets + bucket * h->bucketsize);
+			bucket++;
+			i = 0;
+			goto next;
+		}
+		it->phase = PHASE_TABLE;
+		goto next;
+	case PHASE_TABLE:
+		it->phase = PHASE_OLD_TABLE;
+		data->st = h->buckets;
+		return true;
+	case PHASE_OLD_TABLE:
+		it->phase = PHASE_DONE;
+		if(h->oldbuckets != nil) {
+			data->st = h->oldbuckets;
 			return true;
+		} else {
+			goto next;
 		}
-		e = HASH_OFFSET (e, it->elemsize);
-	}
-	if(it->i != 0) {
-		// pop
-		it->i--;
-		goto popped;
 	}
+	if(it->phase != PHASE_DONE)
+		runtime·throw("bad phase at done");
 	return false;
 }
 
@@ -787,35 +945,13 @@ popped:
 /// interfaces to go runtime
 //
 
-static void**
-hash_valptr(Hmap *h, void *p)
-{
-	p = (byte*)p + h->valoff;
-	if(h->flag & IndirectVal)
-		p = *(void**)p;
-	return p;
-}
-
-
-static void**
-hash_keyptr(Hmap *h, void *p)
-{
-	if(h->flag & IndirectKey)
-		p = *(void**)p;
-	return p;
-}
-
-static	int32	debug	= 0;
-
 Hmap*
 runtime·makemap_c(MapType *typ, int64 hint)
 {
 	Hmap *h;
-	Type *key, *val;
-	uintptr ksize, vsize;
+	Type *key;
 
 	key = typ->key;
-	val = typ->elem;
 
 	if(hint < 0 || (int32)hint != hint)
 		runtime·panicstring("makemap: size out of range");
@@ -824,7 +960,6 @@ runtime·makemap_c(MapType *typ, int64 hint)
 		runtime·throw("runtime.makemap: unsupported map key type");
 
 	h = runtime·mal(sizeof(*h));
-	h->flag |= CanFreeTable;  /* until reflect gets involved, free is okay */
 
 	if(UseSpanType) {
 		if(false) {
@@ -833,34 +968,14 @@ runtime·makemap_c(MapType *typ, int64 hint)
 		runtime·settype(h, (uintptr)typ | TypeInfo_Map);
 	}
 
-	ksize = ROUND(key->size, sizeof(void*));
-	vsize = ROUND(val->size, sizeof(void*));
-	if(ksize > MaxData || vsize > MaxData || ksize+vsize > MaxData) {
-		// Either key is too big, or value is, or combined they are.
-		// Prefer to keep the key if possible, because we look at
-		// keys more often than values.
-		if(ksize > MaxData - sizeof(void*)) {
-			// No choice but to indirect the key.
-			h->flag |= IndirectKey;
-			h->flag |= CanFreeKey;  /* until reflect gets involved, free is okay */
-			ksize = sizeof(void*);
-		}
-		if(vsize > MaxData - ksize) {
-			// Have to indirect the value.
-			h->flag |= IndirectVal;
-			vsize = sizeof(void*);
-		}
-	}
-
-	h->valoff = ksize;
-	hash_init(h, ksize+vsize, hint);
+	hash_init(typ, h, hint);
 
 	// these calculations are compiler dependent.
 	// figure out offsets of map call arguments.
 
 	if(debug) {
 		runtime·printf("makemap: map=%p; keysize=%p; valsize=%p; keyalg=%p; valalg=%p\n",
-			h, key->size, val->size, key->alg, val->alg);
+			       h, key->size, typ->elem->size, key->alg, typ->elem->alg);
 	}
 
 	return h;
@@ -890,7 +1005,7 @@ runtime·mapaccess(MapType *t, Hmap *h, byte *ak, byte *av, bool *pres)
 	Type *elem;
 
 	elem = t->elem;
-	if(h == nil) {
+	if(h == nil || h->count == 0) {
 		elem->alg->copy(elem->size, av, nil);
 		*pres = false;
 		return;
@@ -899,10 +1014,11 @@ runtime·mapaccess(MapType *t, Hmap *h, byte *ak, byte *av, bool *pres)
 	if(runtime·gcwaiting)
 		runtime·gosched();
 
-	res = nil;
-	if(hash_lookup(t, h, ak, (void**)&res)) {
+	res = hash_lookup(t, h, &ak);
+
+	if(res != nil) {
 		*pres = true;
-		elem->alg->copy(elem->size, av, hash_valptr(h, res));
+		elem->alg->copy(elem->size, av, res);
 	} else {
 		*pres = false;
 		elem->alg->copy(elem->size, av, nil);
@@ -915,7 +1031,7 @@ void
 runtime·mapaccess1(MapType *t, Hmap *h, ...)
 {
 	byte *ak, *av;
-	bool pres;
+	byte *res;
 
 	if(raceenabled && h != nil)
 		runtime·racereadpc(h, runtime·getcallerpc(&t), runtime·mapaccess1);
@@ -923,7 +1039,12 @@ runtime·mapaccess1(MapType *t, Hmap *h, ...)
 	ak = (byte*)(&h + 1);
 	av = ak + ROUND(t->key->size, Structrnd);
 
-	runtime·mapaccess(t, h, ak, av, &pres);
+	if(h == nil || h->count == 0) {
+		t->elem->alg->copy(t->elem->size, av, nil);
+	} else {
+		res = hash_lookup(t, h, &ak);
+		t->elem->alg->copy(t->elem->size, av, res);
+	}
 
 	if(debug) {
 		runtime·prints("runtime.mapaccess1: map=");
@@ -932,8 +1053,6 @@ runtime·mapaccess1(MapType *t, Hmap *h, ...)
 		t->key->alg->print(t->key->size, ak);
 		runtime·prints("; val=");
 		t->elem->alg->print(t->elem->size, av);
-		runtime·prints("; pres=");
-		runtime·printbool(pres);
 		runtime·prints("\n");
 	}
 }
@@ -960,7 +1079,7 @@ runtime·mapaccess2(MapType *t, Hmap *h, ...)
 		runtime·prints("; key=");
 		t->key->alg->print(t->key->size, ak);
 		runtime·prints("; val=");
-		t->elem->alg->print(t->key->size, av);
+		t->elem->alg->print(t->elem->size, av);
 		runtime·prints("; pres=");
 		runtime·printbool(*ap);
 		runtime·prints("\n");
@@ -999,9 +1118,6 @@ reflect·mapaccess(MapType *t, Hmap *h, uintptr key, uintptr val, bool pres)
 void
 runtime·mapassign(MapType *t, Hmap *h, byte *ak, byte *av)
 {
-	byte *res;
-	int32 hit;
-
 	if(h == nil)
 		runtime·panicstring("assignment to entry in nil map");
 
@@ -1010,21 +1126,9 @@ runtime·mapassign(MapType *t, Hmap *h, byte *ak, byte *av)
 
 	if(av == nil) {
 		hash_remove(t, h, ak);
-		return;
-	}
-
-	res = nil;
-	hit = hash_insert(t, h, ak, (void**)&res);
-	if(!hit) {
-		// Need to pass dogc=0 to runtime·mallocgc because the garbage collector
-		// is assuming that all hashmaps are in a consistent state.
-		if(h->flag & IndirectKey)
-			*(void**)res = runtime·mallocgc(t->key->size, 0, 0, 1);
-		if(h->flag & IndirectVal)
-			*(void**)(res+h->valoff) = runtime·mallocgc(t->elem->size, 0, 0, 1);
+	} else {
+		hash_insert(t, h, ak, av);
 	}
-	t->key->alg->copy(t->key->size, hash_keyptr(h, res), ak);
-	t->elem->alg->copy(t->elem->size, hash_valptr(h, res), av);
 
 	if(debug) {
 		runtime·prints("mapassign: map=");
@@ -1033,10 +1137,6 @@ runtime·mapassign(MapType *t, Hmap *h, byte *ak, byte *av)
 		t->key->alg->print(t->key->size, ak);
 		runtime·prints("; val=");
 		t->elem->alg->print(t->elem->size, av);
-		runtime·prints("; hit=");
-		runtime·printint(hit);
-		runtime·prints("; res=");
-		runtime·printpointer(res);
 		runtime·prints("\n");
 	}
 }
@@ -1114,20 +1214,20 @@ void
 runtime·mapiterinit(MapType *t, Hmap *h, struct hash_iter *it)
 {
 	if(h == nil) {
-		it->data = nil;
+		it->key = nil;
 		return;
 	}
 	if(raceenabled)
 		runtime·racereadpc(h, runtime·getcallerpc(&t), runtime·mapiterinit);
 	hash_iter_init(t, h, it);
-	it->data = hash_next(it);
+	hash_next(it);
 	if(debug) {
 		runtime·prints("runtime.mapiterinit: map=");
 		runtime·printpointer(h);
 		runtime·prints("; iter=");
 		runtime·printpointer(it);
-		runtime·prints("; data=");
-		runtime·printpointer(it->data);
+		runtime·prints("; key=");
+		runtime·printpointer(it->key);
 		runtime·prints("\n");
 	}
 }
@@ -1137,20 +1237,18 @@ runtime·mapiterinit(MapType *t, Hmap *h, struct hash_iter *it)
 void
 reflect·mapiterinit(MapType *t, Hmap *h, struct hash_iter *it)
 {
-	uint8 flag;
+	uint8 flags;
 
 	if(h != nil && t->key->size > sizeof(void*)) {
 		// reflect·mapiterkey returns pointers to key data,
 		// and reflect holds them, so we cannot free key data
-		// eagerly anymore.  Updating h->flag now is racy,
-		// but it's okay because this is the only possible store
-		// after creation.
-		flag = h->flag;
-		if(flag & IndirectKey)
-			flag &= ~CanFreeKey;
+		// eagerly anymore.
+		flags = h->flags;
+		if(flags & IndirectKey)
+			flags &= ~CanFreeKey;
 		else
-			flag &= ~CanFreeTable;
-		h->flag = flag;
+			flags &= ~CanFreeBucket;
+		h->flags = flags;
 	}
 
 	it = runtime·mal(sizeof *it);
@@ -1167,12 +1265,12 @@ runtime·mapiternext(struct hash_iter *it)
 	if(runtime·gcwaiting)
 		runtime·gosched();
 
-	it->data = hash_next(it);
+	hash_next(it);
 	if(debug) {
 		runtime·prints("runtime.mapiternext: iter=");
 		runtime·printpointer(it);
-		runtime·prints("; data=");
-		runtime·printpointer(it->data);
+		runtime·prints("; key=");
+		runtime·printpointer(it->key);
 		runtime·prints("\n");
 	}
 }
@@ -1190,25 +1288,23 @@ reflect·mapiternext(struct hash_iter *it)
 void
 runtime·mapiter1(struct hash_iter *it, ...)
 {
-	Hmap *h;
 	byte *ak, *res;
 	Type *key;
 
-	h = it->h;
 	ak = (byte*)(&it + 1);
 
-	res = it->data;
+	res = it->key;
 	if(res == nil)
 		runtime·throw("runtime.mapiter1: key:val nil pointer");
 
 	key = it->t->key;
-	key->alg->copy(key->size, ak, hash_keyptr(h, res));
+	key->alg->copy(key->size, ak, res);
 
 	if(debug) {
-		runtime·prints("mapiter2: iter=");
+		runtime·prints("mapiter1: iter=");
 		runtime·printpointer(it);
 		runtime·prints("; map=");
-		runtime·printpointer(h);
+		runtime·printpointer(it->h);
 		runtime·prints("\n");
 	}
 }
@@ -1219,11 +1315,11 @@ runtime·mapiterkey(struct hash_iter *it, void *ak)
 	byte *res;
 	Type *key;
 
-	res = it->data;
+	res = it->key;
 	if(res == nil)
 		return false;
 	key = it->t->key;
-	key->alg->copy(key->size, ak, hash_keyptr(it->h, res));
+	key->alg->copy(key->size, ak, res);
 	return true;
 }
 
@@ -1239,14 +1335,13 @@ reflect·mapiterkey(struct hash_iter *it, uintptr key, bool ok)
 
 	key = 0;
 	ok = false;
-	res = it->data;
+	res = it->key;
 	if(res == nil) {
 		key = 0;
 		ok = false;
 	} else {
 		tkey = it->t->key;
 		key = 0;
-		res = (byte*)hash_keyptr(it->h, res);
 		if(tkey->size <= sizeof(key))
 			tkey->alg->copy(tkey->size, (byte*)&key, res);
 		else
@@ -1278,7 +1373,6 @@ reflect·maplen(Hmap *h, intgo len)
 void
 runtime·mapiter2(struct hash_iter *it, ...)
 {
-	Hmap *h;
 	byte *ak, *av, *res;
 	MapType *t;
 
@@ -1286,19 +1380,18 @@ runtime·mapiter2(struct hash_iter *it, ...)
 	ak = (byte*)(&it + 1);
 	av = ak + ROUND(t->key->size, t->elem->align);
 
-	res = it->data;
+	res = it->key;
 	if(res == nil)
 		runtime·throw("runtime.mapiter2: key:val nil pointer");
 
-	h = it->h;
-	t->key->alg->copy(t->key->size, ak, hash_keyptr(h, res));
-	t->elem->alg->copy(t->elem->size, av, hash_valptr(h, res));
+	t->key->alg->copy(t->key->size, ak, res);
+	t->elem->alg->copy(t->elem->size, av, it->value);
 
 	if(debug) {
 		runtime·prints("mapiter2: iter=");
 		runtime·printpointer(it);
 		runtime·prints("; map=");
-		runtime·printpointer(h);
+		runtime·printpointer(it->h);
 		runtime·prints("\n");
 	}
 }
diff --git a/src/pkg/runtime/hashmap.h b/src/pkg/runtime/hashmap.h
index 9b82f299e0..2988417f68 100644
--- a/src/pkg/runtime/hashmap.h
+++ b/src/pkg/runtime/hashmap.h
@@ -2,180 +2,28 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
-/* A hash table.
-   Example, hashing nul-terminated char*s:
-	hash_hash_t str_hash (void *v) {
-		char *s;
-		hash_hash_t hash = 0;
-		for (s = *(char **)v; *s != 0; s++) {
-			hash = (hash ^ *s) * 2654435769U;
-		}
-		return (hash);
-	}
-	int str_eq (void *a, void *b) {
-		return (strcmp (*(char **)a, *(char **)b) == 0);
-	}
-	void str_del (void *arg, void *data) {
-		*(char **)arg = *(char **)data;
-	}
-
-	struct hash *h = hash_new (sizeof (char *), &str_hash, &str_eq, &str_del, 3, 12, 15);
-	...  3=> 2**3  entries initial size
-	... 12=> 2**12 entries before sprouting sub-tables
-	... 15=> number of adjacent probes to attempt before growing
-
-  Example lookup:
-	char *key = "foobar";
-	char **result_ptr;
-	if (hash_lookup (h, &key, (void **) &result_ptr)) {
-	      printf ("found in table: %s\n", *result_ptr);
-	} else {
-	      printf ("not found in table\n");
-	}
-
-  Example insertion:
-	char *key = strdup ("foobar");
-	char **result_ptr;
-	if (hash_lookup (h, &key, (void **) &result_ptr)) {
-	      printf ("found in table: %s\n", *result_ptr);
-	      printf ("to overwrite, do   *result_ptr = key\n");
-	} else {
-	      printf ("not found in table; inserted as %s\n", *result_ptr);
-	      assert (*result_ptr == key);
-	}
-
-  Example deletion:
-	char *key = "foobar";
-	char *result;
-	if (hash_remove (h, &key, &result)) {
-	      printf ("key found and deleted from table\n");
-	      printf ("called str_del (&result, data) to copy data to result: %s\n", result);
-	} else {
-	      printf ("not found in table\n");
-	}
-
-  Example iteration over the elements of *h:
-	char **data;
-	struct hash_iter it;
-	hash_iter_init (h, &it);
-	for (data = hash_next (&it); data != 0; data = hash_next (&it)) {
-	    printf ("%s\n", *data);
-	}
- */
-
-#define	memset(a,b,c)	runtime·memclr((byte*)(a), (uint32)(c))
-#define	memcpy(a,b,c)	runtime·memmove((byte*)(a),(byte*)(b),(uint32)(c))
-#define	assert(a)	if(!(a)) runtime·throw("hashmap assert")
-#define free(x)	runtime·free(x)
-#define memmove(a,b,c)	runtime·memmove(a, b, c)
-
 struct Hmap;		/* opaque */
-struct hash_subtable;	/* opaque */
-struct hash_entry;	/* opaque */
-
-typedef uintptr uintptr_t;
-typedef uintptr_t hash_hash_t;
-
-struct hash_iter {
-	uint8*	data;		/* returned from next */
-	int32	elemsize;	/* size of elements in table */
-	int32	changes;	/* number of changes observed last time */
-	int32	i;		/* stack pointer in subtable_state */
-	bool cycled;		/* have reached the end and wrapped to 0 */
-	hash_hash_t last_hash;	/* last hash value returned */
-	hash_hash_t cycle;	/* hash value where we started */
-	struct Hmap *h;		/* the hash table */
-	MapType *t;			/* the map type */
-	struct hash_iter_sub {
-		struct hash_entry *e;		/* pointer into subtable */
-		struct hash_entry *start;	/* start of subtable */
-		struct hash_entry *last;		/* last entry in subtable */
-	} subtable_state[4];	/* Should be large enough unless the hashing is
-				   so bad that many distinct data values hash
-				   to the same hash value.  */
-};
-
-/* Return a hashtable h 2**init_power empty entries, each with
-   "datasize" data bytes.
-   (*data_hash)(a) should return the hash value of data element *a.
-   (*data_eq)(a,b) should return whether the data at "a" and the data at "b"
-   are equal.
-   (*data_del)(arg, a) will be invoked when data element *a is about to be removed
-   from the table.  "arg" is the argument passed to "hash_remove()".
-
-   Growing is accomplished by resizing if the current tables size is less than
-   a threshold, and by adding subtables otherwise.  hint should be set
-   the expected maximum size of the table.
-   "datasize" should be in [sizeof (void*), ..., 255].  If you need a
-   bigger "datasize", store a pointer to another piece of memory. */
-
-//struct hash *hash_new (int32 datasize,
-//		hash_hash_t (*data_hash) (void *),
-//		int32 (*data_eq) (void *, void *),
-//		void (*data_del) (void *, void *),
-//		int64 hint);
-
-/* Lookup *data in *h.   If the data is found, return 1 and place a pointer to
-   the found element in *pres.   Otherwise return 0 and place 0 in *pres. */
-// int32 hash_lookup (struct hash *h, void *data, void **pres);
-
-/* Lookup *data in *h.  If the data is found, execute (*data_del) (arg, p)
-   where p points to the data in the table, then remove it from *h and return
-   1.  Otherwise return 0.  */
-// int32 hash_remove (struct hash *h, void *data, void *arg);
-
-/* Lookup *data in *h.   If the data is found, return 1, and place a pointer
-   to the found element in *pres.   Otherwise, return 0, allocate a region
-   for the data to be inserted, and place a pointer to the inserted element
-   in *pres; it is the caller's responsibility to copy the data to be
-   inserted to the pointer returned in *pres in this case.
-
-   If using garbage collection, it is the caller's responsibility to
-   add references for **pres if HASH_ADDED is returned. */
-// int32 hash_insert (struct hash *h, void *data, void **pres);
-
-/* Return the number of elements in the table. */
-// uint32 hash_count (struct hash *h);
-
-/* The following call is useful only if not using garbage collection on the
-   table.
-   Remove all sub-tables associated with *h.
-   This undoes the effects of hash_init().
-   If other memory pointed to by user data must be freed, the caller is
-   responsible for doing so by iterating over *h first; see
-   hash_iter_init()/hash_next().  */
-// void hash_destroy (struct hash *h);
-
-/*----- iteration -----*/
-
-/* Initialize *it from *h. */
-// void hash_iter_init (struct hash *h, struct hash_iter *it);
-
-/* Return the next used entry in the table with which *it was initialized. */
-// void *hash_next (struct hash_iter *it);
-
-/*---- test interface ----*/
-/* Call (*data_visit) (arg, level, data) for every data entry in the table,
-   whether used or not.   "level" is the subtable level, 0 means first level. */
-/* TESTING ONLY: DO NOT USE THIS ROUTINE IN NORMAL CODE */
-// void hash_visit (struct hash *h, void (*data_visit) (void *arg, int32 level, void *data), void *arg);
 
 /* Used by the garbage collector */
 struct hash_gciter
 {
-	int32	elemsize;
-	uint8	flag;
-	uint8	valoff;
-	uint32	i;		/* stack pointer in subtable_state */
-	struct hash_subtable *st;
-	struct hash_gciter_sub {
-		struct hash_entry *e;		/* pointer into subtable */
-		struct hash_entry *last;	/* last entry in subtable */
-	} subtable_state[4];
+	Hmap *h;
+	int32 phase;
+	uintptr bucket;
+	struct Bucket *b;
+	uintptr i;
 };
+
+// this data is used by the garbage collector to keep the map's
+// internal structures from being reclaimed.  The iterator must
+// return in st every live object (ones returned by mallocgc) so
+// that those objects won't be collected, and it must return
+// every key & value in key_data/val_data so they can get scanned
+// for pointers they point to.  Note that if you malloc storage
+// for keys and values, you need to do both.
 struct hash_gciter_data
 {
-	struct hash_subtable *st;	/* subtable pointer, or nil */
+	uint8 *st;			/* internal structure, or nil */
 	uint8 *key_data;		/* key data, or nil */
 	uint8 *val_data;		/* value data, or nil */
 	bool indirectkey;		/* storing pointers to keys */
diff --git a/src/pkg/runtime/hashmap_fast.c b/src/pkg/runtime/hashmap_fast.c
new file mode 100644
index 0000000000..2169f4c300
--- /dev/null
+++ b/src/pkg/runtime/hashmap_fast.c
@@ -0,0 +1,149 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Fast hashmap lookup specialized to a specific key type.
+// Included by hashmap.c once for each specialized type.
+
+// Note that this code differs from hash_lookup in that
+// it returns a pointer to the result, not the result itself.
+// The returned pointer is only valid until the next GC
+// point, so the caller must dereference it before then.
+
+// +build ignore
+
+#pragma textflag 7
+void
+HASH_LOOKUP1(MapType *t, Hmap *h, KEYTYPE key, byte *value)
+{
+	uintptr hash;
+	uintptr bucket;
+	Bucket *b;
+	uint8 top;
+	uintptr i;
+	KEYTYPE *k;
+	byte *v;
+
+	if(debug) {
+		runtime·prints("runtime.mapaccess1_fastXXX: map=");
+		runtime·printpointer(h);
+		runtime·prints("; key=");
+		t->key->alg->print(t->key->size, &key);
+		runtime·prints("\n");
+	}
+	if(h == nil || h->count == 0) {
+		value = empty_value;
+		FLUSH(&value);
+		return;
+	}
+	if(raceenabled)
+		runtime·racereadpc(h, runtime·getcallerpc(&t), HASH_LOOKUP1);
+	if(docheck)
+		check(t, h);
+
+	if(h->B == 0 && (h->count == 1 || QUICKEQ(key))) {
+		// One-bucket table.  Don't hash, just check each bucket entry.
+		b = (Bucket*)h->buckets;
+		for(i = 0, k = (KEYTYPE*)b->data, v = (byte*)(k + BUCKETSIZE); i < BUCKETSIZE; i++, k++, v += h->valuesize) {
+			if(b->tophash[i] != 0 && EQFUNC(key, *k)) {
+				value = v;
+				FLUSH(&value);
+				return;
+			}
+		}
+	} else {
+		hash = h->hash0;
+		HASHFUNC(&hash, sizeof(KEYTYPE), &key);
+		bucket = hash & (((uintptr)1 << h->B) - 1);
+		if(h->oldbuckets != nil)
+			grow_work(t, h, bucket);
+		b = (Bucket*)(h->buckets + bucket * (offsetof(Bucket, data[0]) + BUCKETSIZE * sizeof(KEYTYPE) + BUCKETSIZE * h->valuesize));
+		top = hash >> (sizeof(uintptr)*8 - 8);
+		if(top == 0)
+			top = 1;
+		do {
+			for(i = 0, k = (KEYTYPE*)b->data, v = (byte*)(k + BUCKETSIZE); i < BUCKETSIZE; i++, k++, v += h->valuesize) {
+				if(b->tophash[i] == top && EQFUNC(key, *k)) {
+					value = v;
+					FLUSH(&value);
+					return;
+				}
+			}
+			b = b->overflow;
+		} while(b != nil);
+	}
+	value = empty_value;
+	FLUSH(&value);
+}
+
+#pragma textflag 7
+void
+HASH_LOOKUP2(MapType *t, Hmap *h, KEYTYPE key, byte *value, bool res)
+{
+	uintptr hash;
+	uintptr bucket;
+	Bucket *b;
+	uint8 top;
+	uintptr i;
+	KEYTYPE *k;
+	byte *v;
+
+	if(debug) {
+		runtime·prints("runtime.mapaccess2_fastXXX: map=");
+		runtime·printpointer(h);
+		runtime·prints("; key=");
+		t->key->alg->print(t->key->size, &key);
+		runtime·prints("\n");
+	}
+	if(h == nil || h->count == 0) {
+		value = empty_value;
+		res = false;
+		FLUSH(&value);
+		FLUSH(&res);
+		return;
+	}
+	if(raceenabled)
+		runtime·racereadpc(h, runtime·getcallerpc(&t), HASH_LOOKUP2);
+	if(docheck)
+		check(t, h);
+
+	if(h->B == 0 && (h->count == 1 || QUICKEQ(key))) {
+		// One-bucket table.  Don't hash, just check each bucket entry.
+		b = (Bucket*)h->buckets;
+		for(i = 0, k = (KEYTYPE*)b->data, v = (byte*)(k + BUCKETSIZE); i < BUCKETSIZE; i++, k++, v += h->valuesize) {
+			if(b->tophash[i] != 0 && EQFUNC(key, *k)) {
+				value = v;
+				res = true;
+				FLUSH(&value);
+				FLUSH(&res);
+				return;
+			}
+		}
+	} else {
+		hash = h->hash0;
+		HASHFUNC(&hash, sizeof(KEYTYPE), &key);
+		bucket = hash & (((uintptr)1 << h->B) - 1);
+		if(h->oldbuckets != nil)
+			grow_work(t, h, bucket);
+		b = (Bucket*)(h->buckets + bucket * (offsetof(Bucket, data[0]) + BUCKETSIZE * sizeof(KEYTYPE) + BUCKETSIZE * h->valuesize));
+		top = hash >> (sizeof(uintptr)*8 - 8);
+		if(top == 0)
+			top = 1;
+		do {
+			for(i = 0, k = (KEYTYPE*)b->data, v = (byte*)(k + BUCKETSIZE); i < BUCKETSIZE; i++, k++, v += h->valuesize) {
+				if(b->tophash[i] == top && EQFUNC(key, *k)) {
+					value = v;
+					res = true;
+					FLUSH(&value);
+					FLUSH(&res);
+					return;
+				}
+			}
+			b = b->overflow;
+		} while(b != nil);
+	}
+	value = empty_value;
+	res = false;
+	FLUSH(&value);
+	FLUSH(&res);
+}
diff --git a/src/pkg/runtime/map_test.go b/src/pkg/runtime/map_test.go
new file mode 100644
index 0000000000..29e19db2c6
--- /dev/null
+++ b/src/pkg/runtime/map_test.go
@@ -0,0 +1,282 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime_test
+
+import (
+	"fmt"
+	"math"
+	"runtime"
+	"sort"
+	"testing"
+)
+
+// negative zero is a good test because:
+//  1) 0 and -0 are equal, yet have distinct representations.
+//  2) 0 is represented as all zeros, -0 isn't.
+// I'm not sure the language spec actually requires this behavior,
+// but it's what the current map implementation does.
+func TestNegativeZero(t *testing.T) {
+	m := make(map[float64]bool, 0)
+
+	m[+0.0] = true
+	m[math.Copysign(0.0, -1.0)] = true // should overwrite +0 entry
+
+	if len(m) != 1 {
+		t.Error("length wrong")
+	}
+
+	for k, _ := range m {
+		if math.Copysign(1.0, k) > 0 {
+			t.Error("wrong sign")
+		}
+	}
+
+	m = make(map[float64]bool, 0)
+	m[math.Copysign(0.0, -1.0)] = true
+	m[+0.0] = true // should overwrite -0.0 entry
+
+	if len(m) != 1 {
+		t.Error("length wrong")
+	}
+
+	for k, _ := range m {
+		if math.Copysign(1.0, k) < 0 {
+			t.Error("wrong sign")
+		}
+	}
+}
+
+// nan is a good test because nan != nan, and nan has
+// a randomized hash value.
+func TestNan(t *testing.T) {
+	m := make(map[float64]int, 0)
+	nan := math.NaN()
+	m[nan] = 1
+	m[nan] = 2
+	m[nan] = 4
+	if len(m) != 3 {
+		t.Error("length wrong")
+	}
+	s := 0
+	for k, v := range m {
+		if k == k {
+			t.Error("nan disappeared")
+		}
+		if (v & (v - 1)) != 0 {
+			t.Error("value wrong")
+		}
+		s |= v
+	}
+	if s != 7 {
+		t.Error("values wrong")
+	}
+}
+
+// Maps aren't actually copied on assignment.
+func TestAlias(t *testing.T) {
+	m := make(map[int]int, 0)
+	m[0] = 5
+	n := m
+	n[0] = 6
+	if m[0] != 6 {
+		t.Error("alias didn't work")
+	}
+}
+
+func TestGrowWithNaN(t *testing.T) {
+	m := make(map[float64]int, 4)
+	nan := math.NaN()
+	m[nan] = 1
+	m[nan] = 2
+	m[nan] = 4
+	cnt := 0
+	s := 0
+	growflag := true
+	for k, v := range m {
+		if growflag {
+			// force a hashtable resize
+			for i := 0; i < 100; i++ {
+				m[float64(i)] = i
+			}
+			growflag = false
+		}
+		if k != k {
+			cnt++
+			s |= v
+		}
+	}
+	if cnt != 3 {
+		t.Error("NaN keys lost during grow")
+	}
+	if s != 7 {
+		t.Error("NaN values lost during grow")
+	}
+}
+
+type FloatInt struct {
+	x float64
+	y int
+}
+
+func TestGrowWithNegativeZero(t *testing.T) {
+	negzero := math.Copysign(0.0, -1.0)
+	m := make(map[FloatInt]int, 4)
+	m[FloatInt{0.0, 0}] = 1
+	m[FloatInt{0.0, 1}] = 2
+	m[FloatInt{0.0, 2}] = 4
+	m[FloatInt{0.0, 3}] = 8
+	growflag := true
+	s := 0
+	cnt := 0
+	negcnt := 0
+	// The first iteration should return the +0 key.
+	// The subsequent iterations should return the -0 key.
+	// I'm not really sure this is required by the spec,
+	// but it makes sense.
+	// TODO: are we allowed to get the first entry returned again???
+	for k, v := range m {
+		if v == 0 {
+			continue
+		} // ignore entries added to grow table
+		cnt++
+		if math.Copysign(1.0, k.x) < 0 {
+			if v&16 == 0 {
+				t.Error("key/value not updated together 1")
+			}
+			negcnt++
+			s |= v & 15
+		} else {
+			if v&16 == 16 {
+				t.Error("key/value not updated together 2", k, v)
+			}
+			s |= v
+		}
+		if growflag {
+			// force a hashtable resize
+			for i := 0; i < 100; i++ {
+				m[FloatInt{3.0, i}] = 0
+			}
+			// then change all the entries
+			// to negative zero
+			m[FloatInt{negzero, 0}] = 1 | 16
+			m[FloatInt{negzero, 1}] = 2 | 16
+			m[FloatInt{negzero, 2}] = 4 | 16
+			m[FloatInt{negzero, 3}] = 8 | 16
+			growflag = false
+		}
+	}
+	if s != 15 {
+		t.Error("entry missing", s)
+	}
+	if cnt != 4 {
+		t.Error("wrong number of entries returned by iterator", cnt)
+	}
+	if negcnt != 3 {
+		t.Error("update to negzero missed by iteration", negcnt)
+	}
+}
+
+func TestIterGrowAndDelete(t *testing.T) {
+	m := make(map[int]int, 4)
+	for i := 0; i < 100; i++ {
+		m[i] = i
+	}
+	growflag := true
+	for k := range m {
+		if growflag {
+			// grow the table
+			for i := 100; i < 1000; i++ {
+				m[i] = i
+			}
+			// delete all odd keys
+			for i := 1; i < 1000; i += 2 {
+				delete(m, i)
+			}
+			growflag = false
+		} else {
+			if k&1 == 1 {
+				t.Error("odd value returned")
+			}
+		}
+	}
+}
+
+// make sure old bucket arrays don't get GCd while
+// an iterator is still using them.
+func TestIterGrowWithGC(t *testing.T) {
+	m := make(map[int]int, 4)
+	for i := 0; i < 16; i++ {
+		m[i] = i
+	}
+	growflag := true
+	bitmask := 0
+	for k := range m {
+		if k < 16 {
+			bitmask |= 1 << uint(k)
+		}
+		if growflag {
+			// grow the table
+			for i := 100; i < 1000; i++ {
+				m[i] = i
+			}
+			// trigger a gc
+			runtime.GC()
+			growflag = false
+		}
+	}
+	if bitmask != 1<<16-1 {
+		t.Error("missing key", bitmask)
+	}
+}
+
+func TestBigItems(t *testing.T) {
+	var key [256]string
+	for i := 0; i < 256; i++ {
+		key[i] = "foo"
+	}
+	m := make(map[[256]string][256]string, 4)
+	for i := 0; i < 100; i++ {
+		key[37] = fmt.Sprintf("string%02d", i)
+		m[key] = key
+	}
+	var keys [100]string
+	var values [100]string
+	i := 0
+	for k, v := range m {
+		keys[i] = k[37]
+		values[i] = v[37]
+		i++
+	}
+	sort.Strings(keys[:])
+	sort.Strings(values[:])
+	for i := 0; i < 100; i++ {
+		if keys[i] != fmt.Sprintf("string%02d", i) {
+			t.Errorf("#%d: missing key: %v", i, keys[i])
+		}
+		if values[i] != fmt.Sprintf("string%02d", i) {
+			t.Errorf("#%d: missing value: %v", i, values[i])
+		}
+	}
+}
+
+type empty struct {
+}
+
+func TestEmptyKeyAndValue(t *testing.T) {
+	a := make(map[int]empty, 4)
+	b := make(map[empty]int, 4)
+	c := make(map[empty]empty, 4)
+	a[0] = empty{}
+	b[empty{}] = 0
+	b[empty{}] = 1
+	c[empty{}] = empty{}
+
+	if len(a) != 1 {
+		t.Errorf("empty value insert problem")
+	}
+	if b[empty{}] != 1 {
+		t.Errorf("empty key returned wrong value")
+	}
+}
diff --git a/src/pkg/runtime/mapspeed_test.go b/src/pkg/runtime/mapspeed_test.go
index c6a83113a6..a379740606 100644
--- a/src/pkg/runtime/mapspeed_test.go
+++ b/src/pkg/runtime/mapspeed_test.go
@@ -5,6 +5,7 @@ package runtime_test
 
 import (
 	"fmt"
+	"strings"
 	"testing"
 )
 
@@ -94,3 +95,56 @@ func BenchmarkHashStringArraySpeed(b *testing.B) {
 		}
 	}
 }
+
+func BenchmarkMegMap(b *testing.B) {
+	m := make(map[string]bool)
+	for suffix := 'A'; suffix <= 'G'; suffix++ {
+		m[strings.Repeat("X", 1<<20-1)+fmt.Sprint(suffix)] = true
+	}
+	key := strings.Repeat("X", 1<<20-1) + "k"
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		_, _ = m[key]
+	}
+}
+
+func BenchmarkMegOneMap(b *testing.B) {
+	m := make(map[string]bool)
+	m[strings.Repeat("X", 1<<20)] = true
+	key := strings.Repeat("Y", 1<<20)
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		_, _ = m[key]
+	}
+}
+
+func BenchmarkMegEmptyMap(b *testing.B) {
+	m := make(map[string]bool)
+	key := strings.Repeat("X", 1<<20)
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		_, _ = m[key]
+	}
+}
+
+func BenchmarkSmallStrMap(b *testing.B) {
+	m := make(map[string]bool)
+	for suffix := 'A'; suffix <= 'G'; suffix++ {
+		m[fmt.Sprint(suffix)] = true
+	}
+	key := "k"
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		_, _ = m[key]
+	}
+}
+func BenchmarkIntMap(b *testing.B) {
+	m := make(map[int]bool)
+	for i := 0; i < 8; i++ {
+		m[i] = true
+	}
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		_, _ = m[7]
+	}
+}
diff --git a/src/pkg/runtime/runtime.c b/src/pkg/runtime/runtime.c
index ef39a2d55f..d62408118b 100644
--- a/src/pkg/runtime/runtime.c
+++ b/src/pkg/runtime/runtime.c
@@ -42,9 +42,9 @@ runtime·gotraceback(bool *crash)
 }
 
 int32
-runtime·mcmp(byte *s1, byte *s2, uint32 n)
+runtime·mcmp(byte *s1, byte *s2, uintptr n)
 {
-	uint32 i;
+	uintptr i;
 	byte c1, c2;
 
 	for(i=0; i<n; i++) {
diff --git a/src/pkg/runtime/runtime.h b/src/pkg/runtime/runtime.h
index 9828a9c558..d209a4dfca 100644
--- a/src/pkg/runtime/runtime.h
+++ b/src/pkg/runtime/runtime.h
@@ -687,7 +687,7 @@ void	runtime·panicstring(int8*);
 void	runtime·prints(int8*);
 void	runtime·printf(int8*, ...);
 byte*	runtime·mchr(byte*, byte, byte*);
-int32	runtime·mcmp(byte*, byte*, uint32);
+int32	runtime·mcmp(byte*, byte*, uintptr);
 void	runtime·memmove(void*, void*, uintptr);
 void*	runtime·mal(uintptr);
 String	runtime·catstring(String, String);
@@ -962,7 +962,6 @@ void	runtime·mapassign(MapType*, Hmap*, byte*, byte*);
 void	runtime·mapaccess(MapType*, Hmap*, byte*, byte*, bool*);
 void	runtime·mapiternext(struct hash_iter*);
 bool	runtime·mapiterkey(struct hash_iter*, void*);
-void	runtime·mapiterkeyvalue(struct hash_iter*, void*, void*);
 Hmap*	runtime·makemap_c(MapType*, int64);
 
 Hchan*	runtime·makechan_c(ChanType*, int64);
author	Keith Randall <khr@golang.org>	2013-03-20 13:51:29 -0700
committer	Keith Randall <khr@golang.org>	2013-03-20 13:51:29 -0700
commit	00224a356a5be3c134230bfa8fe11e0af2977aaf (patch)
tree	215e0ad8bd4fe89b437bf1ef572a0c4f94af489d /src/pkg
parent	2001f0c28ef4f2b7b907d060901a6fad2f1e9eb0 (diff)
download	go-00224a356a5be3c134230bfa8fe11e0af2977aaf.tar.xz