mirror of https://github.com/golang/go.git
runtime: do hashmap grow work during reads.
Before this change, grow work was done only during map writes, to ensure multithreaded safety. That can leave a map in a partially grown state for a long time, potentially forever. This change allows grow work to happen during reads as well, so grow work finishes sooner and the resulting map is smaller and faster. Grow work is never done in parallel; reads can proceed in parallel while grow work is happening.

R=golang-dev, dvyukov, khr, iant
CC=golang-dev
https://golang.org/cl/8852047
parent 71f061043d
commit 07b6add0ca
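The safety claim in the description (grow work is never done in parallel, while reads can proceed alongside it) rests on a simple scheme: the nevacuate progress counter doubles as a lock, and a reader becomes the exclusive evacuator only by CAS-ing the sentinel EVAC_LOCK into it. The following is a minimal, standalone sketch of that idea, using C11 atomics in place of the runtime's runtime·casp/runtime·atomicstorep; GrowState and evacuate_one are simplified stand-ins, not the runtime's types.

/* --- sketch (not part of the CL) --- */
#include <stdatomic.h>
#include <stdint.h>

#define EVAC_LOCK ((uintptr_t)-1)  // sentinel stored in nevacuate while someone is evacuating

typedef struct {
	_Atomic uintptr_t nevacuate;   // old buckets below this index are already evacuated
	uintptr_t noldbuckets;         // number of old buckets, 2^(B-1)
} GrowState;

// Placeholder for the real work: copy old bucket i into the new table.
static void evacuate_one(GrowState *g, uintptr_t i) { (void)g; (void)i; }

// Called from the read path: help the grow along if nobody else is doing so.
static void grow_work_read_sketch(GrowState *g)
{
	uintptr_t n = atomic_load(&g->nevacuate);
	if(n == EVAC_LOCK || n == g->noldbuckets)
		return;  // someone else holds the lock, or the grow is already finished
	// Try to become the exclusive evacuator. Losing the race is fine:
	// whoever won is making progress on our behalf.
	if(!atomic_compare_exchange_strong(&g->nevacuate, &n, EVAC_LOCK))
		return;
	evacuate_one(g, n);                  // safe: we hold the evacuation lock
	atomic_store(&g->nevacuate, n + 1);  // record progress and release the lock
}
/* --- end sketch --- */

Writes still drive the grow as before; the sketch only illustrates why concurrent readers never evacuate the same bucket at the same time.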
@@ -107,6 +107,9 @@ struct Hmap
uintptr nevacuate; // progress counter for evacuation (buckets less than this have been evacuated)
};

// token to store in nevacuate field when locking the table to evacuate a bucket.
#define EVAC_LOCK ((uintptr)-1)

// possible flags
enum
{
@@ -114,7 +117,7 @@ enum
IndirectValue = 2, // storing pointers to values
Iterator = 4, // there may be an iterator using buckets
OldIterator = 8, // there may be an iterator using oldbuckets
CanFreeBucket = 16, // ok to free buckets
CanFreeBucket = 16, // ok to free buckets TODO: remove - unused
CanFreeKey = 32, // keys are indirect and ok to free keys
};
@@ -285,12 +288,14 @@ hash_init(MapType *t, Hmap *h, uint32 hint)

// Moves entries in oldbuckets[i] to buckets[i] and buckets[i+2^k].
// We leave the original bucket intact, except for the evacuated marks, so that
// iterators can still iterate through the old buckets.
// lookup and iterators can still iterate through the old buckets.
// Multiple threads must not be evacuating the same bucket at the same time.
static void
evacuate(MapType *t, Hmap *h, uintptr oldbucket)
{
Bucket *b;
Bucket *nextb;
Bucket *mainb;
Bucket *x, *y;
Bucket *newx, *newy;
uintptr xi, yi;
@@ -299,143 +304,154 @@ evacuate(MapType *t, Hmap *h, uintptr oldbucket)
uintptr i;
byte *k, *v;
byte *xk, *yk, *xv, *yv;
byte *ob;

b = (Bucket*)(h->oldbuckets + oldbucket * h->bucketsize);
mainb = (Bucket*)(h->oldbuckets + oldbucket * h->bucketsize);
newbit = (uintptr)1 << (h->B - 1);

if(!evacuated(b)) {
// TODO: reuse overflow buckets instead of using new ones, if there
// is no iterator using the old buckets. (If CanFreeBuckets and !OldIterator.)
if(evacuated(mainb)) // someone else already evacuated this bucket.
return;

x = (Bucket*)(h->buckets + oldbucket * h->bucketsize);
y = (Bucket*)(h->buckets + (oldbucket + newbit) * h->bucketsize);
clearbucket(x);
clearbucket(y);
xi = 0;
yi = 0;
xk = x->data;
yk = y->data;
xv = xk + h->keysize * BUCKETSIZE;
yv = yk + h->keysize * BUCKETSIZE;
do {
for(i = 0, k = b->data, v = k + h->keysize * BUCKETSIZE; i < BUCKETSIZE; i++, k += h->keysize, v += h->valuesize) {
if(b->tophash[i] == 0)
continue;
hash = h->hash0;
t->key->alg->hash(&hash, t->key->size, IK(h, k));
// NOTE: if key != key, then this hash could be (and probably will be)
// entirely different from the old hash. We effectively only update
// the B'th bit of the hash in this case.
if((hash & newbit) == 0) {
if(xi == BUCKETSIZE) {
if(checkgc) mstats.next_gc = mstats.heap_alloc;
newx = runtime·mallocgc(h->bucketsize, 0, 1, 0);
clearbucket(newx);
x->overflow = newx;
x = newx;
xi = 0;
xk = x->data;
xv = xk + h->keysize * BUCKETSIZE;
}
x->tophash[xi] = b->tophash[i];
if((h->flags & IndirectKey) != 0) {
*(byte**)xk = *(byte**)k; // copy pointer
} else {
t->key->alg->copy(t->key->size, xk, k); // copy value
}
if((h->flags & IndirectValue) != 0) {
*(byte**)xv = *(byte**)v;
} else {
t->elem->alg->copy(t->elem->size, xv, v);
}
xi++;
xk += h->keysize;
xv += h->valuesize;
b = mainb;
x = (Bucket*)(h->buckets + oldbucket * h->bucketsize);
y = (Bucket*)(h->buckets + (oldbucket + newbit) * h->bucketsize);
clearbucket(x);
clearbucket(y);
xi = 0;
yi = 0;
xk = x->data;
yk = y->data;
xv = xk + h->keysize * BUCKETSIZE;
yv = yk + h->keysize * BUCKETSIZE;
do {
for(i = 0, k = b->data, v = k + h->keysize * BUCKETSIZE; i < BUCKETSIZE; i++, k += h->keysize, v += h->valuesize) {
if(b->tophash[i] == 0)
continue;
hash = h->hash0;
t->key->alg->hash(&hash, t->key->size, IK(h, k));
// NOTE: if key != key, then this hash could be (and probably will be)
// entirely different from the old hash. We effectively only update
// the B'th bit of the hash in this case.
if((hash & newbit) == 0) {
if(xi == BUCKETSIZE) {
if(checkgc) mstats.next_gc = mstats.heap_alloc;
newx = runtime·mallocgc(h->bucketsize, 0, 1, 0);
clearbucket(newx);
x->overflow = newx;
x = newx;
xi = 0;
xk = x->data;
xv = xk + h->keysize * BUCKETSIZE;
}
x->tophash[xi] = b->tophash[i];
if((h->flags & IndirectKey) != 0) {
*(byte**)xk = *(byte**)k; // copy pointer
} else {
if(yi == BUCKETSIZE) {
if(checkgc) mstats.next_gc = mstats.heap_alloc;
newy = runtime·mallocgc(h->bucketsize, 0, 1, 0);
clearbucket(newy);
y->overflow = newy;
y = newy;
yi = 0;
yk = y->data;
yv = yk + h->keysize * BUCKETSIZE;
}
y->tophash[yi] = b->tophash[i];
if((h->flags & IndirectKey) != 0) {
*(byte**)yk = *(byte**)k;
} else {
t->key->alg->copy(t->key->size, yk, k);
}
if((h->flags & IndirectValue) != 0) {
*(byte**)yv = *(byte**)v;
} else {
t->elem->alg->copy(t->elem->size, yv, v);
}
yi++;
yk += h->keysize;
yv += h->valuesize;
t->key->alg->copy(t->key->size, xk, k); // copy value
}
}

// mark as evacuated so we don't do it again.
// this also tells any iterators that this data isn't golden anymore.
nextb = b->overflow;
b->overflow = (Bucket*)((uintptr)nextb + 1);

b = nextb;
} while(b != nil);

// Free old overflow buckets as much as we can.
if((h->flags & OldIterator) == 0) {
b = (Bucket*)(h->oldbuckets + oldbucket * h->bucketsize);
if((h->flags & CanFreeBucket) != 0) {
while((nextb = overflowptr(b)) != nil) {
b->overflow = nextb->overflow;
runtime·free(nextb);
if((h->flags & IndirectValue) != 0) {
*(byte**)xv = *(byte**)v;
} else {
t->elem->alg->copy(t->elem->size, xv, v);
}
xi++;
xk += h->keysize;
xv += h->valuesize;
} else {
// can't explicitly free overflow buckets, but at least
// we can unlink them.
b->overflow = (Bucket*)1;
if(yi == BUCKETSIZE) {
if(checkgc) mstats.next_gc = mstats.heap_alloc;
newy = runtime·mallocgc(h->bucketsize, 0, 1, 0);
clearbucket(newy);
y->overflow = newy;
y = newy;
yi = 0;
yk = y->data;
yv = yk + h->keysize * BUCKETSIZE;
}
y->tophash[yi] = b->tophash[i];
if((h->flags & IndirectKey) != 0) {
*(byte**)yk = *(byte**)k;
} else {
t->key->alg->copy(t->key->size, yk, k);
}
if((h->flags & IndirectValue) != 0) {
*(byte**)yv = *(byte**)v;
} else {
t->elem->alg->copy(t->elem->size, yv, v);
}
yi++;
yk += h->keysize;
yv += h->valuesize;
}
}
b = b->overflow;
} while(b != nil);

// Mark main bucket as evacuated. This write commits the
// bucket evacuation (readers can start using the new buckets).
b = mainb->overflow;
runtime·atomicstorep(&mainb->overflow, (Bucket*)((uintptr)b + 1));

// Mark overflow buckets for any iterators.
// These writes don't need to reach anyone until the next hashtable
// modification, so they don't need to be synchronized.
while(b != nil) {
nextb = b->overflow;
b->overflow = (Bucket*)((uintptr)nextb + 1);
b = nextb;
}

// advance evacuation mark
if(oldbucket == h->nevacuate) {
h->nevacuate = oldbucket + 1;
if(oldbucket + 1 == newbit) { // newbit == # of oldbuckets
// free main bucket array
if((h->flags & (OldIterator | CanFreeBucket)) == CanFreeBucket) {
ob = h->oldbuckets;
h->oldbuckets = nil;
runtime·free(ob);
} else {
h->oldbuckets = nil;
}
}
}
if(docheck)
check(t, h);
}
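For reference, the split that evacuate() performs above is driven by one bit of the key's hash: growing doubles the bucket array, so old bucket i splits into new buckets i and i+2^(B-1), selected by hash & newbit. A small standalone illustration of that decision follows; the function name and types are illustrative, not the runtime's.

/* --- sketch (not part of the CL) --- */
#include <stdint.h>

// Index of the new-table bucket for a key that lived in old bucket `oldbucket`
// of a table that grew from 2^(B-1) to 2^B buckets.
static uintptr_t new_bucket_index(uintptr_t hash, uintptr_t oldbucket, uint8_t B)
{
	uintptr_t newbit = (uintptr_t)1 << (B - 1);  // the bit that is new in the larger table
	if((hash & newbit) == 0)
		return oldbucket;           // stays at the same index (the "x" bucket in evacuate)
	return oldbucket + newbit;          // moves up by 2^(B-1) (the "y" bucket)
}
/* --- end sketch --- */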

// Ensure that bucket has been evacuated from oldbuckets so that we can modify it.
// Not multithreaded safe - you must not call this from anywhere except hash table
// modifications (where we're guaranteed external synchronization).
static void
grow_work(MapType *t, Hmap *h, uintptr bucket)
{
uintptr noldbuckets;
intptr n;

// evac the bucket we're going to need
noldbuckets = (uintptr)1 << (h->B - 1);
evacuate(t, h, bucket & (noldbuckets - 1));
// evac another bucket to make progress
n = h->nevacuate;
evacuate(t, h, n);
// record what we've done
h->nevacuate = n + 1;
if(n + 1 == noldbuckets)
h->oldbuckets = nil;
}

// Do some work for growing the table.
// Multithreaded-safe.
static void
grow_work_read(MapType *t, Hmap *h) {
uintptr noldbuckets;
intptr n;

noldbuckets = (uintptr)1 << (h->B - 1);

// make sure we evacuate the oldbucket corresponding
// to the bucket we're about to use
evacuate(t, h, bucket & (noldbuckets - 1));
// Get evacuation lock. If we can't get it, fine, that means
// someone else is making progress which is good enough.
n = h->nevacuate;
if(n != EVAC_LOCK && // no one has evac lock
n != noldbuckets && // there's still work to do
runtime·casp((void**)&h->nevacuate, (void*)n, (void*)EVAC_LOCK)) { // we acquired lock
// We're now the exclusive evacuator.
evacuate(t, h, n);

// evacuate one more oldbucket to make progress on growing
if(h->oldbuckets != nil)
evacuate(t, h, h->nevacuate);
// record that we're done.
runtime·atomicstorep((void**)&h->nevacuate, (void*)(n + 1));
if(n + 1 == noldbuckets) {
// commit finishing of grow.
runtime·atomicstorep(&h->oldbuckets, nil);
// note: can't free oldbuckets, someone might be using it.
// it will have to get GCed.
}
}
}
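The commit point readers look at is the low bit of a bucket's overflow pointer: evacuate() above publishes it for the main bucket with runtime·atomicstorep, lookups test it with an atomic load, and overflowptr() strips it when walking an overflow chain. Below is a hedged sketch of that tagged-pointer encoding using C11 atomics; the helper names are illustrative, and the low bit is free to use because buckets are aligned allocations.

/* --- sketch (not part of the CL) --- */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

typedef struct Bucket Bucket;
struct Bucket {
	_Atomic(Bucket*) overflow;  // next overflow bucket; low bit doubles as the "evacuated" mark
	// keys/values omitted in this sketch
};

// Publish the evacuated mark so concurrent readers switch to the new buckets.
static void mark_evacuated(Bucket *b)
{
	Bucket *next = atomic_load(&b->overflow);
	atomic_store(&b->overflow, (Bucket*)((uintptr_t)next | 1));
}

// Readers: has this bucket already been copied into the new table?
static bool is_evacuated(Bucket *b)
{
	return ((uintptr_t)atomic_load(&b->overflow) & 1) != 0;
}

// Follow the overflow chain while ignoring the mark (the role of overflowptr()).
static Bucket* overflow_ptr(Bucket *b)
{
	return (Bucket*)((uintptr_t)atomic_load(&b->overflow) & ~(uintptr_t)1);
}
/* --- end sketch --- */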

static void

@@ -480,7 +496,7 @@ hash_lookup(MapType *t, Hmap *h, byte **keyp)
{
void *key;
uintptr hash;
uintptr bucket, oldbucket;
uintptr bucket;
Bucket *b;
uint8 top;
uintptr i;

@@ -495,13 +511,14 @@ hash_lookup(MapType *t, Hmap *h, byte **keyp)
hash = h->hash0;
t->key->alg->hash(&hash, t->key->size, key);
bucket = hash & (((uintptr)1 << h->B) - 1);
if(h->oldbuckets != nil) {
oldbucket = bucket & (((uintptr)1 << (h->B - 1)) - 1);
b = (Bucket*)(h->oldbuckets + oldbucket * h->bucketsize);
if(evacuated(b)) {
b = (Bucket*)(h->buckets + bucket * h->bucketsize);
}
b = runtime·atomicloadp(&h->oldbuckets);
if(b != nil) {
grow_work_read(t, h);
b = (Bucket*)((byte*)b + (bucket & (((uintptr)1 << (h->B - 1)) - 1)) * h->bucketsize);
if(((uintptr)runtime·atomicloadp(&b->overflow) & 1) != 0)
goto newbucket;
} else {
newbucket:
b = (Bucket*)(h->buckets + bucket * h->bucketsize);
}
top = hash >> (sizeof(uintptr)*8 - 8);
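Taken together, the new read prologue in hash_lookup above (repeated in the HASH_LOOKUP1/HASH_LOOKUP2 fast paths further down) reduces to the decision below. This is a condensed paraphrase with simplified stand-in types, not the runtime's structures.

/* --- sketch (not part of the CL) --- */
#include <stdatomic.h>
#include <stddef.h>
#include <stdint.h>

typedef struct Bucket Bucket;
struct Bucket { _Atomic(Bucket*) overflow; };  // low bit of overflow = evacuated

typedef struct {
	uint8_t B;                   // log2 of the current number of buckets
	size_t bucketsize;
	char *buckets;               // current bucket array
	_Atomic(char*) oldbuckets;   // non-NULL while a grow is in progress
} Map;

// Pick the bucket a reader should search for a given hash.
static Bucket* lookup_bucket(Map *h, uintptr_t hash)
{
	uintptr_t bucket = hash & (((uintptr_t)1 << h->B) - 1);
	char *old = atomic_load(&h->oldbuckets);
	if(old != NULL) {
		// (the real code calls grow_work_read here to help finish the grow)
		uintptr_t oldbucket = bucket & (((uintptr_t)1 << (h->B - 1)) - 1);
		Bucket *b = (Bucket*)(old + oldbucket * h->bucketsize);
		if(((uintptr_t)atomic_load(&b->overflow) & 1) == 0)
			return b;  // not yet evacuated: the old bucket still holds the live data
	}
	return (Bucket*)(h->buckets + bucket * h->bucketsize);
}
/* --- end sketch --- */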

@@ -518,7 +535,7 @@ hash_lookup(MapType *t, Hmap *h, byte **keyp)
}
}
}
b = b->overflow;
b = overflowptr(b);
} while(b != nil);
return nil;
}

@@ -812,6 +829,7 @@ hash_next(struct hash_iter *it)
uintptr bucket, oldbucket;
uintptr hash;
Bucket *b;
byte *oldbuckets;
uintptr i;
intptr check_bucket;
bool eq;

@@ -833,14 +851,15 @@ next:
it->value = nil;
return;
}
if(h->oldbuckets != nil && it->B == h->B) {
if(it->B == h->B && (oldbuckets = runtime·atomicloadp(&h->oldbuckets)) != nil) {
// Iterator was started in the middle of a grow, and the grow isn't done yet.
// If the bucket we're looking at hasn't been filled in yet (i.e. the old
// bucket hasn't been evacuated) then we need to iterate through the old
// bucket and only return the ones that will be migrated to this bucket.
grow_work_read(t, h);
oldbucket = bucket & (((uintptr)1 << (it->B - 1)) - 1);
b = (Bucket*)(h->oldbuckets + oldbucket * h->bucketsize);
if(!evacuated(b)) {
b = (Bucket*)(oldbuckets + oldbucket * h->bucketsize);
if(((uintptr)runtime·atomicloadp(&b->overflow) & 1) == 0) {
check_bucket = bucket;
} else {
b = (Bucket*)(it->buckets + bucket * h->bucketsize);
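The iterator case above needs one extra filter: while walking an old bucket that has not been evacuated, it must only report the entries that will migrate to the new-table bucket currently being visited (that is what check_bucket is for), leaving the rest for the sibling bucket. A hedged sketch of that filter follows; the real code recomputes the hash the same way but also has to deal with keys that are not equal to themselves.

/* --- sketch (not part of the CL) --- */
#include <stdbool.h>
#include <stdint.h>

// While visiting new-table bucket `check_bucket` of a table with 2^B buckets,
// should an entry whose key hashes to `hash` be reported from the shared,
// unevacuated old bucket? Illustrative only.
static bool belongs_to_check_bucket(uintptr_t hash, uintptr_t check_bucket, uint8_t B)
{
	// Each old bucket splits into two new buckets; report only the entries
	// destined for check_bucket and skip the ones bound for its sibling.
	return (hash & (((uintptr_t)1 << B) - 1)) == check_bucket;
}
/* --- end sketch --- */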
@@ -17,7 +17,7 @@ void
HASH_LOOKUP1(MapType *t, Hmap *h, KEYTYPE key, byte *value)
{
uintptr hash;
uintptr bucket, oldbucket;
uintptr bucket;
Bucket *b;
uintptr i;
KEYTYPE *k;

@@ -83,13 +83,14 @@ dohash:
hash = h->hash0;
HASHFUNC(&hash, sizeof(KEYTYPE), &key);
bucket = hash & (((uintptr)1 << h->B) - 1);
if(h->oldbuckets != nil) {
oldbucket = bucket & (((uintptr)1 << (h->B - 1)) - 1);
b = (Bucket*)(h->oldbuckets + oldbucket * h->bucketsize);
if(evacuated(b)) {
b = (Bucket*)(h->buckets + bucket * h->bucketsize);
}
b = runtime·atomicloadp(&h->oldbuckets);
if(b != nil) {
grow_work_read(t, h);
b = (Bucket*)((byte*)b + (bucket & (((uintptr)1 << (h->B - 1)) - 1)) * h->bucketsize);
if(((uintptr)runtime·atomicloadp(&b->overflow) & 1) != 0)
goto newbucket;
} else {
newbucket:
b = (Bucket*)(h->buckets + bucket * h->bucketsize);
}
top = hash >> (sizeof(uintptr)*8 - 8);

@@ -103,7 +104,7 @@ dohash:
return;
}
}
b = b->overflow;
b = overflowptr(b);
} while(b != nil);
}
value = empty_value;

@@ -115,7 +116,7 @@ void
HASH_LOOKUP2(MapType *t, Hmap *h, KEYTYPE key, byte *value, bool res)
{
uintptr hash;
uintptr bucket, oldbucket;
uintptr bucket;
Bucket *b;
uintptr i;
KEYTYPE *k;

@@ -187,13 +188,14 @@ dohash:
hash = h->hash0;
HASHFUNC(&hash, sizeof(KEYTYPE), &key);
bucket = hash & (((uintptr)1 << h->B) - 1);
if(h->oldbuckets != nil) {
oldbucket = bucket & (((uintptr)1 << (h->B - 1)) - 1);
b = (Bucket*)(h->oldbuckets + oldbucket * h->bucketsize);
if(evacuated(b)) {
b = (Bucket*)(h->buckets + bucket * h->bucketsize);
}
b = runtime·atomicloadp(&h->oldbuckets);
if(b != nil) {
grow_work_read(t, h);
b = (Bucket*)((byte*)b + (bucket & (((uintptr)1 << (h->B - 1)) - 1)) * h->bucketsize);
if(((uintptr)runtime·atomicloadp(&b->overflow) & 1) != 0)
goto newbucket;
} else {
newbucket:
b = (Bucket*)(h->buckets + bucket * h->bucketsize);
}
top = hash >> (sizeof(uintptr)*8 - 8);

@@ -209,7 +211,7 @@ dohash:
return;
}
}
b = b->overflow;
b = overflowptr(b);
} while(b != nil);
}
value = empty_value;