runtime: on bigger maps, start iterator at a random bucket.

This change brings the iter/delete pattern down to O(n lgn) from O(n^2).

Fixes #8412.

before:
BenchmarkMapPop100	   50000	     32498 ns/op
BenchmarkMapPop1000	     500	   3244851 ns/op
BenchmarkMapPop10000	       5	 270276855 ns/op

after:
BenchmarkMapPop100	  100000	     16169 ns/op
BenchmarkMapPop1000	    5000	    300416 ns/op
BenchmarkMapPop10000	     300	   5990814 ns/op

LGTM=iant
R=golang-codereviews, iant, khr
CC=golang-codereviews
https://golang.org/cl/141270043
diff --git a/src/runtime/hashmap.go b/src/runtime/hashmap.go
index 55287f6..cbcc6c4 100644
--- a/src/runtime/hashmap.go
+++ b/src/runtime/hashmap.go
@@ -134,11 +134,12 @@
 	h           *hmap
 	buckets     unsafe.Pointer // bucket ptr at hash_iter initialization time
 	bptr        *bmap          // current bucket
+	startBucket uintptr        // bucket iteration started at
 	offset      uint8          // intra-bucket offset to start from during iteration (should be big enough to hold bucketCnt-1)
-	done        bool
+	wrapped     bool           // already wrapped around from end of bucket array to beginning
 	B           uint8
+	i	    uint8
 	bucket      uintptr
-	i           uintptr
 	checkBucket uintptr
 }
 
@@ -560,10 +561,22 @@
 	it.B = h.B
 	it.buckets = h.buckets
 
+	// decide where to start
+	switch {
+	case h.B == 0:
+		it.startBucket = 0
+		it.offset = uint8(fastrand1()) & (bucketCnt - 1)
+	case h.B <= 31:
+		it.startBucket = uintptr(fastrand1()) & (uintptr(1)<<h.B-1)
+		it.offset = 0
+	default:
+		it.startBucket = (uintptr(fastrand1()) + uintptr(fastrand1())<<31) & (uintptr(1)<<h.B-1)
+		it.offset = 0
+	}
+
 	// iterator state
-	it.bucket = 0
-	it.offset = uint8(fastrand1() & (bucketCnt - 1))
-	it.done = false
+	it.bucket = it.startBucket
+	it.wrapped = false
 	it.bptr = nil
 
 	// Remember we have an iterator.
@@ -596,7 +609,7 @@
 
 next:
 	if b == nil {
-		if it.done {
+		if bucket == it.startBucket && it.wrapped {
 			// end of iteration
 			it.key = nil
 			it.value = nil
@@ -622,14 +635,14 @@
 		bucket++
 		if bucket == uintptr(1)<<it.B {
 			bucket = 0
-			it.done = true
+			it.wrapped = true
 		}
 		i = 0
 	}
 	for ; i < bucketCnt; i++ {
-		offi := (i + uintptr(it.offset)) & (bucketCnt - 1)
-		k := add(unsafe.Pointer(b), dataOffset+offi*uintptr(t.keysize))
-		v := add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+offi*uintptr(t.valuesize))
+		offi := (i + it.offset) & (bucketCnt - 1)
+		k := add(unsafe.Pointer(b), dataOffset+uintptr(offi)*uintptr(t.keysize))
+		v := add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+uintptr(offi)*uintptr(t.valuesize))
 		if b.tophash[offi] != empty && b.tophash[offi] != evacuatedEmpty {
 			if checkBucket != noCheck {
 				// Special case: iterator was started during a grow and the