cmd/gc: allocate non-escaping maps on stack

Extend escape analysis to make(map[k]v).
If it does not escape, allocate temp buffer for hmap and one bucket on stack.

There are 75 cases of non-escaping maps in std lib.

benchmark                                    old allocs     new allocs     delta
BenchmarkConcurrentStmtQuery                 16161          15161          -6.19%
BenchmarkConcurrentTxQuery                   17658          16658          -5.66%
BenchmarkConcurrentTxStmtQuery               16157          15156          -6.20%
BenchmarkConcurrentRandom                    13637          13114          -3.84%
BenchmarkManyConcurrentQueries               22             20             -9.09%
BenchmarkDecodeComplex128Slice               250            188            -24.80%
BenchmarkDecodeFloat64Slice                  250            188            -24.80%
BenchmarkDecodeInt32Slice                    250            188            -24.80%
BenchmarkDecodeStringSlice                   2250           2188           -2.76%
BenchmarkNewEmptyMap                         1              0              -100.00%
BenchmarkNewSmallMap                         2              0              -100.00%

benchmark                old ns/op     new ns/op     delta
BenchmarkNewEmptyMap     124           55.7          -55.08%
BenchmarkNewSmallMap     317           148           -53.31%

benchmark                old allocs     new allocs     delta
BenchmarkNewEmptyMap     1              0              -100.00%
BenchmarkNewSmallMap     2              0              -100.00%

benchmark                old bytes     new bytes     delta
BenchmarkNewEmptyMap     48            0             -100.00%
BenchmarkNewSmallMap     192           0             -100.00%

Fixes #5449

Change-Id: I24fa66f949d2f138885d9e66a0d160240dc9e8fa
Reviewed-on: https://go-review.googlesource.com/3508
Reviewed-by: Keith Randall <khr@golang.org>
Run-TryBot: Dmitry Vyukov <dvyukov@google.com>
diff --git a/src/cmd/gc/builtin.c b/src/cmd/gc/builtin.c
index cdcc8e7..d381566 100644
--- a/src/cmd/gc/builtin.c
+++ b/src/cmd/gc/builtin.c
@@ -65,7 +65,7 @@
 	"func @\"\".efaceeq (@\"\".i1·2 any, @\"\".i2·3 any) (@\"\".ret·1 bool)\n"
 	"func @\"\".ifacethash (@\"\".i1·2 any) (@\"\".ret·1 uint32)\n"
 	"func @\"\".efacethash (@\"\".i1·2 any) (@\"\".ret·1 uint32)\n"
-	"func @\"\".makemap (@\"\".mapType·2 *byte, @\"\".hint·3 int64) (@\"\".hmap·1 map[any]any)\n"
+	"func @\"\".makemap (@\"\".mapType·2 *byte, @\"\".hint·3 int64, @\"\".mapbuf·4 *any, @\"\".bucketbuf·5 *any) (@\"\".hmap·1 map[any]any)\n"
 	"func @\"\".mapaccess1 (@\"\".mapType·2 *byte, @\"\".hmap·3 map[any]any, @\"\".key·4 *any) (@\"\".val·1 *any)\n"
 	"func @\"\".mapaccess1_fast32 (@\"\".mapType·2 *byte, @\"\".hmap·3 map[any]any, @\"\".key·4 any) (@\"\".val·1 *any)\n"
 	"func @\"\".mapaccess1_fast64 (@\"\".mapType·2 *byte, @\"\".hmap·3 map[any]any, @\"\".key·4 any) (@\"\".val·1 *any)\n"
diff --git a/src/cmd/gc/go.h b/src/cmd/gc/go.h
index 93eba2e..5be8ce5 100644
--- a/src/cmd/gc/go.h
+++ b/src/cmd/gc/go.h
@@ -1321,7 +1321,9 @@
 Sym*	tracksym(Type *t);
 Sym*	typesymprefix(char *prefix, Type *t);
 int	haspointers(Type *t);
+Type*	hmap(Type *t);
 Type*	hiter(Type* t);
+Type*	mapbucket(Type *t);
 
 /*
  *	select.c
diff --git a/src/cmd/gc/reflect.c b/src/cmd/gc/reflect.c
index ee00ff0..852485d 100644
--- a/src/cmd/gc/reflect.c
+++ b/src/cmd/gc/reflect.c
@@ -117,16 +117,29 @@
 };
 
 static Type*
+makefield(char *name, Type *t)
+{
+	Type *f;
+
+	f = typ(TFIELD);
+	f->type = t;
+	f->sym = mal(sizeof(Sym));
+	f->sym->name = name;
+	return f;
+}
+
+Type*
 mapbucket(Type *t)
 {
 	Type *keytype, *valtype;
-	Type *bucket;
-	Type *overflowfield, *keysfield, *valuesfield;
-	int32 offset;
+	Type *bucket, *arr;
+	Type *field[4];
+	int32 n;
 
 	if(t->bucket != T)
 		return t->bucket;
 
+	bucket = typ(TSTRUCT);
 	keytype = t->down;
 	valtype = t->type;
 	dowidth(keytype);
@@ -136,119 +149,69 @@
 	if(valtype->width > MAXVALSIZE)
 		valtype = ptrto(valtype);
 
-	bucket = typ(TSTRUCT);
-	bucket->noalg = 1;
-
 	// The first field is: uint8 topbits[BUCKETSIZE].
-	// We don't need to encode it as GC doesn't care about it.
-	offset = BUCKETSIZE * 1;
-
-	keysfield = typ(TFIELD);
-	keysfield->type = typ(TARRAY);
-	keysfield->type->type = keytype;
-	keysfield->type->bound = BUCKETSIZE;
-	keysfield->type->width = BUCKETSIZE * keytype->width;
-	keysfield->width = offset;
-	keysfield->sym = mal(sizeof(Sym));
-	keysfield->sym->name = "keys";
-	offset += BUCKETSIZE * keytype->width;
-
-	valuesfield = typ(TFIELD);
-	valuesfield->type = typ(TARRAY);
-	valuesfield->type->type = valtype;
-	valuesfield->type->bound = BUCKETSIZE;
-	valuesfield->type->width = BUCKETSIZE * valtype->width;
-	valuesfield->width = offset;
-	valuesfield->sym = mal(sizeof(Sym));
-	valuesfield->sym->name = "values";
-	offset += BUCKETSIZE * valtype->width;
-
-	overflowfield = typ(TFIELD);
-	overflowfield->type = ptrto(bucket);
-	overflowfield->width = offset;         // "width" is offset in structure
-	overflowfield->sym = mal(sizeof(Sym)); // not important but needs to be set to give this type a name
-	overflowfield->sym->name = "overflow";
-	offset += widthptr;
-	
-	// Pad to the native integer alignment.
-	// This is usually the same as widthptr; the exception (as usual) is nacl/amd64.
-	if(widthreg > widthptr)
-		offset += widthreg - widthptr;
+	arr = typ(TARRAY);
+	arr->type = types[TUINT8];
+	arr->bound = BUCKETSIZE;
+	field[0] = makefield("topbits", arr);
+	arr = typ(TARRAY);
+	arr->type = keytype;
+	arr->bound = BUCKETSIZE;
+	field[1] = makefield("keys", arr);
+	arr = typ(TARRAY);
+	arr->type = valtype;
+	arr->bound = BUCKETSIZE;
+	field[2] = makefield("values", arr);
+	field[3] = makefield("overflow", ptrto(bucket));
 
 	// link up fields
-	bucket->type = keysfield;
-	keysfield->down = valuesfield;
-	valuesfield->down = overflowfield;
-	overflowfield->down = T;
+	bucket->noalg = 1;
+	bucket->local = t->local;
+	bucket->type = field[0];
+	for(n = 0; n < nelem(field)-1; n++)
+		field[n]->down = field[n+1];
+	field[nelem(field)-1]->down = T;
+	dowidth(bucket);
 
 	// See comment on hmap.overflow in ../../runtime/hashmap.go.
 	if(!haspointers(t->type) && !haspointers(t->down))
 		bucket->haspointers = 1;  // no pointers
 
-	bucket->width = offset;
-	bucket->local = t->local;
 	t->bucket = bucket;
 	bucket->map = t;
 	return bucket;
 }
 
-// Builds a type respresenting a Hmap structure for
-// the given map type.  This type is not visible to users -
-// we include only enough information to generate a correct GC
-// program for it.
+// Builds a type representing a Hmap structure for the given map type.
 // Make sure this stays in sync with ../../runtime/hashmap.go!
-static Type*
+Type*
 hmap(Type *t)
 {
 	Type *h, *bucket;
-	Type *bucketsfield, *oldbucketsfield, *overflowfield;
-	int32 offset;
+	Type *field[8];
+	int32 n;
 
 	if(t->hmap != T)
 		return t->hmap;
 
 	bucket = mapbucket(t);
+	field[0] = makefield("count", types[TINT]);
+	field[1] = makefield("flags", types[TUINT8]);
+	field[2] = makefield("B", types[TUINT8]);
+	field[3] = makefield("hash0", types[TUINT32]);
+	field[4] = makefield("buckets", ptrto(bucket));
+	field[5] = makefield("oldbuckets", ptrto(bucket));
+	field[6] = makefield("nevacuate", types[TUINTPTR]);
+	field[7] = makefield("overflow", types[TUNSAFEPTR]);
+
 	h = typ(TSTRUCT);
 	h->noalg = 1;
-
-	offset = widthint; // count
-	offset += 1;       // flags
-	offset += 1;       // B
-	offset += 2;       // padding
-	offset += 4;       // hash0
-	offset = (offset + widthptr - 1) / widthptr * widthptr;
-	
-	bucketsfield = typ(TFIELD);
-	bucketsfield->type = ptrto(bucket);
-	bucketsfield->width = offset;
-	bucketsfield->sym = mal(sizeof(Sym));
-	bucketsfield->sym->name = "buckets";
-	offset += widthptr;
-
-	oldbucketsfield = typ(TFIELD);
-	oldbucketsfield->type = ptrto(bucket);
-	oldbucketsfield->width = offset;
-	oldbucketsfield->sym = mal(sizeof(Sym));
-	oldbucketsfield->sym->name = "oldbuckets";
-	offset += widthptr;
-
-	offset += widthptr; // nevacuate
-
-	overflowfield = typ(TFIELD);
-	overflowfield->type = types[TUNSAFEPTR];
-	overflowfield->width = offset;
-	overflowfield->sym = mal(sizeof(Sym));
-	overflowfield->sym->name = "overflow";
-	offset += widthptr;
-
-	// link up fields
-	h->type = bucketsfield;
-	bucketsfield->down = oldbucketsfield;
-	oldbucketsfield->down = overflowfield;
-	overflowfield->down = T;
-
-	h->width = offset;
 	h->local = t->local;
+	h->type = field[0];
+	for(n = 0; n < nelem(field)-1; n++)
+		field[n]->down = field[n+1];
+	field[nelem(field)-1]->down = T;
+	dowidth(h);
 	t->hmap = h;
 	h->map = t;
 	return h;
@@ -257,8 +220,8 @@
 Type*
 hiter(Type *t)
 {
-	int32 n, off;
-	Type *field[9];
+	int32 n;
+	Type *field[12];
 	Type *i;
 
 	if(t->hiter != T)
@@ -272,73 +235,37 @@
 	//    h *Hmap
 	//    buckets *Bucket
 	//    bptr *Bucket
-	//    overflow unsafe.Pointer
-	//    other [4]uintptr
+	//    overflow0 unsafe.Pointer
+	//    overflow1 unsafe.Pointer
+	//    startBucket uintptr
+	//    stuff uintptr
+	//    bucket uintptr
+	//    checkBucket uintptr
 	// }
 	// must match ../../runtime/hashmap.c:hash_iter.
-	field[0] = typ(TFIELD);
-	field[0]->type = ptrto(t->down);
-	field[0]->sym = mal(sizeof(Sym));
-	field[0]->sym->name = "key";
-	
-	field[1] = typ(TFIELD);
-	field[1]->type = ptrto(t->type);
-	field[1]->sym = mal(sizeof(Sym));
-	field[1]->sym->name = "val";
-	
-	field[2] = typ(TFIELD);
-	field[2]->type = ptrto(types[TUINT8]); // TODO: is there a Type type?
-	field[2]->sym = mal(sizeof(Sym));
-	field[2]->sym->name = "t";
-	
-	field[3] = typ(TFIELD);
-	field[3]->type = ptrto(hmap(t));
-	field[3]->sym = mal(sizeof(Sym));
-	field[3]->sym->name = "h";
-	
-	field[4] = typ(TFIELD);
-	field[4]->type = ptrto(mapbucket(t));
-	field[4]->sym = mal(sizeof(Sym));
-	field[4]->sym->name = "buckets";
-	
-	field[5] = typ(TFIELD);
-	field[5]->type = ptrto(mapbucket(t));
-	field[5]->sym = mal(sizeof(Sym));
-	field[5]->sym->name = "bptr";
-	
-	field[6] = typ(TFIELD);
-	field[6]->type = types[TUNSAFEPTR];
-	field[6]->sym = mal(sizeof(Sym));
-	field[6]->sym->name = "overflow0";
-
-	field[7] = typ(TFIELD);
-	field[7]->type = types[TUNSAFEPTR];
-	field[7]->sym = mal(sizeof(Sym));
-	field[7]->sym->name = "overflow1";
-
-	// all other non-pointer fields
-	field[8] = typ(TFIELD);
-	field[8]->type = typ(TARRAY);
-	field[8]->type->type = types[TUINTPTR];
-	field[8]->type->bound = 4;
-	field[8]->type->width = 4 * widthptr;
-	field[8]->sym = mal(sizeof(Sym));
-	field[8]->sym->name = "other";
+	field[0] = makefield("key", ptrto(t->down));
+	field[1] = makefield("val", ptrto(t->type));
+	field[2] = makefield("t", ptrto(types[TUINT8]));
+	field[3] = makefield("h", ptrto(hmap(t)));
+	field[4] = makefield("buckets", ptrto(mapbucket(t)));
+	field[5] = makefield("bptr", ptrto(mapbucket(t)));
+	field[6] = makefield("overflow0", types[TUNSAFEPTR]);
+	field[7] = makefield("overflow1", types[TUNSAFEPTR]);
+	field[8] = makefield("startBucket", types[TUINTPTR]);
+	field[9] = makefield("stuff", types[TUINTPTR]); // offset+wrapped+B+I
+	field[10] = makefield("bucket", types[TUINTPTR]);
+	field[11] = makefield("checkBucket", types[TUINTPTR]);
 	
 	// build iterator struct holding the above fields
 	i = typ(TSTRUCT);
 	i->noalg = 1;
 	i->type = field[0];
-	off = 0;
-	for(n = 0; n < nelem(field)-1; n++) {
+	for(n = 0; n < nelem(field)-1; n++)
 		field[n]->down = field[n+1];
-		field[n]->width = off;
-		off += field[n]->type->width;
-	}
 	field[nelem(field)-1]->down = T;
-	off += field[nelem(field)-1]->type->width;
-	if(off != 12 * widthptr)
-		yyerror("hash_iter size not correct %d %d", off, 11 * widthptr);
+	dowidth(i);
+	if(i->width != 12 * widthptr)
+		yyerror("hash_iter size not correct %d %d", i->width, 12 * widthptr);
 	t->hiter = i;
 	i->map = t;
 	return i;
diff --git a/src/cmd/gc/runtime.go b/src/cmd/gc/runtime.go
index 8648a97..0a4c1b8 100644
--- a/src/cmd/gc/runtime.go
+++ b/src/cmd/gc/runtime.go
@@ -86,7 +86,7 @@
 func efacethash(i1 any) (ret uint32)
 
 // *byte is really *runtime.Type
-func makemap(mapType *byte, hint int64) (hmap map[any]any)
+func makemap(mapType *byte, hint int64, mapbuf *any, bucketbuf *any) (hmap map[any]any)
 func mapaccess1(mapType *byte, hmap map[any]any, key *any) (val *any)
 func mapaccess1_fast32(mapType *byte, hmap map[any]any, key any) (val *any)
 func mapaccess1_fast64(mapType *byte, hmap map[any]any, key any) (val *any)
diff --git a/src/cmd/gc/walk.c b/src/cmd/gc/walk.c
index 99dd0d3..e2d74e4 100644
--- a/src/cmd/gc/walk.c
+++ b/src/cmd/gc/walk.c
@@ -1330,12 +1330,32 @@
 		t = n->type;
 
 		fn = syslook("makemap", 1);
-		argtype(fn, t->down);	// any-1
-		argtype(fn, t->type);	// any-2
 
-		n = mkcall1(fn, n->type, init,
-			typename(n->type),
-			conv(n->left, types[TINT64]));
+		a = nodnil(); // hmap buffer
+		r = nodnil(); // bucket buffer
+		if(n->esc == EscNone) {
+			// Allocate hmap buffer on stack.
+			var = temp(hmap(t));
+			a = nod(OAS, var, N); // zero temp
+			typecheck(&a, Etop);
+			*init = list(*init, a);
+			a = nod(OADDR, var, N);
+
+			// Allocate one bucket on stack.
+			// Maximum key/value size is 128 bytes, larger objects
+			// are stored with an indirection. So max bucket size is 2048+eps.
+			var = temp(mapbucket(t));
+			r = nod(OAS, var, N); // zero temp
+			typecheck(&r, Etop);
+			*init = list(*init, r);
+			r = nod(OADDR, var, N);
+		}
+
+		argtype(fn, hmap(t));	// hmap buffer
+		argtype(fn, mapbucket(t));	// bucket buffer
+		argtype(fn, t->down);	// key type
+		argtype(fn, t->type);	// value type
+		n = mkcall1(fn, n->type, init, typename(n->type), conv(n->left, types[TINT64]), a, r);
 		goto ret;
 
 	case OMAKESLICE:
diff --git a/src/runtime/hashmap.go b/src/runtime/hashmap.go
index 058d1c7..c7c1198 100644
--- a/src/runtime/hashmap.go
+++ b/src/runtime/hashmap.go
@@ -182,8 +182,14 @@
 	}
 }
 
-func makemap(t *maptype, hint int64) *hmap {
+// makemap implements a Go map creation make(map[k]v, hint)
+// If the compiler has determined that the map or the first bucket
+// can be created on the stack, h and/or bucket may be non-nil.
+// If h != nil, the map can be created directly in h.
+// If bucket != nil, bucket can be used as the first bucket.
+func makemap(t *maptype, hint int64, h *hmap, bucket unsafe.Pointer) *hmap {
 	if sz := unsafe.Sizeof(hmap{}); sz > 48 || sz != uintptr(t.hmap.size) {
+		println("runtime: sizeof(hmap) =", sz, ", t.hmap.size =", t.hmap.size)
 		throw("bad hmap size")
 	}
 
@@ -238,7 +244,7 @@
 	// allocate initial hash table
 	// if B == 0, the buckets field is allocated lazily later (in mapassign)
 	// If hint is large zeroing this memory could take a while.
-	var buckets unsafe.Pointer
+	buckets := bucket
 	if B != 0 {
 		if checkgc {
 			memstats.next_gc = memstats.heap_alloc
@@ -250,7 +256,9 @@
 	if checkgc {
 		memstats.next_gc = memstats.heap_alloc
 	}
-	h := (*hmap)(newobject(t.hmap))
+	if h == nil {
+		h = (*hmap)(newobject(t.hmap))
+	}
 	h.count = 0
 	h.B = B
 	h.flags = 0
@@ -956,7 +964,7 @@
 
 //go:linkname reflect_makemap reflect.makemap
 func reflect_makemap(t *maptype) *hmap {
-	return makemap(t, 0)
+	return makemap(t, 0, nil, nil)
 }
 
 //go:linkname reflect_mapaccess reflect.mapaccess
diff --git a/src/runtime/map_test.go b/src/runtime/map_test.go
index 92da2d8..55f1f82 100644
--- a/src/runtime/map_test.go
+++ b/src/runtime/map_test.go
@@ -535,3 +535,13 @@
 func BenchmarkMapPop100(b *testing.B)   { benchmarkMapPop(b, 100) }
 func BenchmarkMapPop1000(b *testing.B)  { benchmarkMapPop(b, 1000) }
 func BenchmarkMapPop10000(b *testing.B) { benchmarkMapPop(b, 10000) }
+
+func TestNonEscapingMap(t *testing.T) {
+	n := testing.AllocsPerRun(1000, func() {
+		m := make(map[int]int)
+		m[0] = 0
+	})
+	if n != 0 {
+		t.Fatalf("want 0 allocs, got %v", n)
+	}
+}
diff --git a/src/runtime/mapspeed_test.go b/src/runtime/mapspeed_test.go
index 119eb3f..b036d2a 100644
--- a/src/runtime/mapspeed_test.go
+++ b/src/runtime/mapspeed_test.go
@@ -234,6 +234,15 @@
 	}
 }
 
+func BenchmarkNewSmallMap(b *testing.B) {
+	b.ReportAllocs()
+	for i := 0; i < b.N; i++ {
+		m := make(map[int]int)
+		m[0] = 0
+		m[1] = 1
+	}
+}
+
 func BenchmarkMapIter(b *testing.B) {
 	m := make(map[int]bool)
 	for i := 0; i < 8; i++ {
diff --git a/test/escape2.go b/test/escape2.go
index 947dcc9..ca9f614 100644
--- a/test/escape2.go
+++ b/test/escape2.go
@@ -1751,3 +1751,20 @@
 	r := []rune{1, 2, 3} // ERROR "\[\]rune literal does not escape"
 	sink = string(r)     // ERROR "string\(r\) escapes to heap"
 }
+
+func makemap0() {
+	m := make(map[int]int) // ERROR "make\(map\[int\]int\, 0\) does not escape"
+	m[0] = 0
+	m[1]++
+	delete(m, 1)
+	sink = m[0]
+}
+
+func makemap1() map[int]int {
+	return make(map[int]int) // ERROR "make\(map\[int\]int\, 0\) escapes to heap"
+}
+
+func makemap2() {
+	m := make(map[int]int) // ERROR "make\(map\[int\]int\, 0\) escapes to heap"
+	sink = m
+}
diff --git a/test/escape2n.go b/test/escape2n.go
index d9d95e8..ddd5693 100644
--- a/test/escape2n.go
+++ b/test/escape2n.go
@@ -1751,3 +1751,20 @@
 	r := []rune{1, 2, 3} // ERROR "\[\]rune literal does not escape"
 	sink = string(r)     // ERROR "string\(r\) escapes to heap"
 }
+
+func makemap0() {
+	m := make(map[int]int) // ERROR "make\(map\[int\]int\, 0\) does not escape"
+	m[0] = 0
+	m[1]++
+	delete(m, 1)
+	sink = m[0]
+}
+
+func makemap1() map[int]int {
+	return make(map[int]int) // ERROR "make\(map\[int\]int\, 0\) escapes to heap"
+}
+
+func makemap2() {
+	m := make(map[int]int) // ERROR "make\(map\[int\]int\, 0\) escapes to heap"
+	sink = m
+}
diff --git a/test/live.go b/test/live.go
index 62c6a0b..f96bbcc 100644
--- a/test/live.go
+++ b/test/live.go
@@ -640,8 +640,8 @@
 
 func good40() {
 	ret := T40{}
-	ret.m = make(map[int]int) // ERROR "live at call to makemap: ret"
+	ret.m = make(map[int]int) // ERROR "live at call to makemap: autotmp_.* ret"
 	t := &ret
-	printnl() // ERROR "live at call to printnl: ret"
+	printnl() // ERROR "live at call to printnl: autotmp_.* ret"
 	_ = t
 }
diff --git a/test/live2.go b/test/live2.go
index 1bd0af2..7474756 100644
--- a/test/live2.go
+++ b/test/live2.go
@@ -25,15 +25,15 @@
 }
 
 func bad40() {
-	t := newT40() // ERROR "live at call to makemap: ret"
-	printnl()     // ERROR "live at call to printnl: ret"
+	t := newT40() // ERROR "live at call to makemap: autotmp_.* ret"
+	printnl()     // ERROR "live at call to printnl: autotmp_.* ret"
 	_ = t
 }
 
 func good40() {
 	ret := T40{}
-	ret.m = make(map[int]int) // ERROR "live at call to makemap: ret"
+	ret.m = make(map[int]int) // ERROR "live at call to makemap: autotmp_.* ret"
 	t := &ret
-	printnl() // ERROR "live at call to printnl: ret"
+	printnl() // ERROR "live at call to printnl: autotmp_.* ret"
 	_ = t
 }