[release-branch.go1.8] cmd/compile: zero ambiguously live variables at VARKILLs

This is a redo of CL 41076 backported to the 1.8 release branch.
There were major conflicts, so I had to basically rewrite it again
from scratch.  The way Progs are allocated changed.  Liveness analysis
and Prog generation got reordered.  Liveness analysis changed from
running on gc.BasicBlock to ssa.Block.  All that makes the logic quite
a bit different.

Please review carefully.

From CL 41076:

At VARKILLs, zero a variable if it is ambiguously live.
After the VARKILL anything this variable references
might be collected. If it were to become live again later,
the GC will see references to already-collected objects.

We don't know a variable is ambiguously live until very
late in compilation (after lowering, register allocation, ...),
so it is hard to generate the code in an arch-independent way.
We also have to be careful not to clobber any registers.
Fortunately, this almost never happens so performance is ~irrelevant.

There are only 2 instances where this triggers in the stdlib.

Fixes #20029

Change-Id: Ibb757eec58ee07f40df5e561b19d315684dc4bda
Reviewed-on: https://go-review.googlesource.com/43998
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
diff --git a/src/cmd/compile/internal/amd64/galign.go b/src/cmd/compile/internal/amd64/galign.go
index bb3830b..6fd7f31 100644
--- a/src/cmd/compile/internal/amd64/galign.go
+++ b/src/cmd/compile/internal/amd64/galign.go
@@ -27,4 +27,5 @@
 	gc.Thearch.SSAMarkMoves = ssaMarkMoves
 	gc.Thearch.SSAGenValue = ssaGenValue
 	gc.Thearch.SSAGenBlock = ssaGenBlock
+	gc.Thearch.ZeroAuto = zeroAuto
 }
diff --git a/src/cmd/compile/internal/amd64/ggen.go b/src/cmd/compile/internal/amd64/ggen.go
index c137b52..d121533 100644
--- a/src/cmd/compile/internal/amd64/ggen.go
+++ b/src/cmd/compile/internal/amd64/ggen.go
@@ -166,6 +166,27 @@
 	return p
 }
 
+// zeroAuto zeroes the auto variable n with one store of constant 0 per
+// gc.Widthptr bytes, chained after pp. It must not clobber any registers.
+func zeroAuto(n *gc.Node, pp *obj.Prog) {
+	store := x86.AMOVQ
+	if gc.Widthptr == 4 {
+		store = x86.AMOVL
+	}
+	autoSym := gc.Linksym(n.Sym)
+	end := n.Xoffset + n.Type.Size()
+	for off := n.Xoffset; off < end; off += int64(gc.Widthptr) {
+		pp = gc.AddAsmAfter(store, pp)
+		pp.From.Type = obj.TYPE_CONST
+		pp.From.Offset = 0
+		pp.To.Type = obj.TYPE_MEM
+		pp.To.Name = obj.NAME_AUTO
+		pp.To.Reg = x86.REG_SP
+		pp.To.Offset = off
+		pp.To.Sym = autoSym
+	}
+}
+
 func ginsnop() {
 	// This is actually not the x86 NOP anymore,
 	// but at the point where it gets used, AX is dead
diff --git a/src/cmd/compile/internal/arm/galign.go b/src/cmd/compile/internal/arm/galign.go
index 308b016..53f57cd 100644
--- a/src/cmd/compile/internal/arm/galign.go
+++ b/src/cmd/compile/internal/arm/galign.go
@@ -21,4 +21,5 @@
 	gc.Thearch.SSAMarkMoves = func(s *gc.SSAGenState, b *ssa.Block) {}
 	gc.Thearch.SSAGenValue = ssaGenValue
 	gc.Thearch.SSAGenBlock = ssaGenBlock
+	gc.Thearch.ZeroAuto = zeroAuto
 }
diff --git a/src/cmd/compile/internal/arm/ggen.go b/src/cmd/compile/internal/arm/ggen.go
index 6dce0a4..1b97a4d 100644
--- a/src/cmd/compile/internal/arm/ggen.go
+++ b/src/cmd/compile/internal/arm/ggen.go
@@ -92,6 +92,27 @@
 
 	return p
 }
+// zeroAuto zeroes the auto variable n with code inserted right after pp; only REGTMP is clobbered.
+func zeroAuto(n *gc.Node, pp *obj.Prog) {
+	sym := gc.Linksym(n.Sym)
+	size := n.Type.Size()
+	// The zero load must also go after pp: gc.Prog takes no position, so it cannot place it ahead of the stores.
+	pp = gc.AddAsmAfter(arm.AMOVW, pp)
+	pp.From.Type = obj.TYPE_CONST
+	pp.From.Offset = 0
+	pp.To.Type = obj.TYPE_REG
+	pp.To.Reg = arm.REGTMP
+	for i := int64(0); i < size; i += 4 {
+		pp = gc.AddAsmAfter(arm.AMOVW, pp)
+		pp.From.Type = obj.TYPE_REG
+		pp.From.Reg = arm.REGTMP
+		pp.To.Type = obj.TYPE_MEM
+		pp.To.Name = obj.NAME_AUTO
+		pp.To.Reg = arm.REGSP
+		pp.To.Offset = n.Xoffset + i
+		pp.To.Sym = sym
+	}
+}
 
 func ginsnop() {
 	p := gc.Prog(arm.AAND)
diff --git a/src/cmd/compile/internal/arm64/galign.go b/src/cmd/compile/internal/arm64/galign.go
index 20a67e3..15680dd 100644
--- a/src/cmd/compile/internal/arm64/galign.go
+++ b/src/cmd/compile/internal/arm64/galign.go
@@ -21,4 +21,5 @@
 	gc.Thearch.SSAMarkMoves = func(s *gc.SSAGenState, b *ssa.Block) {}
 	gc.Thearch.SSAGenValue = ssaGenValue
 	gc.Thearch.SSAGenBlock = ssaGenBlock
+	gc.Thearch.ZeroAuto = zeroAuto
 }
diff --git a/src/cmd/compile/internal/arm64/ggen.go b/src/cmd/compile/internal/arm64/ggen.go
index 16813b6..83966a4 100644
--- a/src/cmd/compile/internal/arm64/ggen.go
+++ b/src/cmd/compile/internal/arm64/ggen.go
@@ -103,6 +103,23 @@
 	return p
 }
 
+// zeroAuto zeroes the auto variable n with one MOVD from REGZERO per 8 bytes,
+// chaining the stores after pp. It must not clobber any registers.
+func zeroAuto(n *gc.Node, pp *obj.Prog) {
+	autoSym := gc.Linksym(n.Sym)
+	end := n.Xoffset + n.Type.Size()
+	for off := n.Xoffset; off < end; off += 8 {
+		pp = gc.AddAsmAfter(arm64.AMOVD, pp)
+		pp.From.Type = obj.TYPE_REG
+		pp.From.Reg = arm64.REGZERO
+		pp.To.Type = obj.TYPE_MEM
+		pp.To.Name = obj.NAME_AUTO
+		pp.To.Reg = arm64.REGSP
+		pp.To.Offset = off
+		pp.To.Sym = autoSym
+	}
+}
+
 func ginsnop() {
 	p := gc.Prog(arm64.AHINT)
 	p.From.Type = obj.TYPE_CONST
diff --git a/src/cmd/compile/internal/gc/go.go b/src/cmd/compile/internal/gc/go.go
index ff33e9c..a529ca4 100644
--- a/src/cmd/compile/internal/gc/go.go
+++ b/src/cmd/compile/internal/gc/go.go
@@ -361,6 +361,12 @@
 	// SSAGenBlock emits end-of-block Progs. SSAGenValue should be called
 	// for all values in the block before SSAGenBlock.
 	SSAGenBlock func(s *SSAGenState, b, next *ssa.Block)
+
+	// ZeroAuto emits code to zero the given auto stack variable.
+	// Code is added immediately after pp.
+	// ZeroAuto must not use any non-temporary registers.
+	// ZeroAuto will only be called for variables which contain a pointer.
+	ZeroAuto func(n *Node, pp *obj.Prog)
 }
 
 var pcloc int32
diff --git a/src/cmd/compile/internal/gc/gsubr.go b/src/cmd/compile/internal/gc/gsubr.go
index 1e86363..2a8bedf 100644
--- a/src/cmd/compile/internal/gc/gsubr.go
+++ b/src/cmd/compile/internal/gc/gsubr.go
@@ -72,6 +72,15 @@
 	return q
 }
 
+// AddAsmAfter links a new Prog with opcode as into the list directly after p and returns it.
+func AddAsmAfter(as obj.As, p *obj.Prog) *obj.Prog {
+	ins := Ctxt.NewProg()
+	Clearp(ins)
+	ins.As, ins.Link = as, p.Link
+	p.Link = ins
+	return ins
+}
+
 func ggloblnod(nam *Node) {
 	s := Linksym(nam.Sym)
 	s.Gotype = Linksym(ngotype(nam))
diff --git a/src/cmd/compile/internal/gc/pgen.go b/src/cmd/compile/internal/gc/pgen.go
index 643ba79..dde28f6 100644
--- a/src/cmd/compile/internal/gc/pgen.go
+++ b/src/cmd/compile/internal/gc/pgen.go
@@ -120,7 +120,30 @@
 }
 
 func removevardef(firstp *obj.Prog) {
+	// At VARKILLs, zero variable if it is ambiguously live.
+	// After the VARKILL anything this variable references
+	// might be collected. If it were to become live again later,
+	// the GC will see references to already-collected objects.
+	// See issue 20029.
 	for p := firstp; p != nil; p = p.Link {
+		if p.As != obj.AVARKILL {
+			continue
+		}
+		n := p.To.Node.(*Node)
+		if !n.Name.Needzero {
+			continue
+		}
+		if n.Class != PAUTO {
+			Fatalf("zero of variable which isn't PAUTO %v", n)
+		}
+		if n.Type.Size()%int64(Widthptr) != 0 {
+			Fatalf("zero of variable not a multiple of ptr size %v", n)
+		}
+		Thearch.ZeroAuto(n, p)
+	}
+
+	for p := firstp; p != nil; p = p.Link {
+
 		for p.Link != nil && (p.Link.As == obj.AVARDEF || p.Link.As == obj.AVARKILL || p.Link.As == obj.AVARLIVE) {
 			p.Link = p.Link.Link
 		}
diff --git a/src/cmd/compile/internal/mips/galign.go b/src/cmd/compile/internal/mips/galign.go
index 39f5d2b..e6d117a 100644
--- a/src/cmd/compile/internal/mips/galign.go
+++ b/src/cmd/compile/internal/mips/galign.go
@@ -23,4 +23,5 @@
 	gc.Thearch.SSAMarkMoves = func(s *gc.SSAGenState, b *ssa.Block) {}
 	gc.Thearch.SSAGenValue = ssaGenValue
 	gc.Thearch.SSAGenBlock = ssaGenBlock
+	gc.Thearch.ZeroAuto = zeroAuto
 }
diff --git a/src/cmd/compile/internal/mips/ggen.go b/src/cmd/compile/internal/mips/ggen.go
index ec540f8..a95db57 100644
--- a/src/cmd/compile/internal/mips/ggen.go
+++ b/src/cmd/compile/internal/mips/ggen.go
@@ -92,6 +92,23 @@
 	return p
 }
 
+// zeroAuto zeroes the auto variable n with one MOVW from REGZERO per 4 bytes,
+// chaining the stores after pp. It must not clobber any registers.
+func zeroAuto(n *gc.Node, pp *obj.Prog) {
+	autoSym := gc.Linksym(n.Sym)
+	end := n.Xoffset + n.Type.Size()
+	for off := n.Xoffset; off < end; off += 4 {
+		pp = gc.AddAsmAfter(mips.AMOVW, pp)
+		pp.From.Type = obj.TYPE_REG
+		pp.From.Reg = mips.REGZERO
+		pp.To.Type = obj.TYPE_MEM
+		pp.To.Name = obj.NAME_AUTO
+		pp.To.Reg = mips.REGSP
+		pp.To.Offset = off
+		pp.To.Sym = autoSym
+	}
+}
+
 func ginsnop() {
 	p := gc.Prog(mips.ANOR)
 	p.From.Type = obj.TYPE_REG
diff --git a/src/cmd/compile/internal/mips64/galign.go b/src/cmd/compile/internal/mips64/galign.go
index 4a36a4c..e8ea073 100644
--- a/src/cmd/compile/internal/mips64/galign.go
+++ b/src/cmd/compile/internal/mips64/galign.go
@@ -25,4 +25,5 @@
 	gc.Thearch.SSAMarkMoves = func(s *gc.SSAGenState, b *ssa.Block) {}
 	gc.Thearch.SSAGenValue = ssaGenValue
 	gc.Thearch.SSAGenBlock = ssaGenBlock
+	gc.Thearch.ZeroAuto = zeroAuto
 }
diff --git a/src/cmd/compile/internal/mips64/ggen.go b/src/cmd/compile/internal/mips64/ggen.go
index 2af4a8b..eb48e2b 100644
--- a/src/cmd/compile/internal/mips64/ggen.go
+++ b/src/cmd/compile/internal/mips64/ggen.go
@@ -95,6 +95,23 @@
 	return p
 }
 
+// zeroAuto zeroes the auto variable n with one MOVV from REGZERO per 8 bytes,
+// chaining the stores after pp. It must not clobber any registers.
+func zeroAuto(n *gc.Node, pp *obj.Prog) {
+	autoSym := gc.Linksym(n.Sym)
+	end := n.Xoffset + n.Type.Size()
+	for off := n.Xoffset; off < end; off += 8 {
+		pp = gc.AddAsmAfter(mips.AMOVV, pp)
+		pp.From.Type = obj.TYPE_REG
+		pp.From.Reg = mips.REGZERO
+		pp.To.Type = obj.TYPE_MEM
+		pp.To.Name = obj.NAME_AUTO
+		pp.To.Reg = mips.REGSP
+		pp.To.Offset = off
+		pp.To.Sym = autoSym
+	}
+}
+
 func ginsnop() {
 	p := gc.Prog(mips.ANOR)
 	p.From.Type = obj.TYPE_REG
diff --git a/src/cmd/compile/internal/ppc64/galign.go b/src/cmd/compile/internal/ppc64/galign.go
index 186aa29..3780247 100644
--- a/src/cmd/compile/internal/ppc64/galign.go
+++ b/src/cmd/compile/internal/ppc64/galign.go
@@ -24,6 +24,7 @@
 	gc.Thearch.SSAMarkMoves = ssaMarkMoves
 	gc.Thearch.SSAGenValue = ssaGenValue
 	gc.Thearch.SSAGenBlock = ssaGenBlock
+	gc.Thearch.ZeroAuto = zeroAuto
 
 	initvariants()
 	initproginfo()
diff --git a/src/cmd/compile/internal/ppc64/ggen.go b/src/cmd/compile/internal/ppc64/ggen.go
index b3ce968..4abd18d 100644
--- a/src/cmd/compile/internal/ppc64/ggen.go
+++ b/src/cmd/compile/internal/ppc64/ggen.go
@@ -90,6 +90,23 @@
 	return p
 }
 
+// zeroAuto zeroes the auto variable n with one MOVD from REGZERO per 8 bytes,
+// chaining the stores after pp. It must not clobber any registers.
+func zeroAuto(n *gc.Node, pp *obj.Prog) {
+	autoSym := gc.Linksym(n.Sym)
+	end := n.Xoffset + n.Type.Size()
+	for off := n.Xoffset; off < end; off += 8 {
+		pp = gc.AddAsmAfter(ppc64.AMOVD, pp)
+		pp.From.Type = obj.TYPE_REG
+		pp.From.Reg = ppc64.REGZERO
+		pp.To.Type = obj.TYPE_MEM
+		pp.To.Name = obj.NAME_AUTO
+		pp.To.Reg = ppc64.REGSP
+		pp.To.Offset = off
+		pp.To.Sym = autoSym
+	}
+}
+
 func ginsnop() {
 	p := gc.Prog(ppc64.AOR)
 	p.From.Type = obj.TYPE_REG
diff --git a/src/cmd/compile/internal/s390x/galign.go b/src/cmd/compile/internal/s390x/galign.go
index 91b9ed0..9424d20 100644
--- a/src/cmd/compile/internal/s390x/galign.go
+++ b/src/cmd/compile/internal/s390x/galign.go
@@ -20,4 +20,5 @@
 	gc.Thearch.SSAMarkMoves = ssaMarkMoves
 	gc.Thearch.SSAGenValue = ssaGenValue
 	gc.Thearch.SSAGenBlock = ssaGenBlock
+	gc.Thearch.ZeroAuto = zeroAuto
 }
diff --git a/src/cmd/compile/internal/s390x/ggen.go b/src/cmd/compile/internal/s390x/ggen.go
index 15c6554..5ecfaa5 100644
--- a/src/cmd/compile/internal/s390x/ggen.go
+++ b/src/cmd/compile/internal/s390x/ggen.go
@@ -143,6 +143,19 @@
 	return p
 }
 
+// zeroAuto zeroes the auto variable n with a single CLEAR of n.Type.Size() bytes inserted after pp.
+func zeroAuto(n *gc.Node, pp *obj.Prog) {
+	// Note: the emitted code must not clobber any registers.
+	clr := gc.AddAsmAfter(s390x.ACLEAR, pp)
+	clr.From.Type = obj.TYPE_CONST
+	clr.From.Offset = n.Type.Size()
+	clr.To.Type = obj.TYPE_MEM
+	clr.To.Name = obj.NAME_AUTO
+	clr.To.Reg = s390x.REGSP
+	clr.To.Offset = n.Xoffset
+	clr.To.Sym = gc.Linksym(n.Sym)
+}
+
 func ginsnop() {
 	p := gc.Prog(s390x.AOR)
 	p.From.Type = obj.TYPE_REG
diff --git a/src/cmd/compile/internal/x86/galign.go b/src/cmd/compile/internal/x86/galign.go
index edac6a0..bb29d2a 100644
--- a/src/cmd/compile/internal/x86/galign.go
+++ b/src/cmd/compile/internal/x86/galign.go
@@ -31,4 +31,5 @@
 	gc.Thearch.SSAMarkMoves = ssaMarkMoves
 	gc.Thearch.SSAGenValue = ssaGenValue
 	gc.Thearch.SSAGenBlock = ssaGenBlock
+	gc.Thearch.ZeroAuto = zeroAuto
 }
diff --git a/src/cmd/compile/internal/x86/ggen.go b/src/cmd/compile/internal/x86/ggen.go
index 25769b4..33ffc5f 100644
--- a/src/cmd/compile/internal/x86/ggen.go
+++ b/src/cmd/compile/internal/x86/ggen.go
@@ -84,6 +84,23 @@
 	return p
 }
 
+// zeroAuto zeroes the auto variable n with one MOVL of constant 0 per 4 bytes,
+// chaining the stores after pp. It must not clobber any registers.
+func zeroAuto(n *gc.Node, pp *obj.Prog) {
+	autoSym := gc.Linksym(n.Sym)
+	end := n.Xoffset + n.Type.Size()
+	for off := n.Xoffset; off < end; off += 4 {
+		pp = gc.AddAsmAfter(x86.AMOVL, pp)
+		pp.From.Type = obj.TYPE_CONST
+		pp.From.Offset = 0
+		pp.To.Type = obj.TYPE_MEM
+		pp.To.Name = obj.NAME_AUTO
+		pp.To.Reg = x86.REG_SP
+		pp.To.Offset = off
+		pp.To.Sym = autoSym
+	}
+}
+
 func ginsnop() {
 	p := gc.Prog(x86.AXCHGL)
 	p.From.Type = obj.TYPE_REG
diff --git a/test/fixedbugs/issue20029.go b/test/fixedbugs/issue20029.go
new file mode 100644
index 0000000..db3f8aa
--- /dev/null
+++ b/test/fixedbugs/issue20029.go
@@ -0,0 +1,32 @@
+// run
+
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Issue 20029: make sure we zero at VARKILLs of
+// ambiguously live variables.
+// The ambiguously live variable here is the hiter
+// for the inner range loop.
+
+package main
+
+import "runtime"
+
+func f(m map[int]int) {
+outer:
+	for i := 0; i < 10; i++ {
+		for k := range m { // inner range loop; per the file header, its hiter is the ambiguously live variable
+			if k == 5 {
+				continue outer // leaves the inner range loop early and starts the next outer iteration
+			}
+		}
+		runtime.GC() // force a collection once the inner loop has finished
+		break
+	}
+	runtime.GC() // force another collection after the outer loop
+}
+func main() {
+	m := map[int]int{1: 2, 2: 3, 3: 4} // keys are 1, 2 and 3
+	f(m) // run the loop nest in f once on this small map
+}