cmd/compile: fix defer/deferreturn

Make sure we do any just-before-return cleanup on all paths out of a
function, including when recovering.  Each exit path should include
deferreturn (if there are any defers) and then the exit
code (e.g. copying heap-escaping return values back to the stack).
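A hedged illustration (not the test case from this CL) of why the exit
code must also run on the recover path: the named result x is captured
by the deferred closure, so it escapes to the heap, and the copy back to
the stack return slot must happen even when f exits via recover.

    package main

    import "fmt"

    func f() (x int) {
            defer func() {
                    recover()
                    x = 1 // must be reflected in f's return value
            }()
            panic("boom")
    }

    func main() {
            fmt.Println(f()) // prints 1: exit code ran on the recover path
    }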

Introduce a Defer SSA block type which has two outgoing edges - one is
the fallthrough edge (the defer was queued successfully) and the other
immediately returns (the deferred call had a successful recover(), so
normal execution should resume at the return point).
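A conceptual sketch in ordinary Go (not compiler output) of the control
flow the Defer block models for "defer f()". The hypothetical helper
deferprocQueued stands in for the value the runtime reports after
queueing the defer: false on the normal path, true when a recover means
execution should resume at the return point.

    package main

    import "fmt"

    // Illustrative stand-in only: pretend the defer was queued and no
    // recover happened, so the fallthrough edge is taken.
    func deferprocQueued() bool {
            return false
    }

    func example() (result int) {
            if recovered := deferprocQueued(); recovered {
                    // Second outgoing edge of the Defer block: run
                    // deferreturn and the exit code, then return.
                    return
            }
            // Fallthrough edge: normal execution continues here.
            result = 42
            return
    }

    func main() {
            fmt.Println(example())
    }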

Fixes #14725

Change-Id: Iad035c9fd25ef8b7a74dafbd7461cf04833d981f
Reviewed-on: https://go-review.googlesource.com/20486
Reviewed-by: David Chase <drchase@google.com>
diff --git a/src/cmd/compile/internal/gc/ssa.go b/src/cmd/compile/internal/gc/ssa.go
index ff6a3f2..557564d 100644
--- a/src/cmd/compile/internal/gc/ssa.go
+++ b/src/cmd/compile/internal/gc/ssa.go
@@ -177,12 +177,9 @@
 
 	// fallthrough to exit
 	if s.curBlock != nil {
-		s.stmts(s.exitCode)
-		m := s.mem()
-		b := s.endBlock()
-		b.Line = fn.Func.Endlineno
-		b.Kind = ssa.BlockRet
-		b.Control = m
+		s.pushLine(fn.Func.Endlineno)
+		s.exit()
+		s.popLine()
 	}
 
 	// Check that we used all labels
@@ -904,6 +901,10 @@
 // It returns a BlockRet block that ends the control flow. Its control value
 // will be set to the final memory state.
 func (s *state) exit() *ssa.Block {
+	if hasdefer {
+		s.rtcall(Deferreturn, true, nil)
+	}
+
 	// Run exit code. Typically, this code copies heap-allocated PPARAMOUT
 	// variables back to the stack.
 	s.stmts(s.exitCode)
@@ -2402,6 +2403,15 @@
 	b.Kind = ssa.BlockCall
 	b.Control = call
 	b.AddEdgeTo(bNext)
+	if k == callDefer {
+		// Add recover edge to exit code.
+		b.Kind = ssa.BlockDefer
+		r := s.f.NewBlock(ssa.BlockPlain)
+		s.startBlock(r)
+		s.exit()
+		b.AddEdgeTo(r)
+		b.Likely = ssa.BranchLikely
+	}
 
 	// Start exit block, find address of result.
 	s.startBlock(bNext)
@@ -3622,12 +3632,6 @@
 
 	// bstart remembers where each block starts (indexed by block ID)
 	bstart []*obj.Prog
-
-	// deferBranches remembers all the defer branches we've seen.
-	deferBranches []*obj.Prog
-
-	// deferTarget remembers the (last) deferreturn call site.
-	deferTarget *obj.Prog
 }
 
 // genssa appends entries to ptxt for each instruction in f.
@@ -3690,15 +3694,6 @@
 	for _, br := range s.branches {
 		br.p.To.Val = s.bstart[br.b.ID]
 	}
-	if s.deferBranches != nil && s.deferTarget == nil {
-		// This can happen when the function has a defer but
-		// no return (because it has an infinite loop).
-		s.deferReturn()
-		Prog(obj.ARET)
-	}
-	for _, p := range s.deferBranches {
-		p.To.Val = s.deferTarget
-	}
 
 	if logProgs {
 		for p := ptxt; p != nil; p = p.Link {
@@ -4529,6 +4524,17 @@
 			q.To.Reg = r
 		}
 	case ssa.OpAMD64CALLstatic:
+		if v.Aux.(*Sym) == Deferreturn.Sym {
+			// Deferred calls will appear to be returning to
+			// the CALL deferreturn(SB) that we are about to emit.
+			// However, the stack trace code will show the line
+			// of the instruction byte before the return PC.
+			// To avoid that being an unrelated instruction,
+			// insert an actual hardware NOP that will have the right line number.
+			// This is different from obj.ANOP, which is a virtual no-op
+			// that doesn't make it into the instruction stream.
+			Thearch.Ginsnop()
+		}
 		p := Prog(obj.ACALL)
 		p.To.Type = obj.TYPE_MEM
 		p.To.Name = obj.NAME_EXTERN
@@ -4551,17 +4557,6 @@
 		if Maxarg < v.AuxInt {
 			Maxarg = v.AuxInt
 		}
-		// defer returns in rax:
-		// 0 if we should continue executing
-		// 1 if we should jump to deferreturn call
-		p = Prog(x86.ATESTL)
-		p.From.Type = obj.TYPE_REG
-		p.From.Reg = x86.REG_AX
-		p.To.Type = obj.TYPE_REG
-		p.To.Reg = x86.REG_AX
-		p = Prog(x86.AJNE)
-		p.To.Type = obj.TYPE_BRANCH
-		s.deferBranches = append(s.deferBranches, p)
 	case ssa.OpAMD64CALLgo:
 		p := Prog(obj.ACALL)
 		p.To.Type = obj.TYPE_MEM
@@ -4835,12 +4830,26 @@
 			p.To.Type = obj.TYPE_BRANCH
 			s.branches = append(s.branches, branch{p, b.Succs[0]})
 		}
+	case ssa.BlockDefer:
+		// defer returns in rax:
+		// 0 if we should continue executing
+		// 1 if we should jump to deferreturn call
+		p := Prog(x86.ATESTL)
+		p.From.Type = obj.TYPE_REG
+		p.From.Reg = x86.REG_AX
+		p.To.Type = obj.TYPE_REG
+		p.To.Reg = x86.REG_AX
+		p = Prog(x86.AJNE)
+		p.To.Type = obj.TYPE_BRANCH
+		s.branches = append(s.branches, branch{p, b.Succs[1]})
+		if b.Succs[0] != next {
+			p := Prog(obj.AJMP)
+			p.To.Type = obj.TYPE_BRANCH
+			s.branches = append(s.branches, branch{p, b.Succs[0]})
+		}
 	case ssa.BlockExit:
 		Prog(obj.AUNDEF) // tell plive.go that we never reach here
 	case ssa.BlockRet:
-		if hasdefer {
-			s.deferReturn()
-		}
 		Prog(obj.ARET)
 	case ssa.BlockRetJmp:
 		p := Prog(obj.AJMP)
@@ -4899,23 +4908,6 @@
 	}
 }
 
-func (s *genState) deferReturn() {
-	// Deferred calls will appear to be returning to
-	// the CALL deferreturn(SB) that we are about to emit.
-	// However, the stack trace code will show the line
-	// of the instruction byte before the return PC.
-	// To avoid that being an unrelated instruction,
-	// insert an actual hardware NOP that will have the right line number.
-	// This is different from obj.ANOP, which is a virtual no-op
-	// that doesn't make it into the instruction stream.
-	s.deferTarget = Pc
-	Thearch.Ginsnop()
-	p := Prog(obj.ACALL)
-	p.To.Type = obj.TYPE_MEM
-	p.To.Name = obj.NAME_EXTERN
-	p.To.Sym = Linksym(Deferreturn.Sym)
-}
-
 // addAux adds the offset in the aux fields (AuxInt and Aux) of v to a.
 func addAux(a *obj.Addr, v *ssa.Value) {
 	addAux2(a, v, v.AuxInt)