cmd/compile: fix defer/deferreturn

Make sure we do any just-before-return cleanup on all paths out of a
function, including when recovering.  Each exit path should include
deferreturn (if there are any defers) and then the exit
code (e.g. copying heap-escaping return values back to the stack).

Introduce a Defer SSA block type which has two outgoing edges: one is
the fallthrough edge (the defer was queued successfully) and the other
immediately returns (the defer had a successful recover() call and
normal execution should resume at the return point).

Fixes #14725

Change-Id: Iad035c9fd25ef8b7a74dafbd7461cf04833d981f
Reviewed-on: https://go-review.googlesource.com/20486
Reviewed-by: David Chase <drchase@google.com>
diff --git a/src/cmd/compile/internal/ssa/check.go b/src/cmd/compile/internal/ssa/check.go
index 7243cdc..83aae3a 100644
--- a/src/cmd/compile/internal/ssa/check.go
+++ b/src/cmd/compile/internal/ssa/check.go
@@ -125,6 +125,16 @@
 			if !b.Control.Type.IsMemory() {
 				f.Fatalf("call block %s has non-memory control value %s", b, b.Control.LongString())
 			}
+		case BlockDefer:
+			if len(b.Succs) != 2 {
+				f.Fatalf("defer block %s len(Succs)==%d, want 2", b, len(b.Succs))
+			}
+			if b.Control == nil {
+				f.Fatalf("defer block %s has no control value", b)
+			}
+			if !b.Control.Type.IsMemory() {
+				f.Fatalf("defer block %s has non-memory control value %s", b, b.Control.LongString())
+			}
 		case BlockCheck:
 			if len(b.Succs) != 1 {
 				f.Fatalf("check block %s len(Succs)==%d, want 1", b, len(b.Succs))
diff --git a/src/cmd/compile/internal/ssa/flagalloc.go b/src/cmd/compile/internal/ssa/flagalloc.go
index b9a9741..b3aa62c 100644
--- a/src/cmd/compile/internal/ssa/flagalloc.go
+++ b/src/cmd/compile/internal/ssa/flagalloc.go
@@ -58,6 +58,10 @@
 		if v != nil && v.Type.IsFlags() && end[b.ID] != v {
 			end[b.ID] = nil
 		}
+		if b.Kind == BlockDefer {
+			// Defer blocks internally use/clobber the flags value.
+			end[b.ID] = nil
+		}
 	}
 
 	// Add flag recomputations where they are needed.
diff --git a/src/cmd/compile/internal/ssa/gen/genericOps.go b/src/cmd/compile/internal/ssa/gen/genericOps.go
index 3b55ebf..6a49cb7 100644
--- a/src/cmd/compile/internal/ssa/gen/genericOps.go
+++ b/src/cmd/compile/internal/ssa/gen/genericOps.go
@@ -401,6 +401,7 @@
 	{name: "Plain"},  // a single successor
 	{name: "If"},     // 2 successors, if control goto Succs[0] else goto Succs[1]
 	{name: "Call"},   // 1 successor, control is call op (of memory type)
+	{name: "Defer"},  // 2 successors, Succs[0]=defer queued, Succs[1]=defer recovered. control is call op (of memory type)
 	{name: "Check"},  // 1 successor, control is nilcheck op (of void type)
 	{name: "Ret"},    // no successors, control value is memory result
 	{name: "RetJmp"}, // no successors, jumps to b.Aux.(*gc.Sym)
diff --git a/src/cmd/compile/internal/ssa/likelyadjust.go b/src/cmd/compile/internal/ssa/likelyadjust.go
index b016519..93f32c7 100644
--- a/src/cmd/compile/internal/ssa/likelyadjust.go
+++ b/src/cmd/compile/internal/ssa/likelyadjust.go
@@ -100,7 +100,7 @@
 			// Calls. TODO not all calls are equal, names give useful clues.
 			// Any name-based heuristics are only relative to other calls,
 			// and less influential than inferences from loop structure.
-		case BlockCall:
+		case BlockCall, BlockDefer:
 			local[b.ID] = blCALL
 			certain[b.ID] = max8(blCALL, certain[b.Succs[0].ID])
 
diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go
index f1f3f7b..3b5e14e 100644
--- a/src/cmd/compile/internal/ssa/opGen.go
+++ b/src/cmd/compile/internal/ssa/opGen.go
@@ -29,6 +29,7 @@
 	BlockPlain
 	BlockIf
 	BlockCall
+	BlockDefer
 	BlockCheck
 	BlockRet
 	BlockRetJmp
@@ -58,6 +59,7 @@
 	BlockPlain:  "Plain",
 	BlockIf:     "If",
 	BlockCall:   "Call",
+	BlockDefer:  "Defer",
 	BlockCheck:  "Check",
 	BlockRet:    "Ret",
 	BlockRetJmp: "RetJmp",
diff --git a/src/cmd/compile/internal/ssa/phiopt.go b/src/cmd/compile/internal/ssa/phiopt.go
index fb17727..31870a6 100644
--- a/src/cmd/compile/internal/ssa/phiopt.go
+++ b/src/cmd/compile/internal/ssa/phiopt.go
@@ -26,14 +26,14 @@
 		}
 
 		pb0, b0 := b, b.Preds[0]
-		for b0.Kind != BlockIf && len(b0.Preds) == 1 {
+		for len(b0.Succs) == 1 && len(b0.Preds) == 1 {
 			pb0, b0 = b0, b0.Preds[0]
 		}
 		if b0.Kind != BlockIf {
 			continue
 		}
 		pb1, b1 := b, b.Preds[1]
-		for b1.Kind != BlockIf && len(b1.Preds) == 1 {
+		for len(b1.Succs) == 1 && len(b1.Preds) == 1 {
 			pb1, b1 = b1, b1.Preds[0]
 		}
 		if b1 != b0 {
diff --git a/src/cmd/compile/internal/ssa/regalloc.go b/src/cmd/compile/internal/ssa/regalloc.go
index 042617b..0063dc1 100644
--- a/src/cmd/compile/internal/ssa/regalloc.go
+++ b/src/cmd/compile/internal/ssa/regalloc.go
@@ -585,7 +585,7 @@
 		// Walk backwards through the block doing liveness analysis.
 		liveSet.clear()
 		d := int32(len(b.Values))
-		if b.Kind == BlockCall {
+		if b.Kind == BlockCall || b.Kind == BlockDefer {
 			d += unlikelyDistance
 		}
 		for _, e := range s.live[b.ID] {
@@ -988,7 +988,7 @@
 					continue
 				}
 				for {
-					if p.Kind == BlockCall {
+					if p.Kind == BlockCall || p.Kind == BlockDefer {
 						goto badloop
 					}
 					if p == top {
@@ -1607,7 +1607,7 @@
 			// to beginning-of-block distance.
 			live.clear()
 			d := int32(len(b.Values))
-			if b.Kind == BlockCall {
+			if b.Kind == BlockCall || b.Kind == BlockDefer {
 				// Because we keep no values in registers across a call,
 				// make every use past a call very far away.
 				d += unlikelyDistance