[dev.ssa] cmd/compile: optimize phi ops

Redo how we keep track of forward references when building SSA.
When the forward reference is resolved, update the Value node
in place.

Improve the phi elimination pass so it can simplify phis of phis.

Give SSA package access to decoded line numbers.  Fix line numbers
for constant booleans.

Change-Id: I3dc9896148d260be2f3dd14cbe5db639ec9fa6b7
Reviewed-on: https://go-review.googlesource.com/18674
Reviewed-by: David Chase <drchase@google.com>
Run-TryBot: Keith Randall <khr@golang.org>
diff --git a/src/cmd/compile/internal/gc/ssa.go b/src/cmd/compile/internal/gc/ssa.go
index be9af60..42f484d 100644
--- a/src/cmd/compile/internal/gc/ssa.go
+++ b/src/cmd/compile/internal/gc/ssa.go
@@ -139,11 +139,6 @@
 		}
 	}()
 
-	// We construct SSA using an algorithm similar to
-	// Brau, Buchwald, Hack, Leißa, Mallon, and Zwinkau
-	// http://pp.info.uni-karlsruhe.de/uploads/publikationen/braun13cc.pdf
-	// TODO: check this comment
-
 	// Allocate starting block
 	s.f.Entry = s.f.NewBlock(ssa.BlockPlain)
 
@@ -285,6 +280,9 @@
 	// list of panic calls by function name and line number.
 	// Used to deduplicate panic calls.
 	panics map[funcLine]*ssa.Block
+
+	// list of FwdRef values.
+	fwdRefs []*ssa.Value
 }
 
 type funcLine struct {
@@ -1327,7 +1325,14 @@
 		case CTSTR:
 			return s.entryNewValue0A(ssa.OpConstString, n.Type, n.Val().U)
 		case CTBOOL:
-			return s.constBool(n.Val().U.(bool))
+			v := s.constBool(n.Val().U.(bool))
+			// For some reason the frontend gets the line numbers of
+			// CTBOOL literals totally wrong.  Fix it here by grabbing
+			// the line number of the enclosing AST node.
+			if len(s.line) >= 2 {
+				v.Line = s.line[len(s.line)-2]
+			}
+			return v
 		case CTNIL:
 			t := n.Type
 			switch {
@@ -3172,9 +3177,10 @@
 func (s *state) variable(name *Node, t ssa.Type) *ssa.Value {
 	v := s.vars[name]
 	if v == nil {
-		// TODO: get type?  Take Sym as arg?
 		v = s.newValue0A(ssa.OpFwdRef, t, name)
+		s.fwdRefs = append(s.fwdRefs, v)
 		s.vars[name] = v
+		s.addNamedValue(name, v)
 	}
 	return v
 }
@@ -3184,40 +3190,38 @@
 }
 
 func (s *state) linkForwardReferences() {
-	// Build ssa graph.  Each variable on its first use in a basic block
+	// Build SSA graph.  Each variable on its first use in a basic block
 	// leaves a FwdRef in that block representing the incoming value
 	// of that variable.  This function links that ref up with possible definitions,
 	// inserting Phi values as needed.  This is essentially the algorithm
-	// described by Brau, Buchwald, Hack, Leißa, Mallon, and Zwinkau:
+	// described by Braun, Buchwald, Hack, Leißa, Mallon, and Zwinkau:
 	// http://pp.info.uni-karlsruhe.de/uploads/publikationen/braun13cc.pdf
-	for _, b := range s.f.Blocks {
-		for _, v := range b.Values {
-			if v.Op != ssa.OpFwdRef {
-				continue
-			}
-			name := v.Aux.(*Node)
-			v.Op = ssa.OpCopy
-			v.Aux = nil
-			v.SetArgs1(s.lookupVarIncoming(b, v.Type, name))
-		}
+	// Differences:
+	//   - We use FwdRef nodes to postpone phi building until the CFG is
+	//     completely built.  That way we can avoid the notion of "sealed"
+	//     blocks.
+	//   - Phi optimization is a separate pass (in ../ssa/phielim.go).
+	for len(s.fwdRefs) > 0 {
+		v := s.fwdRefs[len(s.fwdRefs)-1]
+		s.fwdRefs = s.fwdRefs[:len(s.fwdRefs)-1]
+		s.resolveFwdRef(v)
 	}
 }
 
-// lookupVarIncoming finds the variable's value at the start of block b.
-func (s *state) lookupVarIncoming(b *ssa.Block, t ssa.Type, name *Node) *ssa.Value {
-	// TODO(khr): have lookupVarIncoming overwrite the fwdRef or copy it
-	// will be used in, instead of having the result used in a copy value.
+// resolveFwdRef modifies v to be the variable's value at the start of its block.
+// v must be a FwdRef op.
+func (s *state) resolveFwdRef(v *ssa.Value) {
+	b := v.Block
+	name := v.Aux.(*Node)
+	v.Aux = nil
 	if b == s.f.Entry {
-		if name == &memVar {
-			return s.startmem
-		}
+		// Live variable at start of function.
 		if canSSA(name) {
-			v := s.entryNewValue0A(ssa.OpArg, t, name)
-			// v starts with AuxInt == 0.
-			s.addNamedValue(name, v)
-			return v
+			v.Op = ssa.OpArg
+			v.Aux = name
+			return
 		}
-		// variable is live at the entry block.  Load it.
+		// Not SSAable.  Load it.
 		addr := s.decladdrs[name]
 		if addr == nil {
 			// TODO: closure args reach here.
@@ -3226,64 +3230,69 @@
 		if _, ok := addr.Aux.(*ssa.ArgSymbol); !ok {
 			s.Fatalf("variable live at start of function %s is not an argument %s", b.Func.Name, name)
 		}
-		return s.entryNewValue2(ssa.OpLoad, t, addr, s.startmem)
+		v.Op = ssa.OpLoad
+		v.AddArgs(addr, s.startmem)
+		return
 	}
-	var vals []*ssa.Value
-	for _, p := range b.Preds {
-		vals = append(vals, s.lookupVarOutgoing(p, t, name))
-	}
-	if len(vals) == 0 {
+	if len(b.Preds) == 0 {
 		// This block is dead; we have no predecessors and we're not the entry block.
-		// It doesn't matter what we use here as long as it is well-formed,
-		// so use the default/zero value.
-		if name == &memVar {
-			return s.startmem
-		}
-		return s.zeroVal(name.Type)
+		// It doesn't matter what we use here as long as it is well-formed.
+		v.Op = ssa.OpUnknown
+		return
 	}
-	v0 := vals[0]
-	for i := 1; i < len(vals); i++ {
-		if vals[i] != v0 {
-			// need a phi value
-			v := b.NewValue0(s.peekLine(), ssa.OpPhi, t)
-			v.AddArgs(vals...)
-			s.addNamedValue(name, v)
-			return v
-		}
+	// Find variable value on each predecessor.
+	var argstore [4]*ssa.Value
+	args := argstore[:0]
+	for _, p := range b.Preds {
+		args = append(args, s.lookupVarOutgoing(p, v.Type, name, v.Line))
 	}
-	return v0
+
+	// Decide if we need a phi or not.  We need a phi if there
+	// are two different args (which are both not v).
+	var w *ssa.Value
+	for _, a := range args {
+		if a == v {
+			continue // self-reference
+		}
+		if a == w {
+			continue // already have this witness
+		}
+		if w != nil {
+			// two witnesses, need a phi value
+			v.Op = ssa.OpPhi
+			v.AddArgs(args...)
+			return
+		}
+		w = a // save witness
+	}
+	if w == nil {
+		s.Fatalf("no witness for reachable phi %s", v)
+	}
+	// One witness.  Make v a copy of w.
+	v.Op = ssa.OpCopy
+	v.AddArg(w)
 }
 
 // lookupVarOutgoing finds the variable's value at the end of block b.
-func (s *state) lookupVarOutgoing(b *ssa.Block, t ssa.Type, name *Node) *ssa.Value {
+func (s *state) lookupVarOutgoing(b *ssa.Block, t ssa.Type, name *Node, line int32) *ssa.Value {
 	m := s.defvars[b.ID]
 	if v, ok := m[name]; ok {
 		return v
 	}
 	// The variable is not defined by b and we haven't
-	// looked it up yet.  Generate v, a copy value which
-	// will be the outgoing value of the variable.  Then
-	// look up w, the incoming value of the variable.
-	// Make v = copy(w).  We need the extra copy to
-	// prevent infinite recursion when looking up the
-	// incoming value of the variable.
-	v := b.NewValue0(s.peekLine(), ssa.OpCopy, t)
+	// looked it up yet.  Generate a FwdRef for the variable and return that.
+	v := b.NewValue0A(line, ssa.OpFwdRef, t, name)
+	s.fwdRefs = append(s.fwdRefs, v)
 	m[name] = v
-	v.AddArg(s.lookupVarIncoming(b, t, name))
+	s.addNamedValue(name, v)
 	return v
 }
 
-// TODO: the above mutually recursive functions can lead to very deep stacks.  Fix that.
-
 func (s *state) addNamedValue(n *Node, v *ssa.Value) {
 	if n.Class == Pxxx {
 		// Don't track our dummy nodes (&memVar etc.).
 		return
 	}
-	if n.Sym == nil {
-		// TODO: What the heck is this?
-		return
-	}
 	if strings.HasPrefix(n.Sym.Name, "autotmp_") {
 		// Don't track autotmp_ variables.
 		return
@@ -3910,7 +3919,7 @@
 		p.To.Sym = Linksym(Pkglookup("duffcopy", Runtimepkg))
 		p.To.Offset = v.AuxInt
 
-	case ssa.OpCopy, ssa.OpAMD64MOVQconvert: // TODO: lower Copy to MOVQ earlier?
+	case ssa.OpCopy, ssa.OpAMD64MOVQconvert: // TODO: use MOVQreg for reg->reg copies instead of OpCopy?
 		if v.Type.IsMemory() {
 			return
 		}
@@ -3970,12 +3979,6 @@
 				v.Fatalf("phi arg at different location than phi: %v @ %v, but arg %v @ %v\n%s\n", v, loc, a, aloc, v.Block.Func)
 			}
 		}
-	case ssa.OpConst8, ssa.OpConst16, ssa.OpConst32, ssa.OpConst64, ssa.OpConstString, ssa.OpConstNil, ssa.OpConstBool,
-		ssa.OpConst32F, ssa.OpConst64F:
-		if v.Block.Func.RegAlloc[v.ID] != nil {
-			v.Fatalf("const value %v shouldn't have a location", v)
-		}
-
 	case ssa.OpInitMem:
 		// memory arg needs no code
 	case ssa.OpArg:
@@ -4596,6 +4599,10 @@
 	return canSSAType(t.(*Type))
 }
 
+func (e *ssaExport) Line(line int32) string {
+	return Ctxt.Line(int(line))
+}
+
 // Log logs a message from the compiler.
 func (e *ssaExport) Logf(msg string, args ...interface{}) {
 	// If e was marked as unimplemented, anything could happen. Ignore.