src/cmd/compile/internal/ssa/loopreschedchecks.go - go - Git at Google

 // Copyright 2016 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

 package ssa

 import (
 	"cmd/compile/internal/types"
 	"fmt"
 )

 // an edgeMem records a backedge, together with the memory
 // phi functions at the target of the backedge that must
 // be updated when a rescheduling check replaces the backedge.
 type edgeMem struct {
 	e Edge
 	m *Value // phi for memory at dest of e
 }

 // a rewriteTarget is a value-argindex pair indicating
 // where a rewrite is applied.  Note that this is for values,
 // not for block controls, because block controls are not targets
 // for the rewrites performed in inserting rescheduling checks.
 type rewriteTarget struct {
 	v *Value
 	i int
 }

 type rewrite struct {
 	before, after *Value          // before is the expected value before rewrite, after is the new value installed.
 	rewrites      []rewriteTarget // all the targets for this rewrite.
 }

 func (r *rewrite) String() string {
 	s := "\n\tbefore=" + r.before.String() + ", after=" + r.after.String()
 	for _, rw := range r.rewrites {
 		s += ", (i=" + fmt.Sprint(rw.i) + ", v=" + rw.v.LongString() + ")"
 	}
 	s += "\n"
 	return s
 }

 // insertLoopReschedChecks inserts rescheduling checks on loop backedges.
 func insertLoopReschedChecks(f *Func) {
 	// TODO: when split information is recorded in export data, insert checks only on backedges that can be reached on a split-call-free path.

 	// Loop reschedule checks compare the stack pointer with
 	// the per-g stack bound.  If the pointer appears invalid,
 	// that means a reschedule check is needed.
 	//
 	// Steps:
 	// 1. locate backedges.
 	// 2. Record memory definitions at block end so that
 	//    the SSA graph for mem can be properly modified.
 	// 3. Ensure that phi functions that will-be-needed for mem
 	//    are present in the graph, initially with trivial inputs.
 	// 4. Record all to-be-modified uses of mem;
 	//    apply modifications (split into two steps to simplify and
 	//    avoided nagging order-dependencies).
 	// 5. Rewrite backedges to include reschedule check,
 	//    and modify destination phi function appropriately with new
 	//    definitions for mem.

 	if f.NoSplit { // nosplit functions don't reschedule.
 		return
 	}

 	backedges := backedges(f)
 	if len(backedges) == 0 { // no backedges means no rescheduling checks.
 		return
 	}

 	lastMems := findLastMems(f)

 	idom := f.Idom()
 	po := f.postorder()
 	// The ordering in the dominator tree matters; it's important that
 	// the walk of the dominator tree also be a preorder (i.e., a node is
 	// visited only after all its non-backedge predecessors have been visited).
 	sdom := newSparseOrderedTree(f, idom, po)

 	if f.pass.debug > 1 {
 		fmt.Printf("before %s = %s\n", f.Name, sdom.treestructure(f.Entry))
 	}

 	tofixBackedges := []edgeMem{}

 	for _, e := range backedges { // TODO: could filter here by calls in loops, if declared and inferred nosplit are recorded in export data.
 		tofixBackedges = append(tofixBackedges, edgeMem{e, nil})
 	}

 	// It's possible that there is no memory state (no global/pointer loads/stores or calls)
 	if lastMems[f.Entry.ID] == nil {
 		lastMems[f.Entry.ID] = f.Entry.NewValue0(f.Entry.Pos, OpInitMem, types.TypeMem)
 	}

 	memDefsAtBlockEnds := make([]*Value, f.NumBlocks()) // For each block, the mem def seen at its bottom. Could be from earlier block.

 	// Propagate last mem definitions forward through successor blocks.
 	for i := len(po) - 1; i >= 0; i-- {
 		b := po[i]
 		mem := lastMems[b.ID]
 		for j := 0; mem == nil; j++ { // if there's no def, then there's no phi, so the visible mem is identical in all predecessors.
 			// loop because there might be backedges that haven't been visited yet.
 			mem = memDefsAtBlockEnds[b.Preds[j].b.ID]
 		}
 		memDefsAtBlockEnds[b.ID] = mem
 		if f.pass.debug > 2 {
 			fmt.Printf("memDefsAtBlockEnds[%s] = %s\n", b, mem)
 		}
 	}

 	// Maps from block to newly-inserted phi function in block.
 	newmemphis := make(map[*Block]rewrite)

 	// Insert phi functions as necessary for future changes to flow graph.
 	for i, emc := range tofixBackedges {
 		e := emc.e
 		h := e.b

 		// find the phi function for the memory input at "h", if there is one.
 		var headerMemPhi *Value // look for header mem phi

 		for _, v := range h.Values {
 			if v.Op == OpPhi && v.Type.IsMemory() {
 				headerMemPhi = v
 			}
 		}

 		if headerMemPhi == nil {
 			// if the header is nil, make a trivial phi from the dominator
 			mem0 := memDefsAtBlockEnds[idom[h.ID].ID]
 			headerMemPhi = newPhiFor(h, mem0)
 			newmemphis[h] = rewrite{before: mem0, after: headerMemPhi}
 			addDFphis(mem0, h, h, f, memDefsAtBlockEnds, newmemphis, sdom)

 		}
 		tofixBackedges[i].m = headerMemPhi

 	}
 	if f.pass.debug > 0 {
 		for b, r := range newmemphis {
 			fmt.Printf("before b=%s, rewrite=%s\n", b, r.String())
 		}
 	}

 	// dfPhiTargets notes inputs to phis in dominance frontiers that should not
 	// be rewritten as part of the dominated children of some outer rewrite.
 	dfPhiTargets := make(map[rewriteTarget]bool)

 	rewriteNewPhis(f.Entry, f.Entry, f, memDefsAtBlockEnds, newmemphis, dfPhiTargets, sdom)

 	if f.pass.debug > 0 {
 		for b, r := range newmemphis {
 			fmt.Printf("after b=%s, rewrite=%s\n", b, r.String())
 		}
 	}

 	// Apply collected rewrites.
 	for _, r := range newmemphis {
 		for _, rw := range r.rewrites {
 			rw.v.SetArg(rw.i, r.after)
 		}
 	}

 	// Rewrite backedges to include reschedule checks.
 	for _, emc := range tofixBackedges {
 		e := emc.e
 		headerMemPhi := emc.m
 		h := e.b
 		i := e.i
 		p := h.Preds[i]
 		bb := p.b
 		mem0 := headerMemPhi.Args[i]
 		// bb e->p h,
 		// Because we're going to insert a rare-call, make sure the
 		// looping edge still looks likely.
 		likely := BranchLikely
 		if p.i != 0 {
 			likely = BranchUnlikely
 		}
 		if bb.Kind != BlockPlain { // backedges can be unconditional. e.g., if x { something; continue }
 			bb.Likely = likely
 		}

 		// rewrite edge to include reschedule check
 		// existing edges:
 		//
 		// bb.Succs[p.i] == Edge{h, i}
 		// h.Preds[i] == p == Edge{bb,p.i}
 		//
 		// new block(s):
 		// test:
 		//    if sp < g.limit { goto sched }
 		//    goto join
 		// sched:
 		//    mem1 := call resched (mem0)
 		//    goto join
 		// join:
 		//    mem2 := phi(mem0, mem1)
 		//    goto h
 		//
 		// and correct arg i of headerMemPhi and headerCtrPhi
 		//
 		// EXCEPT: join block containing only phi functions is bad
 		// for the register allocator.  Therefore, there is no
 		// join, and branches targeting join must instead target
 		// the header, and the other phi functions within header are
 		// adjusted for the additional input.

 		test := f.NewBlock(BlockIf)
 		sched := f.NewBlock(BlockPlain)

 		test.Pos = bb.Pos
 		sched.Pos = bb.Pos

 		// if sp < g.limit { goto sched }
 		// goto header

 		cfgtypes := &f.Config.Types
 		pt := cfgtypes.Uintptr
 		g := test.NewValue1(bb.Pos, OpGetG, pt, mem0)
 		sp := test.NewValue0(bb.Pos, OpSP, pt)
 		cmpOp := OpLess64U
 		if pt.Size() == 4 {
 			cmpOp = OpLess32U
 		}
 		limaddr := test.NewValue1I(bb.Pos, OpOffPtr, pt, 2*pt.Size(), g)
 		lim := test.NewValue2(bb.Pos, OpLoad, pt, limaddr, mem0)
 		cmp := test.NewValue2(bb.Pos, cmpOp, cfgtypes.Bool, sp, lim)
 		test.SetControl(cmp)

 		// if true, goto sched
 		test.AddEdgeTo(sched)

 		// if false, rewrite edge to header.
 		// do NOT remove+add, because that will perturb all the other phi functions
 		// as well as messing up other edges to the header.
 		test.Succs = append(test.Succs, Edge{h, i})
 		h.Preds[i] = Edge{test, 1}
 		headerMemPhi.SetArg(i, mem0)

 		test.Likely = BranchUnlikely

 		// sched:
 		//    mem1 := call resched (mem0)
 		//    goto header
 		resched := f.fe.Syslook("goschedguarded")
 		mem1 := sched.NewValue1A(bb.Pos, OpStaticCall, types.TypeMem, StaticAuxCall(resched, nil, nil), mem0)
 		sched.AddEdgeTo(h)
 		headerMemPhi.AddArg(mem1)

 		bb.Succs[p.i] = Edge{test, 0}
 		test.Preds = append(test.Preds, Edge{bb, p.i})

 		// Must correct all the other phi functions in the header for new incoming edge.
 		// Except for mem phis, it will be the same value seen on the original
 		// backedge at index i.
 		for _, v := range h.Values {
 			if v.Op == OpPhi && v != headerMemPhi {
 				v.AddArg(v.Args[i])
 			}
 		}
 	}

 	f.invalidateCFG()

 	if f.pass.debug > 1 {
 		sdom = newSparseTree(f, f.Idom())
 		fmt.Printf("after %s = %s\n", f.Name, sdom.treestructure(f.Entry))
 	}
 }

 // newPhiFor inserts a new Phi function into b,
 // with all inputs set to v.
 func newPhiFor(b *Block, v *Value) *Value {
 	phiV := b.NewValue0(b.Pos, OpPhi, v.Type)

 	for range b.Preds {
 		phiV.AddArg(v)
 	}
 	return phiV
 }

 // rewriteNewPhis updates newphis[h] to record all places where the new phi function inserted
 // in block h will replace a previous definition.  Block b is the block currently being processed;
 // if b has its own phi definition then it takes the place of h.
 // defsForUses provides information about other definitions of the variable that are present
 // (and if nil, indicates that the variable is no longer live)
 // sdom must yield a preorder of the flow graph if recursively walked, root-to-children.
 // The result of newSparseOrderedTree with order supplied by a dfs-postorder satisfies this
 // requirement.
 func rewriteNewPhis(h, b *Block, f *Func, defsForUses []*Value, newphis map[*Block]rewrite, dfPhiTargets map[rewriteTarget]bool, sdom SparseTree) {
 	// If b is a block with a new phi, then a new rewrite applies below it in the dominator tree.
 	if _, ok := newphis[b]; ok {
 		h = b
 	}
 	change := newphis[h]
 	x := change.before
 	y := change.after

 	// Apply rewrites to this block
 	if x != nil { // don't waste time on the common case of no definition.
 		p := &change.rewrites
 		for _, v := range b.Values {
 			if v == y { // don't rewrite self -- phi inputs are handled below.
 				continue
 			}
 			for i, w := range v.Args {
 				if w != x {
 					continue
 				}
 				tgt := rewriteTarget{v, i}

 				// It's possible dominated control flow will rewrite this instead.
 				// Visiting in preorder (a property of how sdom was constructed)
 				// ensures that these are seen in the proper order.
 				if dfPhiTargets[tgt] {
 					continue
 				}
 				*p = append(*p, tgt)
 				if f.pass.debug > 1 {
 					fmt.Printf("added block target for h=%v, b=%v, x=%v, y=%v, tgt.v=%s, tgt.i=%d\n",
 						h, b, x, y, v, i)
 				}
 			}
 		}

 		// Rewrite appropriate inputs of phis reached in successors
 		// in dominance frontier, self, and dominated.
 		// If the variable def reaching uses in b is itself defined in b, then the new phi function
 		// does not reach the successors of b.  (This assumes a bit about the structure of the
 		// phi use-def graph, but it's true for memory.)
 		if dfu := defsForUses[b.ID]; dfu != nil && dfu.Block != b {
 			for _, e := range b.Succs {
 				s := e.b

 				for _, v := range s.Values {
 					if v.Op == OpPhi && v.Args[e.i] == x {
 						tgt := rewriteTarget{v, e.i}
 						*p = append(*p, tgt)
 						dfPhiTargets[tgt] = true
 						if f.pass.debug > 1 {
 							fmt.Printf("added phi target for h=%v, b=%v, s=%v, x=%v, y=%v, tgt.v=%s, tgt.i=%d\n",
 								h, b, s, x, y, v.LongString(), e.i)
 						}
 						break
 					}
 				}
 			}
 		}
 		newphis[h] = change
 	}

 	for c := sdom[b.ID].child; c != nil; c = sdom[c.ID].sibling {
 		rewriteNewPhis(h, c, f, defsForUses, newphis, dfPhiTargets, sdom) // TODO: convert to explicit stack from recursion.
 	}
 }

 // addDFphis creates new trivial phis that are necessary to correctly reflect (within SSA)
 // a new definition for variable "x" inserted at h (usually but not necessarily a phi).
 // These new phis can only occur at the dominance frontier of h; block s is in the dominance
 // frontier of h if h does not strictly dominate s and if s is a successor of a block b where
 // either b = h or h strictly dominates b.
 // These newly created phis are themselves new definitions that may require addition of their
 // own trivial phi functions in their own dominance frontier, and this is handled recursively.
 func addDFphis(x *Value, h, b *Block, f *Func, defForUses []*Value, newphis map[*Block]rewrite, sdom SparseTree) {
 	oldv := defForUses[b.ID]
 	if oldv != x { // either a new definition replacing x, or nil if it is proven that there are no uses reachable from b
 		return
 	}
 	idom := f.Idom()
 outer:
 	for _, e := range b.Succs {
 		s := e.b
 		// check phi functions in the dominance frontier
 		if sdom.isAncestor(h, s) {
 			continue // h dominates s, successor of b, therefore s is not in the frontier.
 		}
 		if _, ok := newphis[s]; ok {
 			continue // successor s of b already has a new phi function, so there is no need to add another.
 		}
 		if x != nil {
 			for _, v := range s.Values {
 				if v.Op == OpPhi && v.Args[e.i] == x {
 					continue outer // successor s of b has an old phi function, so there is no need to add another.
 				}
 			}
 		}

 		old := defForUses[idom[s.ID].ID] // new phi function is correct-but-redundant, combining value "old" on all inputs.
 		headerPhi := newPhiFor(s, old)
 		// the new phi will replace "old" in block s and all blocks dominated by s.
 		newphis[s] = rewrite{before: old, after: headerPhi} // record new phi, to have inputs labeled "old" rewritten to "headerPhi"
 		addDFphis(old, s, s, f, defForUses, newphis, sdom)  // the new definition may also create new phi functions.
 	}
 	for c := sdom[b.ID].child; c != nil; c = sdom[c.ID].sibling {
 		addDFphis(x, h, c, f, defForUses, newphis, sdom) // TODO: convert to explicit stack from recursion.
 	}
 }

 // findLastMems maps block ids to last memory-output op in a block, if any
 func findLastMems(f *Func) []*Value {

 	var stores []*Value
 	lastMems := make([]*Value, f.NumBlocks())
 	storeUse := f.newSparseSet(f.NumValues())
 	defer f.retSparseSet(storeUse)
 	for _, b := range f.Blocks {
 		// Find all the stores in this block. Categorize their uses:
 		//  storeUse contains stores which are used by a subsequent store.
 		storeUse.clear()
 		stores = stores[:0]
 		var memPhi *Value
 		for _, v := range b.Values {
 			if v.Op == OpPhi {
 				if v.Type.IsMemory() {
 					memPhi = v
 				}
 				continue
 			}
 			if v.Type.IsMemory() {
 				stores = append(stores, v)
 				for _, a := range v.Args {
 					if a.Block == b && a.Type.IsMemory() {
 						storeUse.add(a.ID)
 					}
 				}
 			}
 		}
 		if len(stores) == 0 {
 			lastMems[b.ID] = memPhi
 			continue
 		}

 		// find last store in the block
 		var last *Value
 		for _, v := range stores {
 			if storeUse.contains(v.ID) {
 				continue
 			}
 			if last != nil {
 				b.Fatalf("two final stores - simultaneous live stores %s %s", last, v)
 			}
 			last = v
 		}
 		if last == nil {
 			b.Fatalf("no last store found - cycle?")
 		}
 		lastMems[b.ID] = last
 	}
 	return lastMems
 }

 // mark values
 type markKind uint8

 const (
 	notFound    markKind = iota // block has not been discovered yet
 	notExplored                 // discovered and in queue, outedges not processed yet
 	explored                    // discovered and in queue, outedges processed
 	done                        // all done, in output ordering
 )

 type backedgesState struct {
 	b *Block
 	i int
 }

 // backedges returns a slice of successor edges that are back
 // edges.  For reducible loops, edge.b is the header.
 func backedges(f *Func) []Edge {
 	edges := []Edge{}
 	mark := make([]markKind, f.NumBlocks())
 	stack := []backedgesState{}

 	mark[f.Entry.ID] = notExplored
 	stack = append(stack, backedgesState{f.Entry, 0})

 	for len(stack) > 0 {
 		l := len(stack)
 		x := stack[l-1]
 		if x.i < len(x.b.Succs) {
 			e := x.b.Succs[x.i]
 			stack[l-1].i++
 			s := e.b
 			if mark[s.ID] == notFound {
 				mark[s.ID] = notExplored
 				stack = append(stack, backedgesState{s, 0})
 			} else if mark[s.ID] == notExplored {
 				edges = append(edges, e)
 			}
 		} else {
 			mark[x.b.ID] = done
 			stack = stack[0 : l-1]
 		}
 	}
 	return edges
 }
	// Copyright 2016 The Go Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style
	// license that can be found in the LICENSE file.

	package ssa

	import (
	"cmd/compile/internal/types"
	"fmt"
	)

	// an edgeMem records a backedge, together with the memory
	// phi functions at the target of the backedge that must
	// be updated when a rescheduling check replaces the backedge.
	type edgeMem struct {
	e Edge
	m *Value // phi for memory at dest of e
	}

	// a rewriteTarget is a value-argindex pair indicating
	// where a rewrite is applied. Note that this is for values,
	// not for block controls, because block controls are not targets
	// for the rewrites performed in inserting rescheduling checks.
	type rewriteTarget struct {
	v *Value
	i int
	}

	type rewrite struct {
	before, after *Value // before is the expected value before rewrite, after is the new value installed.
	rewrites []rewriteTarget // all the targets for this rewrite.
	}

	func (r *rewrite) String() string {
	s := "\n\tbefore=" + r.before.String() + ", after=" + r.after.String()
	for _, rw := range r.rewrites {
	s += ", (i=" + fmt.Sprint(rw.i) + ", v=" + rw.v.LongString() + ")"
	}
	s += "\n"
	return s
	}

	// insertLoopReschedChecks inserts rescheduling checks on loop backedges.
	func insertLoopReschedChecks(f *Func) {
	// TODO: when split information is recorded in export data, insert checks only on backedges that can be reached on a split-call-free path.

	// Loop reschedule checks compare the stack pointer with
	// the per-g stack bound. If the pointer appears invalid,
	// that means a reschedule check is needed.
	//
	// Steps:
	// 1. locate backedges.
	// 2. Record memory definitions at block end so that
	// the SSA graph for mem can be properly modified.
	// 3. Ensure that phi functions that will-be-needed for mem
	// are present in the graph, initially with trivial inputs.
	// 4. Record all to-be-modified uses of mem;
	// apply modifications (split into two steps to simplify and
	// avoided nagging order-dependencies).
	// 5. Rewrite backedges to include reschedule check,
	// and modify destination phi function appropriately with new
	// definitions for mem.

	if f.NoSplit { // nosplit functions don't reschedule.
	return
	}

	backedges := backedges(f)
	if len(backedges) == 0 { // no backedges means no rescheduling checks.
	return
	}

	lastMems := findLastMems(f)

	idom := f.Idom()
	po := f.postorder()
	// The ordering in the dominator tree matters; it's important that
	// the walk of the dominator tree also be a preorder (i.e., a node is
	// visited only after all its non-backedge predecessors have been visited).
	sdom := newSparseOrderedTree(f, idom, po)

	if f.pass.debug > 1 {
	fmt.Printf("before %s = %s\n", f.Name, sdom.treestructure(f.Entry))
	}

	tofixBackedges := []edgeMem{}

	for _, e := range backedges { // TODO: could filter here by calls in loops, if declared and inferred nosplit are recorded in export data.
	tofixBackedges = append(tofixBackedges, edgeMem{e, nil})
	}

	// It's possible that there is no memory state (no global/pointer loads/stores or calls)
	if lastMems[f.Entry.ID] == nil {
	lastMems[f.Entry.ID] = f.Entry.NewValue0(f.Entry.Pos, OpInitMem, types.TypeMem)
	}

	memDefsAtBlockEnds := make([]*Value, f.NumBlocks()) // For each block, the mem def seen at its bottom. Could be from earlier block.

	// Propagate last mem definitions forward through successor blocks.
	for i := len(po) - 1; i >= 0; i-- {
	b := po[i]
	mem := lastMems[b.ID]
	for j := 0; mem == nil; j++ { // if there's no def, then there's no phi, so the visible mem is identical in all predecessors.
	// loop because there might be backedges that haven't been visited yet.
	mem = memDefsAtBlockEnds[b.Preds[j].b.ID]
	}
	memDefsAtBlockEnds[b.ID] = mem
	if f.pass.debug > 2 {
	fmt.Printf("memDefsAtBlockEnds[%s] = %s\n", b, mem)
	}
	}

	// Maps from block to newly-inserted phi function in block.
	newmemphis := make(map[*Block]rewrite)

	// Insert phi functions as necessary for future changes to flow graph.
	for i, emc := range tofixBackedges {
	e := emc.e
	h := e.b

	// find the phi function for the memory input at "h", if there is one.
	var headerMemPhi *Value // look for header mem phi

	for _, v := range h.Values {
	if v.Op == OpPhi && v.Type.IsMemory() {
	headerMemPhi = v
	}
	}

	if headerMemPhi == nil {
	// if the header is nil, make a trivial phi from the dominator
	mem0 := memDefsAtBlockEnds[idom[h.ID].ID]
	headerMemPhi = newPhiFor(h, mem0)
	newmemphis[h] = rewrite{before: mem0, after: headerMemPhi}
	addDFphis(mem0, h, h, f, memDefsAtBlockEnds, newmemphis, sdom)

	}
	tofixBackedges[i].m = headerMemPhi

	}
	if f.pass.debug > 0 {
	for b, r := range newmemphis {
	fmt.Printf("before b=%s, rewrite=%s\n", b, r.String())
	}
	}

	// dfPhiTargets notes inputs to phis in dominance frontiers that should not
	// be rewritten as part of the dominated children of some outer rewrite.
	dfPhiTargets := make(map[rewriteTarget]bool)

	rewriteNewPhis(f.Entry, f.Entry, f, memDefsAtBlockEnds, newmemphis, dfPhiTargets, sdom)

	if f.pass.debug > 0 {
	for b, r := range newmemphis {
	fmt.Printf("after b=%s, rewrite=%s\n", b, r.String())
	}
	}

	// Apply collected rewrites.
	for _, r := range newmemphis {
	for _, rw := range r.rewrites {
	rw.v.SetArg(rw.i, r.after)
	}
	}

	// Rewrite backedges to include reschedule checks.
	for _, emc := range tofixBackedges {
	e := emc.e
	headerMemPhi := emc.m
	h := e.b
	i := e.i
	p := h.Preds[i]
	bb := p.b
	mem0 := headerMemPhi.Args[i]
	// bb e->p h,
	// Because we're going to insert a rare-call, make sure the
	// looping edge still looks likely.
	likely := BranchLikely
	if p.i != 0 {
	likely = BranchUnlikely
	}
	if bb.Kind != BlockPlain { // backedges can be unconditional. e.g., if x { something; continue }
	bb.Likely = likely
	}

	// rewrite edge to include reschedule check
	// existing edges:
	//
	// bb.Succs[p.i] == Edge{h, i}
	// h.Preds[i] == p == Edge{bb,p.i}
	//
	// new block(s):
	// test:
	// if sp < g.limit { goto sched }
	// goto join
	// sched:
	// mem1 := call resched (mem0)
	// goto join
	// join:
	// mem2 := phi(mem0, mem1)
	// goto h
	//
	// and correct arg i of headerMemPhi and headerCtrPhi
	//
	// EXCEPT: join block containing only phi functions is bad
	// for the register allocator. Therefore, there is no
	// join, and branches targeting join must instead target
	// the header, and the other phi functions within header are
	// adjusted for the additional input.

	test := f.NewBlock(BlockIf)
	sched := f.NewBlock(BlockPlain)

	test.Pos = bb.Pos
	sched.Pos = bb.Pos

	// if sp < g.limit { goto sched }
	// goto header

	cfgtypes := &f.Config.Types
	pt := cfgtypes.Uintptr
	g := test.NewValue1(bb.Pos, OpGetG, pt, mem0)
	sp := test.NewValue0(bb.Pos, OpSP, pt)
	cmpOp := OpLess64U
	if pt.Size() == 4 {
	cmpOp = OpLess32U
	}
	limaddr := test.NewValue1I(bb.Pos, OpOffPtr, pt, 2*pt.Size(), g)
	lim := test.NewValue2(bb.Pos, OpLoad, pt, limaddr, mem0)
	cmp := test.NewValue2(bb.Pos, cmpOp, cfgtypes.Bool, sp, lim)
	test.SetControl(cmp)

	// if true, goto sched
	test.AddEdgeTo(sched)

	// if false, rewrite edge to header.
	// do NOT remove+add, because that will perturb all the other phi functions
	// as well as messing up other edges to the header.
	test.Succs = append(test.Succs, Edge{h, i})
	h.Preds[i] = Edge{test, 1}
	headerMemPhi.SetArg(i, mem0)

	test.Likely = BranchUnlikely

	// sched:
	// mem1 := call resched (mem0)
	// goto header
	resched := f.fe.Syslook("goschedguarded")
	mem1 := sched.NewValue1A(bb.Pos, OpStaticCall, types.TypeMem, StaticAuxCall(resched, nil, nil), mem0)
	sched.AddEdgeTo(h)
	headerMemPhi.AddArg(mem1)

	bb.Succs[p.i] = Edge{test, 0}
	test.Preds = append(test.Preds, Edge{bb, p.i})

	// Must correct all the other phi functions in the header for new incoming edge.
	// Except for mem phis, it will be the same value seen on the original
	// backedge at index i.
	for _, v := range h.Values {
	if v.Op == OpPhi && v != headerMemPhi {
	v.AddArg(v.Args[i])
	}
	}
	}

	f.invalidateCFG()

	if f.pass.debug > 1 {
	sdom = newSparseTree(f, f.Idom())
	fmt.Printf("after %s = %s\n", f.Name, sdom.treestructure(f.Entry))
	}
	}

	// newPhiFor inserts a new Phi function into b,
	// with all inputs set to v.
	func newPhiFor(b Block, v Value) *Value {
	phiV := b.NewValue0(b.Pos, OpPhi, v.Type)

	for range b.Preds {
	phiV.AddArg(v)
	}
	return phiV
	}

	// rewriteNewPhis updates newphis[h] to record all places where the new phi function inserted
	// in block h will replace a previous definition. Block b is the block currently being processed;
	// if b has its own phi definition then it takes the place of h.
	// defsForUses provides information about other definitions of the variable that are present
	// (and if nil, indicates that the variable is no longer live)
	// sdom must yield a preorder of the flow graph if recursively walked, root-to-children.
	// The result of newSparseOrderedTree with order supplied by a dfs-postorder satisfies this
	// requirement.
	func rewriteNewPhis(h, b Block, f Func, defsForUses []Value, newphis map[Block]rewrite, dfPhiTargets map[rewriteTarget]bool, sdom SparseTree) {
	// If b is a block with a new phi, then a new rewrite applies below it in the dominator tree.
	if _, ok := newphis[b]; ok {
	h = b
	}
	change := newphis[h]
	x := change.before
	y := change.after

	// Apply rewrites to this block
	if x != nil { // don't waste time on the common case of no definition.
	p := &change.rewrites
	for _, v := range b.Values {
	if v == y { // don't rewrite self -- phi inputs are handled below.
	continue
	}
	for i, w := range v.Args {
	if w != x {
	continue
	}
	tgt := rewriteTarget{v, i}

	// It's possible dominated control flow will rewrite this instead.
	// Visiting in preorder (a property of how sdom was constructed)
	// ensures that these are seen in the proper order.
	if dfPhiTargets[tgt] {
	continue
	}
	p = append(p, tgt)
	if f.pass.debug > 1 {
	fmt.Printf("added block target for h=%v, b=%v, x=%v, y=%v, tgt.v=%s, tgt.i=%d\n",
	h, b, x, y, v, i)
	}
	}
	}

	// Rewrite appropriate inputs of phis reached in successors
	// in dominance frontier, self, and dominated.
	// If the variable def reaching uses in b is itself defined in b, then the new phi function
	// does not reach the successors of b. (This assumes a bit about the structure of the
	// phi use-def graph, but it's true for memory.)
	if dfu := defsForUses[b.ID]; dfu != nil && dfu.Block != b {
	for _, e := range b.Succs {
	s := e.b

	for _, v := range s.Values {
	if v.Op == OpPhi && v.Args[e.i] == x {
	tgt := rewriteTarget{v, e.i}
	p = append(p, tgt)
	dfPhiTargets[tgt] = true
	if f.pass.debug > 1 {
	fmt.Printf("added phi target for h=%v, b=%v, s=%v, x=%v, y=%v, tgt.v=%s, tgt.i=%d\n",
	h, b, s, x, y, v.LongString(), e.i)
	}
	break
	}
	}
	}
	}
	newphis[h] = change
	}

	for c := sdom[b.ID].child; c != nil; c = sdom[c.ID].sibling {
	rewriteNewPhis(h, c, f, defsForUses, newphis, dfPhiTargets, sdom) // TODO: convert to explicit stack from recursion.
	}
	}

	// addDFphis creates new trivial phis that are necessary to correctly reflect (within SSA)
	// a new definition for variable "x" inserted at h (usually but not necessarily a phi).
	// These new phis can only occur at the dominance frontier of h; block s is in the dominance
	// frontier of h if h does not strictly dominate s and if s is a successor of a block b where
	// either b = h or h strictly dominates b.
	// These newly created phis are themselves new definitions that may require addition of their
	// own trivial phi functions in their own dominance frontier, and this is handled recursively.
	func addDFphis(x Value, h, b Block, f Func, defForUses []Value, newphis map[*Block]rewrite, sdom SparseTree) {
	oldv := defForUses[b.ID]
	if oldv != x { // either a new definition replacing x, or nil if it is proven that there are no uses reachable from b
	return
	}
	idom := f.Idom()
	outer:
	for _, e := range b.Succs {
	s := e.b
	// check phi functions in the dominance frontier
	if sdom.isAncestor(h, s) {
	continue // h dominates s, successor of b, therefore s is not in the frontier.
	}
	if _, ok := newphis[s]; ok {
	continue // successor s of b already has a new phi function, so there is no need to add another.
	}
	if x != nil {
	for _, v := range s.Values {
	if v.Op == OpPhi && v.Args[e.i] == x {
	continue outer // successor s of b has an old phi function, so there is no need to add another.
	}
	}
	}

	old := defForUses[idom[s.ID].ID] // new phi function is correct-but-redundant, combining value "old" on all inputs.
	headerPhi := newPhiFor(s, old)
	// the new phi will replace "old" in block s and all blocks dominated by s.
	newphis[s] = rewrite{before: old, after: headerPhi} // record new phi, to have inputs labeled "old" rewritten to "headerPhi"
	addDFphis(old, s, s, f, defForUses, newphis, sdom) // the new definition may also create new phi functions.
	}
	for c := sdom[b.ID].child; c != nil; c = sdom[c.ID].sibling {
	addDFphis(x, h, c, f, defForUses, newphis, sdom) // TODO: convert to explicit stack from recursion.
	}
	}

	// findLastMems maps block ids to last memory-output op in a block, if any
	func findLastMems(f Func) []Value {

	var stores []*Value
	lastMems := make([]*Value, f.NumBlocks())
	storeUse := f.newSparseSet(f.NumValues())
	defer f.retSparseSet(storeUse)
	for _, b := range f.Blocks {
	// Find all the stores in this block. Categorize their uses:
	// storeUse contains stores which are used by a subsequent store.
	storeUse.clear()
	stores = stores[:0]
	var memPhi *Value
	for _, v := range b.Values {
	if v.Op == OpPhi {
	if v.Type.IsMemory() {
	memPhi = v
	}
	continue
	}
	if v.Type.IsMemory() {
	stores = append(stores, v)
	for _, a := range v.Args {
	if a.Block == b && a.Type.IsMemory() {
	storeUse.add(a.ID)
	}
	}
	}
	}
	if len(stores) == 0 {
	lastMems[b.ID] = memPhi
	continue
	}

	// find last store in the block
	var last *Value
	for _, v := range stores {
	if storeUse.contains(v.ID) {
	continue
	}
	if last != nil {
	b.Fatalf("two final stores - simultaneous live stores %s %s", last, v)
	}
	last = v
	}
	if last == nil {
	b.Fatalf("no last store found - cycle?")
	}
	lastMems[b.ID] = last
	}
	return lastMems
	}

	// mark values
	type markKind uint8

	const (
	notFound markKind = iota // block has not been discovered yet
	notExplored // discovered and in queue, outedges not processed yet
	explored // discovered and in queue, outedges processed
	done // all done, in output ordering
	)

	type backedgesState struct {
	b *Block
	i int
	}

	// backedges returns a slice of successor edges that are back
	// edges. For reducible loops, edge.b is the header.
	func backedges(f *Func) []Edge {
	edges := []Edge{}
	mark := make([]markKind, f.NumBlocks())
	stack := []backedgesState{}

	mark[f.Entry.ID] = notExplored
	stack = append(stack, backedgesState{f.Entry, 0})

	for len(stack) > 0 {
	l := len(stack)
	x := stack[l-1]
	if x.i < len(x.b.Succs) {
	e := x.b.Succs[x.i]
	stack[l-1].i++
	s := e.b
	if mark[s.ID] == notFound {
	mark[s.ID] = notExplored
	stack = append(stack, backedgesState{s, 0})
	} else if mark[s.ID] == notExplored {
	edges = append(edges, e)
	}
	} else {
	mark[x.b.ID] = done
	stack = stack[0 : l-1]
	}
	}
	return edges
	}