 // go/ssa/lift.go - tools - Git at Google

 // Copyright 2013 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

 package ssa

 // This file defines the lifting pass which tries to "lift" Alloc
 // cells (new/local variables) into SSA registers, replacing loads
 // with the dominating stored value, eliminating loads and stores, and
 // inserting φ-nodes as needed.

 // Cited papers and resources:
 //
 // Ron Cytron et al. 1991. Efficiently computing SSA form...
 // http://doi.acm.org/10.1145/115372.115320
 //
 // Cooper, Harvey, Kennedy.  2001.  A Simple, Fast Dominance Algorithm.
 // Software Practice and Experience 2001, 4:1-10.
 // http://www.hipersoft.rice.edu/grads/publications/dom14.pdf
 //
 // Daniel Berlin, llvmdev mailing list, 2012.
 // http://lists.cs.uiuc.edu/pipermail/llvmdev/2012-January/046638.html
 // (Be sure to expand the whole thread.)

 // TODO(adonovan): opt: there are many optimizations worth evaluating, and
 // the conventional wisdom for SSA construction is that a simple
 // algorithm well engineered often beats those of better asymptotic
 // complexity on all but the most egregious inputs.
 //
 // Danny Berlin suggests that the Cooper et al. algorithm for
 // computing the dominance frontier is superior to Cytron et al.
 // Furthermore he recommends that rather than computing the DF for the
 // whole function then renaming all alloc cells, it may be cheaper to
 // compute the DF for each alloc cell separately and throw it away.
 //
 // Consider exploiting liveness information to avoid creating dead
 // φ-nodes which we then immediately remove.
 //
 // Also see many other "TODO: opt" suggestions in the code.

 import (
 	"fmt"
 	"go/token"
 	"go/types"
 	"math/big"
 	"os"
 )

 // If true, show diagnostic information at each step of lifting.
 // Very verbose.
 const debugLifting = false

 // domFrontier maps each block to the set of blocks in its dominance
 // frontier.  The outer slice is conceptually a map keyed by
 // BasicBlock.Index.  The inner slice is conceptually a set, possibly
 // containing duplicates (add performs no membership test).
 //
 // TODO(adonovan): opt: measure impact of dups; consider a packed bit
 // representation, e.g. big.Int, and bitwise parallel operations for
 // the union step in build's loop over dominator-tree children.
 //
 // domFrontier's methods mutate the slice's elements but not its
 // length, so their receivers needn't be pointers.
 type domFrontier [][]*BasicBlock

 // add appends v to the frontier recorded for u.  No duplicate check
 // is made, so v may appear more than once.
 func (df domFrontier) add(u, v *BasicBlock) {
 	df[u.Index] = append(df[u.Index], v)
 }

 // build fills in df for every block of the dominator (sub)tree rooted
 // at u, following the Cytron et al. algorithm.
 //
 // TODO(adonovan): opt: consider Berlin approach, computing pruned SSA
 // by pruning the entire IDF computation, rather than merely pruning
 // the DF -> IDF step.
 func (df domFrontier) build(u *BasicBlock) {
 	kids := u.dom.children

 	// Children first, so that blocks are handled in postorder of
 	// the dominator tree.
 	for _, kid := range kids {
 		df.build(kid)
 	}

 	// CFG successors of u whose immediate dominator is not u.
 	for _, succ := range u.Succs {
 		if succ.dom.idom != u {
 			df.add(u, succ)
 		}
 	}

 	// Frontier members of each child whose immediate dominator is not u.
 	for _, kid := range kids {
 		for _, b := range df[kid.Index] {
 			// TODO(adonovan): opt: use word-parallel bitwise union.
 			if b.dom.idom != u {
 				df.add(u, b)
 			}
 		}
 	}
 }

 // buildDomFrontier returns the dominance frontier of fn, with one
 // entry per block of fn.Blocks.  The frontier is built from the
 // entry block fn.Blocks[0] and, if present, from fn.Recover.
 func buildDomFrontier(fn *Function) domFrontier {
 	frontier := make(domFrontier, len(fn.Blocks))
 	frontier.build(fn.Blocks[0])
 	if rec := fn.Recover; rec != nil {
 		frontier.build(rec)
 	}
 	return frontier
 }

 // removeInstr deletes every occurrence of instr from refs, compacting
 // the survivors in place, and returns the shortened slice.  The
 // vacated tail of the original slice is set to nil.
 func removeInstr(refs []Instruction, instr Instruction) []Instruction {
 	kept := refs[:0]
 	for _, ref := range refs {
 		if ref != instr {
 			kept = append(kept, ref)
 		}
 	}
 	for k := len(kept); k != len(refs); k++ {
 		refs[k] = nil // aid GC
 	}
 	return kept
 }

 // lift promotes to SSA registers every local or new Alloc of fn that
 // is accessed only through loads and stores, inserting φ-nodes where
 // necessary.  The result is a program in classical pruned SSA form.
 //
 // Preconditions:
 // - fn has no dead blocks (blockopt has run).
 // - Def/use info (Operands and Referrers) is up-to-date.
 // - The dominator tree is up-to-date.
 func lift(fn *Function) {
 	// TODO(adonovan): opt: lots of little optimizations may be
 	// worthwhile here, especially if they let us skip
 	// buildDomFrontier.  For example:
 	//
 	// - Alloc never loaded?  Eliminate.
 	// - Alloc never stored?  Replace all loads with a zero constant.
 	// - Alloc stored once?  Replace loads with dominating store;
 	//   remember that an Alloc is itself an effective store of zero.
 	// - Alloc used only within a single block?
 	//   Use degenerate algorithm avoiding φ-nodes.
 	// - Consider synergy with scalar replacement of aggregates (SRA),
 	//   e.g. *(&x.f) where x is an Alloc.  Perhaps generating
 	//   Field(x, .f) instead of Load(FieldIndex(x, .f)) would give
 	//   better results.  Unclear.
 	//
 	// But we will start with the simplest correct code.
 	frontier := buildDomFrontier(fn)

 	if debugLifting {
 		printedTitle := false
 		for idx, blocks := range frontier {
 			if blocks == nil {
 				continue
 			}
 			if !printedTitle {
 				fmt.Fprintf(os.Stderr, "Dominance frontier of %s:\n", fn)
 				printedTitle = true
 			}
 			fmt.Fprintf(os.Stderr, "\t%s: %s\n", fn.Blocks[idx], blocks)
 		}
 	}

 	var (
 		phis = make(newPhiMap)

 		// sawDefer records whether fn contains a Defer; if it
 		// does not, its RunDefers instructions are dropped below.
 		sawDefer = false

 		// nextPhiNum generates ~unique ids for Phi nodes as a
 		// debugging aid.  Large numbers make them highly
 		// visible.  All nodes are renumbered later.
 		nextPhiNum = 1000

 		// liftedCount numbers the liftable allocs densely; the
 		// renaming phase uses these numbers as slice indices.
 		liftedCount = 0
 	)

 	// Phase 1: decide which allocs to lift, place their φ-nodes in
 	// phis, and count the RunDefers of each block.
 	for _, blk := range fn.Blocks {
 		blk.gaps, blk.rundefers = 0, 0
 		for _, ins := range blk.Instrs {
 			switch ins := ins.(type) {
 			case *Alloc:
 				if liftAlloc(frontier, ins, phis, &nextPhiNum) {
 					ins.index = liftedCount
 					liftedCount++
 				} else {
 					ins.index = -1
 				}
 			case *Defer:
 				sawDefer = true
 			case *RunDefers:
 				blk.rundefers++
 			}
 		}
 	}

 	// Phase 2: renaming.  The table maps an alloc (keyed by index)
 	// to its replacement value.  A nil entry signifies the zero
 	// constant of the appropriate type; the Const is constructed
 	// lazily, at most once on each path through the domtree.
 	// TODO(adonovan): opt: cache per-function not per subtree.
 	rename(fn.Blocks[0], make([]Value, liftedCount), phis)

 	// Phase 3: eliminate dead φ-nodes.
 	removeDeadPhis(fn.Blocks, phis)

 	// Phase 4: rebuild each block's Instrs as its surviving φ-nodes
 	// followed by the instructions that rename left non-nil (the
 	// nil ones are counted in BasicBlock.gaps), dropping RunDefers
 	// when fn has no Defer.
 	for _, blk := range fn.Blocks {
 		live := phis[blk]

 		doomedRunDefers := 0
 		if !sawDefer {
 			doomedRunDefers = blk.rundefers
 		}

 		if len(live)+blk.gaps+doomedRunDefers == 0 {
 			continue // fast path: no new phis or gaps
 		}

 		// Compact live + non-nil Instrs into a new slice.
 		// TODO(adonovan): opt: compact in situ (rightwards)
 		// if Instrs has sufficient space or slack.
 		out := make([]Instruction, len(blk.Instrs)+len(live)-blk.gaps-doomedRunDefers)
 		for k, np := range live {
 			out[k] = np.phi
 		}
 		next := len(live)
 		for _, ins := range blk.Instrs {
 			if ins == nil {
 				continue
 			}
 			if _, isRunDefers := ins.(*RunDefers); isRunDefers && !sawDefer {
 				continue
 			}
 			out[next] = ins
 			next++
 		}
 		blk.Instrs = out
 	}

 	// Phase 5: keep in fn.Locals only the allocs that were not
 	// lifted (index < 0), and nil out the vacated tail to aid GC.
 	kept := 0
 	for _, local := range fn.Locals {
 		if local.index < 0 {
 			fn.Locals[kept] = local
 			kept++
 		}
 	}
 	for k := kept; k < len(fn.Locals); k++ {
 		fn.Locals[k] = nil
 	}
 	fn.Locals = fn.Locals[:kept]
 }

 // removeDeadPhis drops from newPhis every φ-node that is not live.
 // A φ-node is live if some non-Phi instruction refers to it directly,
 // if it is an operand (transitively) of a live φ-node, or if it
 // already exists in one of blocks.
 func removeDeadPhis(blocks []*BasicBlock, newPhis newPhiMap) {
 	// First pass: compute the live set.
 	//
 	// Reachability is computed in reverse, starting from each φ,
 	// rather than forwards from each live non-Phi instruction,
 	// because this way visits much less of the Value graph.
 	live := make(map[*Phi]bool)
 	for _, list := range newPhis {
 		for _, np := range list {
 			if p := np.phi; !live[p] && phiHasDirectReferrer(p) {
 				markLivePhi(live, p)
 			}
 		}
 	}

 	// Existing φ-nodes due to && and || operators
 	// are all considered live (see Go issue 19622).
 	for _, b := range blocks {
 		for _, instr := range b.phis() {
 			markLivePhi(live, instr.(*Phi))
 		}
 	}

 	// Second pass: filter each list in place, unlinking the dead
 	// φ-nodes from the referrer lists of their operands.
 	for b, list := range newPhis {
 		kept := list[:0]
 		for _, np := range list {
 			if live[np.phi] {
 				kept = append(kept, np)
 				continue
 			}
 			for _, edge := range np.phi.Edges {
 				if refs := edge.Referrers(); refs != nil {
 					*refs = removeInstr(*refs, np.phi)
 				}
 			}
 			np.phi.block = nil
 		}
 		newPhis[b] = kept
 	}
 }

 // markLivePhi records phi in livePhis and then does the same,
 // recursively, for each operand of phi that is itself a φ-node and
 // is not yet marked.
 func markLivePhi(livePhis map[*Phi]bool, phi *Phi) {
 	livePhis[phi] = true
 	for _, op := range phi.Operands(nil) {
 		q, isPhi := (*op).(*Phi)
 		if isPhi && !livePhis[q] {
 			markLivePhi(livePhis, q)
 		}
 	}
 }

 // phiHasDirectReferrer reports whether at least one referrer of phi
 // is something other than a φ-node.  Such referrers are the roots of
 // the liveness traversal.
 func phiHasDirectReferrer(phi *Phi) bool {
 	for _, user := range *phi.Referrers() {
 		switch user.(type) {
 		case *Phi:
 			// φ-to-φ references do not count.
 		default:
 			return true
 		}
 	}
 	return false
 }

 // blockSet is a set of basic blocks stored as a bitmap: its add
 // method uses BasicBlock.Index as the bit position.
 type blockSet struct{ big.Int } // (inherit methods from Int)

 // add inserts b into the set.  It reports whether b was newly
 // inserted, i.e. whether the set changed.
 func (s *blockSet) add(b *BasicBlock) bool {
 	idx := b.Index
 	if s.Bit(idx) == 0 {
 		s.SetBit(&s.Int, idx, 1)
 		return true
 	}
 	return false
 }

 // take removes one element from s and returns its index, or returns
 // -1 if s is empty.  The element chosen is the one with the lowest
 // index.
 func (s *blockSet) take() int {
 	for i, n := 0, s.BitLen(); i < n; i++ {
 		if s.Bit(i) != 0 {
 			s.SetBit(&s.Int, i, 0)
 			return i
 		}
 	}
 	return -1
 }

 // newPhi is a pair of a newly introduced φ-node and the lifted Alloc
 // it replaces.
 type newPhi struct {
 	phi   *Phi   // the φ-node created by liftAlloc
 	alloc *Alloc // the cell whose value phi carries
 }

 // newPhiMap records, for each basic block, the newPhis that must be
 // prepended to the block.  liftAlloc adds entries; removeDeadPhis
 // later removes those whose φ-node is dead.
 type newPhiMap map[*BasicBlock][]newPhi

 // liftAlloc determines whether alloc can be lifted into registers,
 // and if so, it populates newPhis with all the φ-nodes it may require
 // and returns true.
 //
 // An alloc is not liftable if its element type is an array or struct,
 // if it is a named result of a function that has a Recover block, or
 // if any of its referrers is something other than a store to it, a
 // load from it, or a DebugRef.
 //
 // df is the dominance frontier of the enclosing function.
 // fresh is a source of fresh ids for phi nodes.
 func liftAlloc(df domFrontier, alloc *Alloc, newPhis newPhiMap, fresh *int) bool {
 	// Don't lift aggregates into registers, because we don't have
 	// a way to express their zero-constants.
 	switch deref(alloc.Type()).Underlying().(type) {
 	case *types.Array, *types.Struct:
 		return false
 	}

 	fn := alloc.Parent()

 	// Don't lift named return values in functions that defer
 	// calls that may recover from panic.
 	if fn.Recover != nil {
 		for _, nr := range fn.namedResults {
 			if nr == alloc {
 				return false
 			}
 		}
 	}

 	// Compute defblocks, the set of blocks containing a
 	// definition of the alloc cell.
 	var defblocks blockSet
 	for _, instr := range *alloc.Referrers() {
 		// Bail out if we discover the alloc is not liftable;
 		// the only operations permitted to use the alloc are
 		// loads/stores into the cell, and DebugRef.
 		switch instr := instr.(type) {
 		case *Store:
 			if instr.Val == alloc {
 				return false // address used as value
 			}
 			if instr.Addr != alloc {
 				panic("Alloc.Referrers is inconsistent")
 			}
 			defblocks.add(instr.Block())
 		case *UnOp:
 			if instr.Op != token.MUL {
 				return false // not a load
 			}
 			if instr.X != alloc {
 				panic("Alloc.Referrers is inconsistent")
 			}
 		case *DebugRef:
 			// ok
 		default:
 			return false // some other instruction
 		}
 	}
 	// The Alloc itself counts as a (zero) definition of the cell.
 	defblocks.add(alloc.Block())

 	if debugLifting {
 		fmt.Fprintln(os.Stderr, "\tlifting ", alloc, alloc.Name())
 	}

 	// Φ-insertion.
 	//
 	// What follows is the body of the main loop of the insert-φ
 	// function described by Cytron et al, but instead of using
 	// counter tricks, we just reset the 'hasAlready' and 'work'
 	// sets each iteration.  These are bitmaps so it's pretty cheap.
 	//
 	// TODO(adonovan): opt: recycle slice storage for W,
 	// hasAlready, defBlocks across liftAlloc calls.
 	var hasAlready blockSet

 	// Initialize W and work to defblocks.  Each set gets its own
 	// deep copy via Set: math/big documents that shallow copies of
 	// an Int are not supported, so plain assignment is avoided.
 	var (
 		work blockSet // blocks seen
 		W    blockSet // blocks to do
 	)
 	work.Set(&defblocks.Int)
 	W.Set(&defblocks.Int)

 	// Traverse iterated dominance frontier, inserting φ-nodes.
 	for i := W.take(); i != -1; i = W.take() {
 		u := fn.Blocks[i]
 		for _, v := range df[u.Index] {
 			if !hasAlready.add(v) {
 				continue // v already has a φ-node for this alloc
 			}

 			// Create φ-node.
 			// It will be prepended to v.Instrs later, if needed.
 			phi := &Phi{
 				Edges:   make([]Value, len(v.Preds)),
 				Comment: alloc.Comment,
 			}
 			// This is merely a debugging aid:
 			phi.setNum(*fresh)
 			*fresh++

 			phi.pos = alloc.Pos()
 			phi.setType(deref(alloc.Type()))
 			phi.block = v
 			if debugLifting {
 				fmt.Fprintf(os.Stderr, "\tplace %s = %s at block %s\n", phi.Name(), phi, v)
 			}
 			newPhis[v] = append(newPhis[v], newPhi{phi, alloc})

 			// The φ-node is itself a definition in v, so v's
 			// frontier must be visited too (once).
 			if work.add(v) {
 				W.add(v)
 			}
 		}
 	}

 	return true
 }

 // replaceAll rewrites every operand equal to x, in each instruction
 // that refers to x, so that it holds y instead.  The referrers of x
 // are appended to y's referrer list (when y has one) and x's
 // referrer list is cleared.
 // Precondition: x.Referrers() != nil, i.e. x must be local to some function.
 func replaceAll(x, y Value) {
 	xrefs, yrefs := x.Referrers(), y.Referrers()
 	var scratch []*Value
 	for _, user := range *xrefs {
 		scratch = user.Operands(scratch[:0]) // recycle storage
 		for _, op := range scratch {
 			if *op != nil && *op == x {
 				*op = y
 			}
 		}
 		if yrefs != nil {
 			*yrefs = append(*yrefs, user) // dups ok
 		}
 	}
 	*xrefs = nil // x is now unreferenced
 }

 // renamed returns the current replacement value for alloc.  A nil
 // entry in renaming denotes the implicit zero initialization; in that
 // case the zero constant is built, stored in renaming, and returned.
 func renamed(renaming []Value, alloc *Alloc) Value {
 	if v := renaming[alloc.index]; v != nil {
 		return v
 	}
 	zero := zeroConst(deref(alloc.Type()))
 	renaming[alloc.index] = zero
 	return zero
 }

 // rename implements the (Cytron et al) SSA renaming algorithm, a
 // preorder traversal of the dominator tree replacing all loads of
 // Alloc cells with the value stored to that cell by the dominating
 // store instruction.  For lifting, we need only consider loads,
 // stores and φ-nodes.
 //
 // u is the block to process; the call then recurses into u's
 // children in the dominator tree.
 //
 // renaming is a map from *Alloc (keyed by index number) to its
 // dominating stored value; a nil entry stands for the zero value of
 // the cell, which renamed materializes on demand.  The slice is
 // updated in place.
 //
 // newPhis[x] is the set of new φ-nodes to be prepended to block x.
 //
 // Instructions that are deleted are replaced by nil in u.Instrs and
 // counted in u.gaps; u.Instrs is not compacted here.
 func rename(u *BasicBlock, renaming []Value, newPhis newPhiMap) {
 	// Each φ-node becomes the new name for its associated Alloc.
 	for _, np := range newPhis[u] {
 		phi := np.phi
 		alloc := np.alloc
 		renaming[alloc.index] = phi
 	}

 	// Rename loads and stores of allocs.
 	for i, instr := range u.Instrs {
 		switch instr := instr.(type) {
 		case *Alloc:
 			if instr.index >= 0 { // store of zero to Alloc cell
 				// Replace dominated loads by the zero value.
 				renaming[instr.index] = nil
 				if debugLifting {
 					fmt.Fprintf(os.Stderr, "\tkill alloc %s\n", instr)
 				}
 				// Delete the Alloc.
 				u.Instrs[i] = nil
 				u.gaps++
 			}

 		case *Store:
 			if alloc, ok := instr.Addr.(*Alloc); ok && alloc.index >= 0 { // store to Alloc cell
 				// Replace dominated loads by the stored value.
 				renaming[alloc.index] = instr.Val
 				if debugLifting {
 					fmt.Fprintf(os.Stderr, "\tkill store %s; new value: %s\n",
 						instr, instr.Val.Name())
 				}
 				// Remove the store from the referrer list of the stored value.
 				if refs := instr.Val.Referrers(); refs != nil {
 					*refs = removeInstr(*refs, instr)
 				}
 				// Delete the Store.
 				u.Instrs[i] = nil
 				u.gaps++
 			}

 		case *UnOp:
 			if instr.Op == token.MUL {
 				if alloc, ok := instr.X.(*Alloc); ok && alloc.index >= 0 { // load of Alloc cell
 					newval := renamed(renaming, alloc)
 					if debugLifting {
 						fmt.Fprintf(os.Stderr, "\tupdate load %s = %s with %s\n",
 							instr.Name(), instr, newval.Name())
 					}
 					// Replace all references to
 					// the loaded value by the
 					// dominating stored value.
 					replaceAll(instr, newval)
 					// Delete the Load.
 					u.Instrs[i] = nil
 					u.gaps++
 				}
 			}

 		case *DebugRef:
 			if alloc, ok := instr.X.(*Alloc); ok && alloc.index >= 0 { // ref of Alloc cell
 				if instr.IsAddr {
 					// Retarget the DebugRef at the cell's current
 					// value and clear IsAddr accordingly.
 					instr.X = renamed(renaming, alloc)
 					instr.IsAddr = false

 					// Add DebugRef to instr.X's referrers.
 					if refs := instr.X.Referrers(); refs != nil {
 						*refs = append(*refs, instr)
 					}
 				} else {
 					// A source expression denotes the address
 					// of an Alloc that was optimized away.
 					instr.X = nil

 					// Delete the DebugRef.
 					u.Instrs[i] = nil
 					u.gaps++
 				}
 			}
 		}
 	}

 	// For each φ-node in a CFG successor, rename the edge.
 	for _, v := range u.Succs {
 		phis := newPhis[v]
 		if len(phis) == 0 {
 			continue
 		}
 		// i is the position of u among v's predecessors, and hence
 		// the index of the φ-edge that corresponds to u.
 		i := v.predIndex(u)
 		for _, np := range phis {
 			phi := np.phi
 			alloc := np.alloc
 			newval := renamed(renaming, alloc)
 			if debugLifting {
 				fmt.Fprintf(os.Stderr, "\tsetphi %s edge %s -> %s (#%d) (alloc=%s) := %s\n",
 					phi.Name(), u, v, i, alloc.Name(), newval.Name())
 			}
 			phi.Edges[i] = newval
 			if prefs := newval.Referrers(); prefs != nil {
 				*prefs = append(*prefs, phi)
 			}
 		}
 	}

 	// Continue depth-first recursion over domtree, pushing a
 	// fresh copy of the renaming map for each subtree.
 	for i, v := range u.dom.children {
 		r := renaming
 		if i < len(u.dom.children)-1 {
 			// On all but the final iteration, we must make
 			// a copy to avoid destructive update.  The final
 			// child may reuse renaming itself because this
 			// function does not read it again afterwards.
 			r = make([]Value, len(renaming))
 			copy(r, renaming)
 		}
 		rename(v, r, newPhis)
 	}

 }
	// Copyright 2013 The Go Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style
	// license that can be found in the LICENSE file.

	package ssa

	// This file defines the lifting pass which tries to "lift" Alloc
	// cells (new/local variables) into SSA registers, replacing loads
	// with the dominating stored value, eliminating loads and stores, and
	// inserting φ-nodes as needed.

	// Cited papers and resources:
	//
	// Ron Cytron et al. 1991. Efficiently computing SSA form...
	// http://doi.acm.org/10.1145/115372.115320
	//
	// Cooper, Harvey, Kennedy. 2001. A Simple, Fast Dominance Algorithm.
	// Software Practice and Experience 2001, 4:1-10.
	// http://www.hipersoft.rice.edu/grads/publications/dom14.pdf
	//
	// Daniel Berlin, llvmdev mailing list, 2012.
	// http://lists.cs.uiuc.edu/pipermail/llvmdev/2012-January/046638.html
	// (Be sure to expand the whole thread.)

	// TODO(adonovan): opt: there are many optimizations worth evaluating, and
	// the conventional wisdom for SSA construction is that a simple
	// algorithm well engineered often beats those of better asymptotic
	// complexity on all but the most egregious inputs.
	//
	// Danny Berlin suggests that the Cooper et al. algorithm for
	// computing the dominance frontier is superior to Cytron et al.
	// Furthermore he recommends that rather than computing the DF for the
	// whole function then renaming all alloc cells, it may be cheaper to
	// compute the DF for each alloc cell separately and throw it away.
	//
	// Consider exploiting liveness information to avoid creating dead
	// φ-nodes which we then immediately remove.
	//
	// Also see many other "TODO: opt" suggestions in the code.

	import (
	"fmt"
	"go/token"
	"go/types"
	"math/big"
	"os"
	)

	// If true, show diagnostic information at each step of lifting.
	// Very verbose.
	const debugLifting = false

	// domFrontier maps each block to the set of blocks in its dominance
	// frontier. The outer slice is conceptually a map keyed by
	// BasicBlock.Index. The inner slice is conceptually a set, possibly
	// containing duplicates.
	//
	// TODO(adonovan): opt: measure impact of dups; consider a packed bit
	// representation, e.g. big.Int, and bitwise parallel operations for
	// the union step in build's loop over dominator-tree children.
	//
	// domFrontier's methods mutate the slice's elements but not its
	// length, so their receivers needn't be pointers.
	type domFrontier [][]*BasicBlock

	// add appends v to the frontier recorded for u.  No duplicate check
	// is made, so v may appear more than once.
	//
	// The element is assigned through df[u.Index] directly so that the
	// grown slice is actually stored back into df.
	func (df domFrontier) add(u, v *BasicBlock) {
		df[u.Index] = append(df[u.Index], v)
	}

	// build fills in df for every block of the dominator (sub)tree rooted
	// at u, following the Cytron et al. algorithm.
	//
	// TODO(adonovan): opt: consider Berlin approach, computing pruned SSA
	// by pruning the entire IDF computation, rather than merely pruning
	// the DF -> IDF step.
	func (df domFrontier) build(u *BasicBlock) {
		kids := u.dom.children

		// Children first, so that blocks are handled in postorder of
		// the dominator tree.
		for _, kid := range kids {
			df.build(kid)
		}

		// CFG successors of u whose immediate dominator is not u.
		for _, succ := range u.Succs {
			if succ.dom.idom != u {
				df.add(u, succ)
			}
		}

		// Frontier members of each child whose immediate dominator is not u.
		for _, kid := range kids {
			for _, b := range df[kid.Index] {
				// TODO(adonovan): opt: use word-parallel bitwise union.
				if b.dom.idom != u {
					df.add(u, b)
				}
			}
		}
	}

	// buildDomFrontier returns the dominance frontier of fn, with one
	// entry per block of fn.Blocks.  The frontier is built from the
	// entry block fn.Blocks[0] and, if present, from fn.Recover.
	func buildDomFrontier(fn *Function) domFrontier {
		frontier := make(domFrontier, len(fn.Blocks))
		frontier.build(fn.Blocks[0])
		if rec := fn.Recover; rec != nil {
			frontier.build(rec)
		}
		return frontier
	}

	// removeInstr deletes every occurrence of instr from refs, compacting
	// the survivors in place, and returns the shortened slice.  The
	// vacated tail of the original slice is set to nil.
	func removeInstr(refs []Instruction, instr Instruction) []Instruction {
		kept := refs[:0]
		for _, ref := range refs {
			if ref != instr {
				kept = append(kept, ref)
			}
		}
		for k := len(kept); k != len(refs); k++ {
			refs[k] = nil // aid GC
		}
		return kept
	}

	// lift promotes to SSA registers every local or new Alloc of fn that
	// is accessed only through loads and stores, inserting φ-nodes where
	// necessary.  The result is a program in classical pruned SSA form.
	//
	// Preconditions:
	// - fn has no dead blocks (blockopt has run).
	// - Def/use info (Operands and Referrers) is up-to-date.
	// - The dominator tree is up-to-date.
	func lift(fn *Function) {
		// TODO(adonovan): opt: lots of little optimizations may be
		// worthwhile here, especially if they let us skip
		// buildDomFrontier.  For example:
		//
		// - Alloc never loaded?  Eliminate.
		// - Alloc never stored?  Replace all loads with a zero constant.
		// - Alloc stored once?  Replace loads with dominating store;
		//   remember that an Alloc is itself an effective store of zero.
		// - Alloc used only within a single block?
		//   Use degenerate algorithm avoiding φ-nodes.
		// - Consider synergy with scalar replacement of aggregates (SRA),
		//   e.g. *(&x.f) where x is an Alloc.  Perhaps generating
		//   Field(x, .f) instead of Load(FieldIndex(x, .f)) would give
		//   better results.  Unclear.
		//
		// But we will start with the simplest correct code.
		frontier := buildDomFrontier(fn)

		if debugLifting {
			printedTitle := false
			for idx, blocks := range frontier {
				if blocks == nil {
					continue
				}
				if !printedTitle {
					fmt.Fprintf(os.Stderr, "Dominance frontier of %s:\n", fn)
					printedTitle = true
				}
				fmt.Fprintf(os.Stderr, "\t%s: %s\n", fn.Blocks[idx], blocks)
			}
		}

		var (
			phis = make(newPhiMap)

			// sawDefer records whether fn contains a Defer; if it
			// does not, its RunDefers instructions are dropped below.
			sawDefer = false

			// nextPhiNum generates ~unique ids for Phi nodes as a
			// debugging aid.  Large numbers make them highly
			// visible.  All nodes are renumbered later.
			nextPhiNum = 1000

			// liftedCount numbers the liftable allocs densely; the
			// renaming phase uses these numbers as slice indices.
			liftedCount = 0
		)

		// Phase 1: decide which allocs to lift, place their φ-nodes in
		// phis, and count the RunDefers of each block.
		for _, blk := range fn.Blocks {
			blk.gaps, blk.rundefers = 0, 0
			for _, ins := range blk.Instrs {
				switch ins := ins.(type) {
				case *Alloc:
					if liftAlloc(frontier, ins, phis, &nextPhiNum) {
						ins.index = liftedCount
						liftedCount++
					} else {
						ins.index = -1
					}
				case *Defer:
					sawDefer = true
				case *RunDefers:
					blk.rundefers++
				}
			}
		}

		// Phase 2: renaming.  The table maps an alloc (keyed by index)
		// to its replacement value.  A nil entry signifies the zero
		// constant of the appropriate type; the Const is constructed
		// lazily, at most once on each path through the domtree.
		// TODO(adonovan): opt: cache per-function not per subtree.
		rename(fn.Blocks[0], make([]Value, liftedCount), phis)

		// Phase 3: eliminate dead φ-nodes.
		removeDeadPhis(fn.Blocks, phis)

		// Phase 4: rebuild each block's Instrs as its surviving φ-nodes
		// followed by the instructions that rename left non-nil (the
		// nil ones are counted in BasicBlock.gaps), dropping RunDefers
		// when fn has no Defer.
		for _, blk := range fn.Blocks {
			live := phis[blk]

			doomedRunDefers := 0
			if !sawDefer {
				doomedRunDefers = blk.rundefers
			}

			if len(live)+blk.gaps+doomedRunDefers == 0 {
				continue // fast path: no new phis or gaps
			}

			// Compact live + non-nil Instrs into a new slice.
			// TODO(adonovan): opt: compact in situ (rightwards)
			// if Instrs has sufficient space or slack.
			out := make([]Instruction, len(blk.Instrs)+len(live)-blk.gaps-doomedRunDefers)
			for k, np := range live {
				out[k] = np.phi
			}
			next := len(live)
			for _, ins := range blk.Instrs {
				if ins == nil {
					continue
				}
				if _, isRunDefers := ins.(*RunDefers); isRunDefers && !sawDefer {
					continue
				}
				out[next] = ins
				next++
			}
			blk.Instrs = out
		}

		// Phase 5: keep in fn.Locals only the allocs that were not
		// lifted (index < 0), and nil out the vacated tail to aid GC.
		kept := 0
		for _, local := range fn.Locals {
			if local.index < 0 {
				fn.Locals[kept] = local
				kept++
			}
		}
		for k := kept; k < len(fn.Locals); k++ {
			fn.Locals[k] = nil
		}
		fn.Locals = fn.Locals[:kept]
	}

	// removeDeadPhis drops from newPhis every φ-node that is not live.
	// A φ-node is live if some non-Phi instruction refers to it directly,
	// if it is an operand (transitively) of a live φ-node, or if it
	// already exists in one of blocks.
	func removeDeadPhis(blocks []*BasicBlock, newPhis newPhiMap) {
		// First pass: compute the live set.
		//
		// Reachability is computed in reverse, starting from each φ,
		// rather than forwards from each live non-Phi instruction,
		// because this way visits much less of the Value graph.
		live := make(map[*Phi]bool)
		for _, list := range newPhis {
			for _, np := range list {
				if p := np.phi; !live[p] && phiHasDirectReferrer(p) {
					markLivePhi(live, p)
				}
			}
		}

		// Existing φ-nodes due to && and || operators
		// are all considered live (see Go issue 19622).
		for _, b := range blocks {
			for _, instr := range b.phis() {
				markLivePhi(live, instr.(*Phi))
			}
		}

		// Second pass: filter each list in place, unlinking the dead
		// φ-nodes from the referrer lists of their operands.
		for b, list := range newPhis {
			kept := list[:0]
			for _, np := range list {
				if live[np.phi] {
					kept = append(kept, np)
					continue
				}
				for _, edge := range np.phi.Edges {
					// Referrers returns a pointer to the list, so
					// the filtered slice must be written back
					// through that pointer.
					if refs := edge.Referrers(); refs != nil {
						*refs = removeInstr(*refs, np.phi)
					}
				}
				np.phi.block = nil
			}
			newPhis[b] = kept
		}
	}

	// markLivePhi records phi in livePhis and then does the same,
	// recursively, for each operand of phi that is itself a φ-node and
	// is not yet marked.
	//
	// φ-nodes are identified by pointer, matching the map[*Phi]bool that
	// removeDeadPhis passes in.
	func markLivePhi(livePhis map[*Phi]bool, phi *Phi) {
		livePhis[phi] = true
		for _, op := range phi.Operands(nil) {
			// Operands yields pointers to the operand slots.
			q, isPhi := (*op).(*Phi)
			if isPhi && !livePhis[q] {
				markLivePhi(livePhis, q)
			}
		}
	}

	// phiHasDirectReferrer reports whether at least one referrer of phi
	// is something other than a φ-node.  Such referrers are the roots of
	// the liveness traversal.
	func phiHasDirectReferrer(phi *Phi) bool {
		for _, user := range *phi.Referrers() {
			switch user.(type) {
			case *Phi:
				// φ-to-φ references do not count.
			default:
				return true
			}
		}
		return false
	}

	// blockSet is a set of basic blocks stored as a bitmap; its add
	// method uses the block's Index field as the bit position.
	type blockSet struct{ big.Int } // (inherit methods from Int)

	// add inserts b into the set.  It reports whether b was newly
	// inserted, i.e. whether the set changed.
	//
	// The receiver is a pointer so that the caller's set is updated, and
	// b is a *BasicBlock, matching the block pointers callers pass in.
	func (s *blockSet) add(b *BasicBlock) bool {
		idx := b.Index
		if s.Bit(idx) == 0 {
			s.SetBit(&s.Int, idx, 1)
			return true
		}
		return false
	}

	// take removes one element from s and returns its index, or returns
	// -1 if s is empty.  The element chosen is the one with the lowest
	// index.
	func (s *blockSet) take() int {
		for i, n := 0, s.BitLen(); i < n; i++ {
			if s.Bit(i) != 0 {
				s.SetBit(&s.Int, i, 0)
				return i
			}
		}
		return -1
	}

	// newPhi is a pair of a newly introduced φ-node and the lifted Alloc
	// it replaces.
	type newPhi struct {
	// phi is the φ-node created by liftAlloc.
	phi *Phi
	// alloc is the cell whose value phi carries.
	alloc *Alloc
	}

	// newPhiMap records, for each basic block, the newPhis that must be
	// prepended to the block. liftAlloc adds entries; removeDeadPhis
	// later removes those whose φ-node is dead.
	type newPhiMap map[*BasicBlock][]newPhi

	// liftAlloc determines whether alloc can be lifted into registers,
	// and if so, it populates newPhis with all the φ-nodes it may require
	// and returns true.
	//
	// An alloc is not liftable if its element type is an array or struct,
	// if it is a named result of a function that has a Recover block, or
	// if any of its referrers is something other than a store to it, a
	// load from it, or a DebugRef.
	//
	// df is the dominance frontier of the enclosing function.
	// fresh is a source of fresh ids for phi nodes; it is a pointer so
	// that the counter advances across calls.
	func liftAlloc(df domFrontier, alloc *Alloc, newPhis newPhiMap, fresh *int) bool {
		// Don't lift aggregates into registers, because we don't have
		// a way to express their zero-constants.
		switch deref(alloc.Type()).Underlying().(type) {
		case *types.Array, *types.Struct:
			return false
		}

		fn := alloc.Parent()

		// Don't lift named return values in functions that defer
		// calls that may recover from panic.
		if fn.Recover != nil {
			for _, nr := range fn.namedResults {
				if nr == alloc {
					return false
				}
			}
		}

		// Compute defblocks, the set of blocks containing a
		// definition of the alloc cell.
		var defblocks blockSet
		for _, instr := range *alloc.Referrers() {
			// Bail out if we discover the alloc is not liftable;
			// the only operations permitted to use the alloc are
			// loads/stores into the cell, and DebugRef.
			switch instr := instr.(type) {
			case *Store:
				if instr.Val == alloc {
					return false // address used as value
				}
				if instr.Addr != alloc {
					panic("Alloc.Referrers is inconsistent")
				}
				defblocks.add(instr.Block())
			case *UnOp:
				if instr.Op != token.MUL {
					return false // not a load
				}
				if instr.X != alloc {
					panic("Alloc.Referrers is inconsistent")
				}
			case *DebugRef:
				// ok
			default:
				return false // some other instruction
			}
		}
		// The Alloc itself counts as a (zero) definition of the cell.
		defblocks.add(alloc.Block())

		if debugLifting {
			fmt.Fprintln(os.Stderr, "\tlifting ", alloc, alloc.Name())
		}

		// Φ-insertion.
		//
		// What follows is the body of the main loop of the insert-φ
		// function described by Cytron et al, but instead of using
		// counter tricks, we just reset the 'hasAlready' and 'work'
		// sets each iteration.  These are bitmaps so it's pretty cheap.
		//
		// TODO(adonovan): opt: recycle slice storage for W,
		// hasAlready, defBlocks across liftAlloc calls.
		var hasAlready blockSet

		// Initialize W and work to defblocks.  Each set gets its own
		// deep copy via Set: math/big documents that shallow copies of
		// an Int are not supported, so plain assignment is avoided.
		var (
			work blockSet // blocks seen
			W    blockSet // blocks to do
		)
		work.Set(&defblocks.Int)
		W.Set(&defblocks.Int)

		// Traverse iterated dominance frontier, inserting φ-nodes.
		for i := W.take(); i != -1; i = W.take() {
			u := fn.Blocks[i]
			for _, v := range df[u.Index] {
				if !hasAlready.add(v) {
					continue // v already has a φ-node for this alloc
				}

				// Create φ-node.
				// It will be prepended to v.Instrs later, if needed.
				phi := &Phi{
					Edges:   make([]Value, len(v.Preds)),
					Comment: alloc.Comment,
				}
				// This is merely a debugging aid:
				phi.setNum(*fresh)
				*fresh++

				phi.pos = alloc.Pos()
				phi.setType(deref(alloc.Type()))
				phi.block = v
				if debugLifting {
					fmt.Fprintf(os.Stderr, "\tplace %s = %s at block %s\n", phi.Name(), phi, v)
				}
				newPhis[v] = append(newPhis[v], newPhi{phi, alloc})

				// The φ-node is itself a definition in v, so v's
				// frontier must be visited too (once).
				if work.add(v) {
					W.add(v)
				}
			}
		}

		return true
	}

	// replaceAll replaces all intraprocedural uses of x with y,
	// updating x.Referrers and y.Referrers.
	//
	// Every instruction in x's referrer list has each operand equal
	// to x rewritten to y, and is then appended to y's referrer list
	// when y has one (y.Referrers() != nil).  An instruction that
	// is listed more than once among x's referrers is appended to
	// y's list once per listing; such duplicates are tolerated.
	// On return x's referrer list is empty.
	//
	// Precondition: x.Referrers() != nil, i.e. x must be local to some function.
	//
	func replaceAll(x, y Value) {
		var rands []*Value
		pxrefs := x.Referrers()
		pyrefs := y.Referrers()
		for _, instr := range *pxrefs {
			rands = instr.Operands(rands[:0]) // recycle storage
			for _, operand := range rands {
				// x is non-nil (a method was called on it above),
				// so a nil operand can never compare equal to it.
				if *operand == x {
					*operand = y
				}
			}
			if pyrefs != nil {
				// Write through the pointer: assigning to the local
				// variable would leave y's referrer list unchanged.
				*pyrefs = append(*pyrefs, instr) // dups ok
			}
		}
		*pxrefs = nil // x is now unreferenced
	}

	// renamed looks up the value that currently stands in for alloc
	// in the renaming table (indexed by alloc.index).  A nil slot
	// denotes the implicit zero initialization: in that case the
	// zero constant for deref(alloc.Type()) is built on demand and
	// recorded in the slot, so subsequent lookups through the same
	// table yield the same value.
	//
	func renamed(renaming []Value, alloc *Alloc) Value {
		if current := renaming[alloc.index]; current != nil {
			return current
		}
		var zero Value = zeroConst(deref(alloc.Type()))
		renaming[alloc.index] = zero
		return zero
	}

	// rename implements the (Cytron et al) SSA renaming algorithm, a
	// preorder traversal of the dominator tree replacing all loads of
	// Alloc cells with the value stored to that cell by the dominating
	// store instruction. For lifting, we need only consider loads,
	// stores and φ-nodes.
	//
	// renaming is a map from *Alloc (keyed by index number) to its
	// dominating stored value; newPhis[x] is the set of new φ-nodes to be
	// prepended to block x.
	//
	// Only Allocs whose index is non-negative are processed; other
	// Allocs and the instructions that use them are left untouched.
	// Instructions made redundant are deleted by setting their slot
	// in u.Instrs to nil and incrementing u.gaps.  Referrer lists
	// are updated in place through the pointer returned by
	// Referrers(), and only when that pointer is non-nil.
	//
	// renaming is mutated by this call; see the recursion at the end
	// for how sibling subtrees are isolated from one another.
	//
	func rename(u *BasicBlock, renaming []Value, newPhis newPhiMap) {
		// Each φ-node becomes the new name for its associated Alloc.
		for _, np := range newPhis[u] {
			phi := np.phi
			alloc := np.alloc
			renaming[alloc.index] = phi
		}

		// Rename loads and stores of allocs.
		for i, instr := range u.Instrs {
			switch instr := instr.(type) {
			case *Alloc:
				if instr.index >= 0 { // store of zero to Alloc cell
					// Replace dominated loads by the zero value.
					// (A nil slot is materialized lazily by renamed.)
					renaming[instr.index] = nil
					if debugLifting {
						fmt.Fprintf(os.Stderr, "\tkill alloc %s\n", instr)
					}
					// Delete the Alloc.
					u.Instrs[i] = nil
					u.gaps++
				}

			case *Store:
				if alloc, ok := instr.Addr.(*Alloc); ok && alloc.index >= 0 { // store to Alloc cell
					// Replace dominated loads by the stored value.
					renaming[alloc.index] = instr.Val
					if debugLifting {
						fmt.Fprintf(os.Stderr, "\tkill store %s; new value: %s\n",
							instr, instr.Val.Name())
					}
					// Remove the store from the referrer list of the stored value.
					// The result must be written back through the pointer,
					// otherwise the stored value keeps a stale referrer.
					if refs := instr.Val.Referrers(); refs != nil {
						*refs = removeInstr(*refs, instr)
					}
					// Delete the Store.
					u.Instrs[i] = nil
					u.gaps++
				}

			case *UnOp:
				if instr.Op == token.MUL {
					if alloc, ok := instr.X.(*Alloc); ok && alloc.index >= 0 { // load of Alloc cell
						newval := renamed(renaming, alloc)
						if debugLifting {
							fmt.Fprintf(os.Stderr, "\tupdate load %s = %s with %s\n",
								instr.Name(), instr, newval.Name())
						}
						// Replace all references to
						// the loaded value by the
						// dominating stored value.
						replaceAll(instr, newval)
						// Delete the Load.
						u.Instrs[i] = nil
						u.gaps++
					}
				}

			case *DebugRef:
				if alloc, ok := instr.X.(*Alloc); ok && alloc.index >= 0 { // ref of Alloc cell
					if instr.IsAddr {
						instr.X = renamed(renaming, alloc)
						instr.IsAddr = false

						// Add DebugRef to instr.X's referrers.
						if refs := instr.X.Referrers(); refs != nil {
							*refs = append(*refs, instr)
						}
					} else {
						// A source expression denotes the address
						// of an Alloc that was optimized away.
						instr.X = nil

						// Delete the DebugRef.
						u.Instrs[i] = nil
						u.gaps++
					}
				}
			}
		}

		// For each φ-node in a CFG successor, rename the edge.
		for _, v := range u.Succs {
			phis := newPhis[v]
			if len(phis) == 0 {
				continue
			}
			i := v.predIndex(u)
			for _, np := range phis {
				phi := np.phi
				alloc := np.alloc
				newval := renamed(renaming, alloc)
				if debugLifting {
					fmt.Fprintf(os.Stderr, "\tsetphi %s edge %s -> %s (#%d) (alloc=%s) := %s\n",
						phi.Name(), u, v, i, alloc.Name(), newval.Name())
				}
				phi.Edges[i] = newval
				// Record the φ-node as a referrer of the incoming value.
				if prefs := newval.Referrers(); prefs != nil {
					*prefs = append(*prefs, phi)
				}
			}
		}

		// Continue depth-first recursion over domtree, pushing a
		// fresh copy of the renaming map for each subtree.
		for i, v := range u.dom.children {
			r := renaming
			if i < len(u.dom.children)-1 {
				// On all but the final iteration, we must make
				// a copy to avoid destructive update.
				r = make([]Value, len(renaming))
				copy(r, renaming)
			}
			rename(v, r, newPhis)
		}

	}