| // Copyright 2013 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| package ssa |
| |
| // This file defines the lifting pass which tries to "lift" Alloc |
| // cells (new/local variables) into SSA registers, replacing loads |
| // with the dominating stored value, eliminating loads and stores, and |
| // inserting φ-nodes as needed. |
| |
| // Cited papers and resources: |
| // |
| // Ron Cytron et al. 1991. Efficiently computing SSA form... |
| // http://doi.acm.org/10.1145/115372.115320 |
| // |
| // Cooper, Harvey, Kennedy. 2001. A Simple, Fast Dominance Algorithm. |
| // Software Practice and Experience 2001, 4:1-10. |
| // http://www.hipersoft.rice.edu/grads/publications/dom14.pdf |
| // |
| // Daniel Berlin, llvmdev mailing list, 2012. |
| // http://lists.cs.uiuc.edu/pipermail/llvmdev/2012-January/046638.html |
| // (Be sure to expand the whole thread.) |
| |
| // TODO(adonovan): opt: there are many optimizations worth evaluating, and |
| // the conventional wisdom for SSA construction is that a simple |
| // algorithm well engineered often beats those of better asymptotic |
| // complexity on all but the most egregious inputs. |
| // |
| // Danny Berlin suggests that the Cooper et al. algorithm for |
| // computing the dominance frontier is superior to Cytron et al. |
| // Furthermore he recommends that rather than computing the DF for the |
| // whole function then renaming all alloc cells, it may be cheaper to |
| // compute the DF for each alloc cell separately and throw it away. |
| // |
| // Consider exploiting liveness information to avoid creating dead |
| // φ-nodes which we then immediately remove. |
| // |
| // Also see many other "TODO: opt" suggestions in the code. |
| |
| import ( |
| "fmt" |
| "go/token" |
| "go/types" |
| "math/big" |
| "os" |
| ) |
| |
| // If true, show diagnostic information at each step of lifting. |
| // Very verbose. |
| const debugLifting = false |
| |
| // domFrontier maps each block to the set of blocks in its dominance |
| // frontier. The outer slice is conceptually a map keyed by |
| // Block.Index. The inner slice is conceptually a set, possibly |
| // containing duplicates. |
| // |
| // TODO(adonovan): opt: measure impact of dups; consider a packed bit |
| // representation, e.g. big.Int, and bitwise parallel operations for |
| // the union step in the Children loop. |
| // |
| // domFrontier's methods mutate the slice's elements but not its |
| // length, so their receivers needn't be pointers. |
| type domFrontier [][]*BasicBlock |
| |
| func (df domFrontier) add(u, v *BasicBlock) { |
| p := &df[u.Index] |
| *p = append(*p, v) |
| } |
| |
| // build builds the dominance frontier df for the dominator (sub)tree |
| // rooted at u, using the Cytron et al. algorithm. |
| // |
| // TODO(adonovan): opt: consider Berlin approach, computing pruned SSA |
| // by pruning the entire IDF computation, rather than merely pruning |
| // the DF -> IDF step. |
| func (df domFrontier) build(u *BasicBlock) { |
| // Encounter each node u in postorder of dom tree. |
| for _, child := range u.dom.children { |
| df.build(child) |
| } |
| for _, vb := range u.Succs { |
| if v := vb.dom; v.idom != u { |
| df.add(u, vb) |
| } |
| } |
| for _, w := range u.dom.children { |
| for _, vb := range df[w.Index] { |
| // TODO(adonovan): opt: use word-parallel bitwise union. |
| if v := vb.dom; v.idom != u { |
| df.add(u, vb) |
| } |
| } |
| } |
| } |
| |
| func buildDomFrontier(fn *Function) domFrontier { |
| df := make(domFrontier, len(fn.Blocks)) |
| df.build(fn.Blocks[0]) |
| if fn.Recover != nil { |
| df.build(fn.Recover) |
| } |
| return df |
| } |
| |
| func removeInstr(refs []Instruction, instr Instruction) []Instruction { |
| i := 0 |
| for _, ref := range refs { |
| if ref == instr { |
| continue |
| } |
| refs[i] = ref |
| i++ |
| } |
| for j := i; j != len(refs); j++ { |
| refs[j] = nil // aid GC |
| } |
| return refs[:i] |
| } |
| |
| // lift replaces local and new Allocs accessed only with |
| // load/store by SSA registers, inserting φ-nodes where necessary. |
| // The result is a program in classical pruned SSA form. |
| // |
| // Preconditions: |
| // - fn has no dead blocks (blockopt has run). |
| // - Def/use info (Operands and Referrers) is up-to-date. |
| // - The dominator tree is up-to-date. |
| func lift(fn *Function) { |
| // TODO(adonovan): opt: lots of little optimizations may be |
| // worthwhile here, especially if they cause us to avoid |
| // buildDomFrontier. For example: |
| // |
| // - Alloc never loaded? Eliminate. |
| // - Alloc never stored? Replace all loads with a zero constant. |
| // - Alloc stored once? Replace loads with dominating store; |
| // don't forget that an Alloc is itself an effective store |
| // of zero. |
| // - Alloc used only within a single block? |
| // Use degenerate algorithm avoiding φ-nodes. |
| // - Consider synergy with scalar replacement of aggregates (SRA). |
| // e.g. *(&x.f) where x is an Alloc. |
| // Perhaps we'd get better results if we generated this as x.f |
| // i.e. Field(x, .f) instead of Load(FieldIndex(x, .f)). |
| // Unclear. |
| // |
| // But we will start with the simplest correct code. |
| df := buildDomFrontier(fn) |
| |
| if debugLifting { |
| title := false |
| for i, blocks := range df { |
| if blocks != nil { |
| if !title { |
| fmt.Fprintf(os.Stderr, "Dominance frontier of %s:\n", fn) |
| title = true |
| } |
| fmt.Fprintf(os.Stderr, "\t%s: %s\n", fn.Blocks[i], blocks) |
| } |
| } |
| } |
| |
| newPhis := make(newPhiMap) |
| |
| // During this pass we will replace some BasicBlock.Instrs |
| // (allocs, loads and stores) with nil, keeping a count in |
| // BasicBlock.gaps. At the end we will reset Instrs to the |
| // concatenation of all non-dead newPhis and non-nil Instrs |
| // for the block, reusing the original array if space permits. |
| |
| // While we're here, we also eliminate 'rundefers' |
| // instructions in functions that contain no 'defer' |
| // instructions. |
| usesDefer := false |
| |
| // A counter used to generate ~unique ids for Phi nodes, as an |
| // aid to debugging. We use large numbers to make them highly |
| // visible. All nodes are renumbered later. |
| fresh := 1000 |
| |
| // Determine which allocs we can lift and number them densely. |
| // The renaming phase uses this numbering for compact maps. |
| numAllocs := 0 |
| for _, b := range fn.Blocks { |
| b.gaps = 0 |
| b.rundefers = 0 |
| for _, instr := range b.Instrs { |
| switch instr := instr.(type) { |
| case *Alloc: |
| index := -1 |
| if liftAlloc(df, instr, newPhis, &fresh) { |
| index = numAllocs |
| numAllocs++ |
| } |
| instr.index = index |
| case *Defer: |
| usesDefer = true |
| case *RunDefers: |
| b.rundefers++ |
| } |
| } |
| } |
| |
| // renaming maps an alloc (keyed by index) to its replacement |
| // value. Initially the renaming contains nil, signifying the |
| // zero constant of the appropriate type; we construct the |
| // Const lazily at most once on each path through the domtree. |
| // TODO(adonovan): opt: cache per-function not per subtree. |
| renaming := make([]Value, numAllocs) |
| |
| // Renaming. |
| rename(fn.Blocks[0], renaming, newPhis) |
| |
| // Eliminate dead φ-nodes. |
| removeDeadPhis(fn.Blocks, newPhis) |
| |
| // Prepend remaining live φ-nodes to each block. |
| for _, b := range fn.Blocks { |
| nps := newPhis[b] |
| j := len(nps) |
| |
| rundefersToKill := b.rundefers |
| if usesDefer { |
| rundefersToKill = 0 |
| } |
| |
| if j+b.gaps+rundefersToKill == 0 { |
| continue // fast path: no new phis or gaps |
| } |
| |
| // Compact nps + non-nil Instrs into a new slice. |
| // TODO(adonovan): opt: compact in situ (rightwards) |
| // if Instrs has sufficient space or slack. |
| dst := make([]Instruction, len(b.Instrs)+j-b.gaps-rundefersToKill) |
| for i, np := range nps { |
| dst[i] = np.phi |
| } |
| for _, instr := range b.Instrs { |
| if instr == nil { |
| continue |
| } |
| if !usesDefer { |
| if _, ok := instr.(*RunDefers); ok { |
| continue |
| } |
| } |
| dst[j] = instr |
| j++ |
| } |
| b.Instrs = dst |
| } |
| |
| // Remove any fn.Locals that were lifted. |
| j := 0 |
| for _, l := range fn.Locals { |
| if l.index < 0 { |
| fn.Locals[j] = l |
| j++ |
| } |
| } |
| // Nil out fn.Locals[j:] to aid GC. |
| for i := j; i < len(fn.Locals); i++ { |
| fn.Locals[i] = nil |
| } |
| fn.Locals = fn.Locals[:j] |
| } |
| |
| // removeDeadPhis removes φ-nodes not transitively needed by a |
| // non-Phi, non-DebugRef instruction. |
| func removeDeadPhis(blocks []*BasicBlock, newPhis newPhiMap) { |
| // First pass: find the set of "live" φ-nodes: those reachable |
| // from some non-Phi instruction. |
| // |
| // We compute reachability in reverse, starting from each φ, |
| // rather than forwards, starting from each live non-Phi |
| // instruction, because this way visits much less of the |
| // Value graph. |
| livePhis := make(map[*Phi]bool) |
| for _, npList := range newPhis { |
| for _, np := range npList { |
| phi := np.phi |
| if !livePhis[phi] && phiHasDirectReferrer(phi) { |
| markLivePhi(livePhis, phi) |
| } |
| } |
| } |
| |
| // Existing φ-nodes due to && and || operators |
| // are all considered live (see Go issue 19622). |
| for _, b := range blocks { |
| for _, phi := range b.phis() { |
| markLivePhi(livePhis, phi.(*Phi)) |
| } |
| } |
| |
| // Second pass: eliminate unused phis from newPhis. |
| for block, npList := range newPhis { |
| j := 0 |
| for _, np := range npList { |
| if livePhis[np.phi] { |
| npList[j] = np |
| j++ |
| } else { |
| // discard it, first removing it from referrers |
| for _, val := range np.phi.Edges { |
| if refs := val.Referrers(); refs != nil { |
| *refs = removeInstr(*refs, np.phi) |
| } |
| } |
| np.phi.block = nil |
| } |
| } |
| newPhis[block] = npList[:j] |
| } |
| } |
| |
| // markLivePhi marks phi, and all φ-nodes transitively reachable via |
| // its Operands, live. |
| func markLivePhi(livePhis map[*Phi]bool, phi *Phi) { |
| livePhis[phi] = true |
| for _, rand := range phi.Operands(nil) { |
| if q, ok := (*rand).(*Phi); ok { |
| if !livePhis[q] { |
| markLivePhi(livePhis, q) |
| } |
| } |
| } |
| } |
| |
| // phiHasDirectReferrer reports whether phi is directly referred to by |
| // a non-Phi instruction. Such instructions are the |
| // roots of the liveness traversal. |
| func phiHasDirectReferrer(phi *Phi) bool { |
| for _, instr := range *phi.Referrers() { |
| if _, ok := instr.(*Phi); !ok { |
| return true |
| } |
| } |
| return false |
| } |
| |
| type blockSet struct{ big.Int } // (inherit methods from Int) |
| |
| // add adds b to the set and returns true if the set changed. |
| func (s *blockSet) add(b *BasicBlock) bool { |
| i := b.Index |
| if s.Bit(i) != 0 { |
| return false |
| } |
| s.SetBit(&s.Int, i, 1) |
| return true |
| } |
| |
| // take removes an arbitrary element from a set s and |
| // returns its index, or returns -1 if empty. |
| func (s *blockSet) take() int { |
| l := s.BitLen() |
| for i := 0; i < l; i++ { |
| if s.Bit(i) == 1 { |
| s.SetBit(&s.Int, i, 0) |
| return i |
| } |
| } |
| return -1 |
| } |
| |
| // newPhi is a pair of a newly introduced φ-node and the lifted Alloc |
| // it replaces. |
| type newPhi struct { |
| phi *Phi |
| alloc *Alloc |
| } |
| |
| // newPhiMap records for each basic block, the set of newPhis that |
| // must be prepended to the block. |
| type newPhiMap map[*BasicBlock][]newPhi |
| |
| // liftAlloc determines whether alloc can be lifted into registers, |
| // and if so, it populates newPhis with all the φ-nodes it may require |
| // and returns true. |
| // |
| // fresh is a source of fresh ids for phi nodes. |
| func liftAlloc(df domFrontier, alloc *Alloc, newPhis newPhiMap, fresh *int) bool { |
| // Don't lift aggregates into registers, because we don't have |
| // a way to express their zero-constants. |
| switch deref(alloc.Type()).Underlying().(type) { |
| case *types.Array, *types.Struct: |
| return false |
| } |
| |
| // Don't lift named return values in functions that defer |
| // calls that may recover from panic. |
| if fn := alloc.Parent(); fn.Recover != nil { |
| for _, nr := range fn.namedResults { |
| if nr == alloc { |
| return false |
| } |
| } |
| } |
| |
| // Compute defblocks, the set of blocks containing a |
| // definition of the alloc cell. |
| var defblocks blockSet |
| for _, instr := range *alloc.Referrers() { |
| // Bail out if we discover the alloc is not liftable; |
| // the only operations permitted to use the alloc are |
| // loads/stores into the cell, and DebugRef. |
| switch instr := instr.(type) { |
| case *Store: |
| if instr.Val == alloc { |
| return false // address used as value |
| } |
| if instr.Addr != alloc { |
| panic("Alloc.Referrers is inconsistent") |
| } |
| defblocks.add(instr.Block()) |
| case *UnOp: |
| if instr.Op != token.MUL { |
| return false // not a load |
| } |
| if instr.X != alloc { |
| panic("Alloc.Referrers is inconsistent") |
| } |
| case *DebugRef: |
| // ok |
| default: |
| return false // some other instruction |
| } |
| } |
| // The Alloc itself counts as a (zero) definition of the cell. |
| defblocks.add(alloc.Block()) |
| |
| if debugLifting { |
| fmt.Fprintln(os.Stderr, "\tlifting ", alloc, alloc.Name()) |
| } |
| |
| fn := alloc.Parent() |
| |
| // Φ-insertion. |
| // |
| // What follows is the body of the main loop of the insert-φ |
| // function described by Cytron et al, but instead of using |
| // counter tricks, we just reset the 'hasAlready' and 'work' |
| // sets each iteration. These are bitmaps so it's pretty cheap. |
| // |
| // TODO(adonovan): opt: recycle slice storage for W, |
| // hasAlready, defBlocks across liftAlloc calls. |
| var hasAlready blockSet |
| |
| // Initialize W and work to defblocks. |
| var work blockSet = defblocks // blocks seen |
| var W blockSet // blocks to do |
| W.Set(&defblocks.Int) |
| |
| // Traverse iterated dominance frontier, inserting φ-nodes. |
| for i := W.take(); i != -1; i = W.take() { |
| u := fn.Blocks[i] |
| for _, v := range df[u.Index] { |
| if hasAlready.add(v) { |
| // Create φ-node. |
| // It will be prepended to v.Instrs later, if needed. |
| phi := &Phi{ |
| Edges: make([]Value, len(v.Preds)), |
| Comment: alloc.Comment, |
| } |
| // This is merely a debugging aid: |
| phi.setNum(*fresh) |
| *fresh++ |
| |
| phi.pos = alloc.Pos() |
| phi.setType(deref(alloc.Type())) |
| phi.block = v |
| if debugLifting { |
| fmt.Fprintf(os.Stderr, "\tplace %s = %s at block %s\n", phi.Name(), phi, v) |
| } |
| newPhis[v] = append(newPhis[v], newPhi{phi, alloc}) |
| |
| if work.add(v) { |
| W.add(v) |
| } |
| } |
| } |
| } |
| |
| return true |
| } |
| |
| // replaceAll replaces all intraprocedural uses of x with y, |
| // updating x.Referrers and y.Referrers. |
| // Precondition: x.Referrers() != nil, i.e. x must be local to some function. |
| func replaceAll(x, y Value) { |
| var rands []*Value |
| pxrefs := x.Referrers() |
| pyrefs := y.Referrers() |
| for _, instr := range *pxrefs { |
| rands = instr.Operands(rands[:0]) // recycle storage |
| for _, rand := range rands { |
| if *rand != nil { |
| if *rand == x { |
| *rand = y |
| } |
| } |
| } |
| if pyrefs != nil { |
| *pyrefs = append(*pyrefs, instr) // dups ok |
| } |
| } |
| *pxrefs = nil // x is now unreferenced |
| } |
| |
| // renamed returns the value to which alloc is being renamed, |
| // constructing it lazily if it's the implicit zero initialization. |
| func renamed(renaming []Value, alloc *Alloc) Value { |
| v := renaming[alloc.index] |
| if v == nil { |
| v = zeroConst(deref(alloc.Type())) |
| renaming[alloc.index] = v |
| } |
| return v |
| } |
| |
| // rename implements the (Cytron et al) SSA renaming algorithm, a |
| // preorder traversal of the dominator tree replacing all loads of |
| // Alloc cells with the value stored to that cell by the dominating |
| // store instruction. For lifting, we need only consider loads, |
| // stores and φ-nodes. |
| // |
| // renaming is a map from *Alloc (keyed by index number) to its |
| // dominating stored value; newPhis[x] is the set of new φ-nodes to be |
| // prepended to block x. |
| func rename(u *BasicBlock, renaming []Value, newPhis newPhiMap) { |
| // Each φ-node becomes the new name for its associated Alloc. |
| for _, np := range newPhis[u] { |
| phi := np.phi |
| alloc := np.alloc |
| renaming[alloc.index] = phi |
| } |
| |
| // Rename loads and stores of allocs. |
| for i, instr := range u.Instrs { |
| switch instr := instr.(type) { |
| case *Alloc: |
| if instr.index >= 0 { // store of zero to Alloc cell |
| // Replace dominated loads by the zero value. |
| renaming[instr.index] = nil |
| if debugLifting { |
| fmt.Fprintf(os.Stderr, "\tkill alloc %s\n", instr) |
| } |
| // Delete the Alloc. |
| u.Instrs[i] = nil |
| u.gaps++ |
| } |
| |
| case *Store: |
| if alloc, ok := instr.Addr.(*Alloc); ok && alloc.index >= 0 { // store to Alloc cell |
| // Replace dominated loads by the stored value. |
| renaming[alloc.index] = instr.Val |
| if debugLifting { |
| fmt.Fprintf(os.Stderr, "\tkill store %s; new value: %s\n", |
| instr, instr.Val.Name()) |
| } |
| // Remove the store from the referrer list of the stored value. |
| if refs := instr.Val.Referrers(); refs != nil { |
| *refs = removeInstr(*refs, instr) |
| } |
| // Delete the Store. |
| u.Instrs[i] = nil |
| u.gaps++ |
| } |
| |
| case *UnOp: |
| if instr.Op == token.MUL { |
| if alloc, ok := instr.X.(*Alloc); ok && alloc.index >= 0 { // load of Alloc cell |
| newval := renamed(renaming, alloc) |
| if debugLifting { |
| fmt.Fprintf(os.Stderr, "\tupdate load %s = %s with %s\n", |
| instr.Name(), instr, newval.Name()) |
| } |
| // Replace all references to |
| // the loaded value by the |
| // dominating stored value. |
| replaceAll(instr, newval) |
| // Delete the Load. |
| u.Instrs[i] = nil |
| u.gaps++ |
| } |
| } |
| |
| case *DebugRef: |
| if alloc, ok := instr.X.(*Alloc); ok && alloc.index >= 0 { // ref of Alloc cell |
| if instr.IsAddr { |
| instr.X = renamed(renaming, alloc) |
| instr.IsAddr = false |
| |
| // Add DebugRef to instr.X's referrers. |
| if refs := instr.X.Referrers(); refs != nil { |
| *refs = append(*refs, instr) |
| } |
| } else { |
| // A source expression denotes the address |
| // of an Alloc that was optimized away. |
| instr.X = nil |
| |
| // Delete the DebugRef. |
| u.Instrs[i] = nil |
| u.gaps++ |
| } |
| } |
| } |
| } |
| |
| // For each φ-node in a CFG successor, rename the edge. |
| for _, v := range u.Succs { |
| phis := newPhis[v] |
| if len(phis) == 0 { |
| continue |
| } |
| i := v.predIndex(u) |
| for _, np := range phis { |
| phi := np.phi |
| alloc := np.alloc |
| newval := renamed(renaming, alloc) |
| if debugLifting { |
| fmt.Fprintf(os.Stderr, "\tsetphi %s edge %s -> %s (#%d) (alloc=%s) := %s\n", |
| phi.Name(), u, v, i, alloc.Name(), newval.Name()) |
| } |
| phi.Edges[i] = newval |
| if prefs := newval.Referrers(); prefs != nil { |
| *prefs = append(*prefs, phi) |
| } |
| } |
| } |
| |
| // Continue depth-first recursion over domtree, pushing a |
| // fresh copy of the renaming map for each subtree. |
| for i, v := range u.dom.children { |
| r := renaming |
| if i < len(u.dom.children)-1 { |
| // On all but the final iteration, we must make |
| // a copy to avoid destructive update. |
| r = make([]Value, len(renaming)) |
| copy(r, renaming) |
| } |
| rename(v, r, newPhis) |
| } |
| |
| } |