// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package ssa

import "cmd/compile/internal/base"

// tighten moves Values closer to the Blocks in which they are used.
// This can reduce the amount of register spilling required,
// if it doesn't also create more live values.
// A Value can be moved to any block that
// dominates all blocks in which it is used.
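//
// For example (an illustrative sketch): if v = OffPtr <*T> [8] p is defined in
// b1 but its only use is in a block b3 that b1 dominates, tighten moves v into
// b3 (assuming b3 is not in a deeper loop than b1), so only p, not both p and
// v, is live along the paths that bypass b3.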
func tighten(f *Func) {
	if base.Flag.N != 0 && len(f.Blocks) < 10000 {
		// Skip the optimization in -N mode, except for huge functions.
		// Having too many values live across blocks can cause pathological
		// behavior in the register allocator (see issue 52180).
		return
	}
	canMove := f.Cache.allocBoolSlice(f.NumValues())
	defer f.Cache.freeBoolSlice(canMove)

	// Compute the memory states of each block.
	startMem := f.Cache.allocValueSlice(f.NumBlocks())
	defer f.Cache.freeValueSlice(startMem)
	endMem := f.Cache.allocValueSlice(f.NumBlocks())
	defer f.Cache.freeValueSlice(endMem)
	memState(f, startMem, endMem)

	for _, b := range f.Blocks {
		for _, v := range b.Values {
			if v.Op.isLoweredGetClosurePtr() {
				// Must stay in the entry block.
				continue
			}
			switch v.Op {
			case OpPhi, OpArg, OpArgIntReg, OpArgFloatReg, OpSelect0, OpSelect1, OpSelectN:
				// Phis need to stay in their block.
				// Arg must stay in the entry block.
				// Tuple selectors must stay with the tuple generator.
				// SelectN is typically, ultimately, a register.
				continue
			}
			// Count arguments which will need a register.
			narg := 0
			for _, a := range v.Args {
				// SP and SB are special registers and have no effect on
				// the allocation of general-purpose registers.
				if a.needRegister() && a.Op != OpSB && a.Op != OpSP {
					narg++
				}
			}
			if narg >= 2 && !v.Type.IsFlags() {
				// Don't move values with more than one input, as that may
				// increase register pressure.
				// We make an exception for flags, as we want flag generators
				// moved next to uses (because we only have 1 flag register).
				continue
			}
			canMove[v.ID] = true
		}
	}

	// Build data structure for fast least-common-ancestor queries.
	lca := makeLCArange(f)
	// For each moveable value, record the block that dominates all uses found so far.
	target := f.Cache.allocBlockSlice(f.NumValues())
	defer f.Cache.freeBlockSlice(target)

	// Grab loop information.
	// We use this to make sure we don't tighten a value into a (deeper) loop.
	idom := f.Idom()
	loops := f.loopnest()
	loops.calculateDepths()

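	// Iterate to a fixed point: moving a value changes the block in which its
	// arguments are used, which in turn may allow those arguments to move.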
	changed := true
	for changed {
		changed = false

		// Reset target
		for i := range target {
			target[i] = nil
		}

		// Compute target locations (for moveable values only).
		// target location = the least common ancestor of all uses in the dominator tree.
		for _, b := range f.Blocks {
			for _, v := range b.Values {
				for i, a := range v.Args {
					if !canMove[a.ID] {
						continue
					}
					use := b
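					// A phi argument is used at the end of the corresponding
					// predecessor block, not in b itself.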
					if v.Op == OpPhi {
						use = b.Preds[i].b
					}
					if target[a.ID] == nil {
						target[a.ID] = use
					} else {
						target[a.ID] = lca.find(target[a.ID], use)
					}
				}
			}
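			// Control values are used at the end of b.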
			for _, c := range b.ControlValues() {
				if !canMove[c.ID] {
					continue
				}
				if target[c.ID] == nil {
					target[c.ID] = b
				} else {
					target[c.ID] = lca.find(target[c.ID], b)
				}
			}
		}

		// If the target location is inside a loop,
		// move the target location up to just before the loop head.
		for _, b := range f.Blocks {
			origloop := loops.b2l[b.ID]
			for _, v := range b.Values {
				t := target[v.ID]
				if t == nil {
					continue
				}
				targetloop := loops.b2l[t.ID]
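				// Step to the immediate dominator of the target loop's header,
				// repeating until the target is in a loop no deeper than v's
				// original loop.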
				for targetloop != nil && (origloop == nil || targetloop.depth > origloop.depth) {
					t = idom[targetloop.header.ID]
					target[v.ID] = t
					targetloop = loops.b2l[t.ID]
				}
			}
		}

		// Move values to target locations.
		for _, b := range f.Blocks {
			for i := 0; i < len(b.Values); i++ {
				v := b.Values[i]
				t := target[v.ID]
				if t == nil || t == b {
					// v is not moveable, or is already in the correct place.
					continue
				}
				if mem := v.MemoryArg(); mem != nil {
					if startMem[t.ID] != mem {
						// We can't move a value with a memory arg unless the target block
						// has that memory arg as its starting memory.
						continue
					}
				}
				if f.pass.debug > 0 {
					b.Func.Warnl(v.Pos, "%v is moved", v.Op)
				}
				// Move v to the block which dominates its uses.
				t.Values = append(t.Values, v)
				v.Block = t
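				// Remove v from b.Values without preserving order: overwrite
				// slot i with the last value, shrink the slice, and revisit
				// index i so the swapped-in value is still examined.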
				last := len(b.Values) - 1
				b.Values[i] = b.Values[last]
				b.Values[last] = nil
				b.Values = b.Values[:last]
				changed = true
				i--
			}
		}
	}
}

// phiTighten moves constants closer to phi users.
// This pass avoids having many constants live across large parts of the program.
// See issue 16407.
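//
// For example (an illustrative sketch): if b3 has a phi v = Phi(c, x) whose
// first argument c is a small constant defined in a distant block b1, the pass
// copies c into the predecessor of b3 on that edge and rewires v to use the
// copy, so the constant is live only on that edge rather than from b1 all the
// way to b3.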
func phiTighten(f *Func) {
	for _, b := range f.Blocks {
		for _, v := range b.Values {
			if v.Op != OpPhi {
				continue
			}
			for i, a := range v.Args {
				if !a.rematerializeable() {
					continue // not a constant we can move around
				}
				if a.Block == b.Preds[i].b {
					continue // already in the right place
				}
				// Make a copy of a and put it in the predecessor block.
				v.SetArg(i, a.copyInto(b.Preds[i].b))
			}
		}
	}
}

// memState computes the memory state at the beginning and end of each block of
// the function. The memory state is represented by a value of mem type.
// The result is stored in startMem and endMem; endMem is nil for blocks with
// no successors (Exit, Ret, RetJmp blocks). This algorithm is not suitable for
// blocks of an infinite loop that contain no mem operations. For example:
//
//	b1:
//		(some values)
//	plain -> b2
//
//	b2: <- b1 b2
//	plain -> b2
//
// Algorithm introduction:
//  1. The start memory state of a block is InitMem, a Phi node of type mem, or
//     an incoming memory value.
//  2. The start memory state of a block is consistent with the end memory state
//     of its predecessors. If the start memory state of a block is a Phi value,
//     then the end memory state of each predecessor is the corresponding
//     argument of that Phi.
//  3. The algorithm first obtains the memory state of some blocks directly in
//     the first step, and then floods the known memory state to the remaining
//     blocks in the second step.
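//
// For example (an illustrative sketch): if b3 has predecessors b1 and b2 and
// starts with m = Phi <mem> (m1, m2), the flooding step records endMem[b1] = m1
// and endMem[b2] = m2. A predecessor whose start memory is still unknown at
// that point gets startMem = endMem and is queued so flooding continues from it.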
func memState(f *Func, startMem, endMem []*Value) {
	// This slice contains the set of blocks that have had their startMem set
	// but whose startMem value has not yet been propagated to the endMem of
	// their predecessors.
	changed := make([]*Block, 0)

	// First step: init the memory state of some blocks.
	for _, b := range f.Blocks {
		for _, v := range b.Values {
			var mem *Value
			if v.Op == OpPhi {
				if v.Type.IsMemory() {
					mem = v
				}
			} else if v.Op == OpInitMem {
				mem = v // This is actually not needed.
			} else if a := v.MemoryArg(); a != nil && a.Block != b {
				// The only incoming memory value doesn't belong to this block.
				mem = a
			}
			if mem != nil {
				if old := startMem[b.ID]; old != nil {
					if old == mem {
						continue
					}
					f.Fatalf("func %s, startMem[%v] has different values, old %v, new %v", f.Name, b, old, mem)
				}
				startMem[b.ID] = mem
				changed = append(changed, b)
			}
		}
	}

	// Second step: flood the known memory state of some blocks to others.
	for len(changed) != 0 {
		top := changed[0]
		changed = changed[1:]
		mem := startMem[top.ID]
		for i, p := range top.Preds {
			pb := p.b
			if endMem[pb.ID] != nil {
				continue
			}
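			// If top's start memory is a phi defined in top, each predecessor's
			// end memory is the corresponding phi argument; otherwise it is the
			// same value as top's start memory.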
			if mem.Op == OpPhi && mem.Block == top {
				endMem[pb.ID] = mem.Args[i]
			} else {
				endMem[pb.ID] = mem
			}
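			// pb's start memory was not set in the first step, so memory flows
			// through pb unchanged: its start memory equals its end memory.
			// Record it and continue flooding from pb.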
			if startMem[pb.ID] == nil {
				startMem[pb.ID] = endMem[pb.ID]
				changed = append(changed, pb)
			}
		}
	}
}