// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package ssa

import "cmd/internal/src"

// branchelim tries to eliminate branches by
// generating CondSelect instructions.
//
// Search for basic blocks that look like
//
//    bb0            bb0
//     | \          /   \
//     | bb1  or  bb1   bb2    <- trivial if/else blocks
//     | /          \   /
//    bb2            bb3
//
// where the intermediate blocks are mostly empty (with no side-effects);
// rewrite Phis in the postdominator as CondSelects.
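//
// For example (an illustrative sketch, not a test from this package), a
// function like
//
//	func pick(c bool, a, b int) int {
//		x := a
//		if c {
//			x = b
//		}
//		return x
//	}
//
// produces the left-hand CFG above; the Phi that merges x in the join block
// becomes a CondSelect, which later lowers to a conditional-move instruction
// (e.g. CMOV on amd64, CSEL on arm64).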
func branchelim(f *Func) {
// FIXME: add support for lowering CondSelects on more architectures
switch f.Config.arch {
case "arm64", "ppc64le", "ppc64", "amd64", "wasm", "loong64":
// implemented
default:
return
}
// Find all the values used in computing the address of any load.
// Typically these values have operations like AddPtr, Lsh64x64, etc.
loadAddr := f.newSparseSet(f.NumValues())
defer f.retSparseSet(loadAddr)
for _, b := range f.Blocks {
for _, v := range b.Values {
switch v.Op {
case OpLoad, OpAtomicLoad8, OpAtomicLoad32, OpAtomicLoad64, OpAtomicLoadPtr, OpAtomicLoadAcq32, OpAtomicLoadAcq64:
loadAddr.add(v.Args[0].ID)
case OpMove:
loadAddr.add(v.Args[1].ID)
}
}
}
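// Propagate backwards to a fixed point: any integer- or pointer-typed
// argument of a value that already feeds a load address is itself part of
// the address computation.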
po := f.postorder()
for {
n := loadAddr.size()
for _, b := range po {
for i := len(b.Values) - 1; i >= 0; i-- {
v := b.Values[i]
if !loadAddr.contains(v.ID) {
continue
}
for _, a := range v.Args {
if a.Type.IsInteger() || a.Type.IsPtr() || a.Type.IsUnsafePtr() {
loadAddr.add(a.ID)
}
}
}
}
if loadAddr.size() == n {
break
}
}
change := true
for change {
change = false
for _, b := range f.Blocks {
change = elimIf(f, loadAddr, b) || elimIfElse(f, loadAddr, b) || change
}
}
}

func canCondSelect(v *Value, arch string, loadAddr *sparseSet) bool {
if loadAddr.contains(v.ID) {
// The result of the soon-to-be conditional move is used to compute a load address.
// We want to avoid generating a conditional move in this case
// because the load address would now be data-dependent on the condition.
// Previously it would only be control-dependent on the condition, which is faster
// if the branch predicts well (or possibly even if it doesn't, if the load will
// be an expensive cache miss).
// See issue #26306.
return false
}
if arch == "loong64" {
// We should not generate conditional moves if neither of the arguments is constant zero,
// because it requires three instructions (OR, MASKEQZ, MASKNEZ) and will increase the
// register pressure.
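// (If one argument is constant zero, the select can collapse to a single
// MASKEQZ or MASKNEZ, so that case is still worth converting.)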
if !(v.Args[0].isGenericIntConst() && v.Args[0].AuxInt == 0) &&
!(v.Args[1].isGenericIntConst() && v.Args[1].AuxInt == 0) {
return false
}
}
// For now, stick to simple scalars that fit in registers
switch {
case v.Type.Size() > v.Block.Func.Config.RegSize:
return false
case v.Type.IsPtrShaped():
return true
case v.Type.IsInteger():
if arch == "amd64" && v.Type.Size() < 2 {
// amd64 doesn't support CMOV with byte registers
return false
}
return true
default:
return false
}
}

// elimIf converts the one-way branch starting at dom in f to a conditional move if possible.
// loadAddr is a set of values which are used to compute the address of a load.
// Those values are exempt from CMOV generation.
func elimIf(f *Func, loadAddr *sparseSet, dom *Block) bool {
// See if dom is an If with one arm that
// is trivial and succeeded by the other
// successor of dom.
if dom.Kind != BlockIf || dom.Likely != BranchUnknown {
return false
}
var simple, post *Block
for i := range dom.Succs {
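// dom is a BlockIf, so it has exactly two successors; i^1 selects the other one.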
bb, other := dom.Succs[i].Block(), dom.Succs[i^1].Block()
if isLeafPlain(bb) && bb.Succs[0].Block() == other {
simple = bb
post = other
break
}
}
if simple == nil || len(post.Preds) != 2 || post == dom {
return false
}
// We've found our diamond CFG of blocks.
// Now decide if fusing 'simple' into dom+post
// looks profitable.
// Check that there are Phis, and that all of them
// can be safely rewritten to CondSelect.
hasphis := false
for _, v := range post.Values {
if v.Op == OpPhi {
hasphis = true
if !canCondSelect(v, f.Config.arch, loadAddr) {
return false
}
}
}
if !hasphis {
return false
}
// Pick some upper bound for the number of instructions
// we'd be willing to execute just to generate a dead
// argument to CondSelect. In the worst case, this is
// the number of useless instructions executed.
const maxfuseinsts = 2
if len(simple.Values) > maxfuseinsts || !canSpeculativelyExecute(simple) {
return false
}
// Replace Phi instructions in b with CondSelect instructions
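// CondSelect takes the value for the "condition true" case as Args[0]. The
// Phi's args are ordered by post's predecessors, while the branch taken when
// dom's condition is true is dom.Succs[0], so swap the first two args
// whenever those two orderings disagree.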
swap := (post.Preds[0].Block() == dom) != (dom.Succs[0].Block() == post)
for _, v := range post.Values {
if v.Op != OpPhi {
continue
}
v.Op = OpCondSelect
if swap {
v.Args[0], v.Args[1] = v.Args[1], v.Args[0]
}
v.AddArg(dom.Controls[0])
}
// Put all of the instructions into 'dom'
// and update the CFG appropriately.
dom.Kind = post.Kind
dom.CopyControls(post)
dom.Aux = post.Aux
dom.Succs = append(dom.Succs[:0], post.Succs...)
for i := range dom.Succs {
e := dom.Succs[i]
e.b.Preds[e.i].b = dom
}
// Try really hard to preserve statement marks attached to blocks.
simplePos := simple.Pos
postPos := post.Pos
simpleStmt := simplePos.IsStmt() == src.PosIsStmt
postStmt := postPos.IsStmt() == src.PosIsStmt
for _, v := range simple.Values {
v.Block = dom
}
for _, v := range post.Values {
v.Block = dom
}
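// simple and post are about to disappear as blocks, so any statement marks
// attached to their positions need to be re-homed on a surviving value (or
// block) to keep debugger stepping sensible.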
// findBlockPos determines if b contains a stmt-marked value
// that has the same line number as the Pos for b itself.
// (i.e. is the position on b actually redundant?)
findBlockPos := func(b *Block) bool {
pos := b.Pos
for _, v := range b.Values {
// See if there is a stmt-marked value already that matches simple.Pos (and perhaps post.Pos)
if pos.SameFileAndLine(v.Pos) && v.Pos.IsStmt() == src.PosIsStmt {
return true
}
}
return false
}
if simpleStmt {
simpleStmt = !findBlockPos(simple)
if !simpleStmt && simplePos.SameFileAndLine(postPos) {
postStmt = false
}
}
if postStmt {
postStmt = !findBlockPos(post)
}
// If simpleStmt and/or postStmt are still true, then try harder
// to find new homes for the corresponding statement marks.
// setBlockPos determines if b contains a can-be-statement value
// that has the same line number as the Pos for b itself, and
// puts a statement mark on it, and returns whether it succeeded
// in this operation.
setBlockPos := func(b *Block) bool {
pos := b.Pos
for _, v := range b.Values {
if pos.SameFileAndLine(v.Pos) && !isPoorStatementOp(v.Op) {
v.Pos = v.Pos.WithIsStmt()
return true
}
}
return false
}
// If necessary and possible, add a mark to a value in simple
if simpleStmt {
if setBlockPos(simple) && simplePos.SameFileAndLine(postPos) {
postStmt = false
}
}
// If necessary and possible, add a mark to a value in post
if postStmt {
postStmt = !setBlockPos(post)
}
// Before giving up (this was added because it helps), try the end of "dom", and if that is not available,
// try the values in the successor block if it is uncomplicated.
if postStmt {
if dom.Pos.IsStmt() != src.PosIsStmt {
dom.Pos = postPos
} else {
// Try the successor block
if len(dom.Succs) == 1 && len(dom.Succs[0].Block().Preds) == 1 {
succ := dom.Succs[0].Block()
for _, v := range succ.Values {
if isPoorStatementOp(v.Op) {
continue
}
if postPos.SameFileAndLine(v.Pos) {
v.Pos = v.Pos.WithIsStmt()
}
postStmt = false
break
}
// If postStmt still true, tag the block itself if possible
if postStmt && succ.Pos.IsStmt() != src.PosIsStmt {
succ.Pos = postPos
}
}
}
}
dom.Values = append(dom.Values, simple.Values...)
dom.Values = append(dom.Values, post.Values...)
// Trash 'post' and 'simple'
clobberBlock(post)
clobberBlock(simple)
f.invalidateCFG()
return true
}

// is this a BlockPlain with one predecessor?
func isLeafPlain(b *Block) bool {
return b.Kind == BlockPlain && len(b.Preds) == 1
}

func clobberBlock(b *Block) {
b.Values = nil
b.Preds = nil
b.Succs = nil
b.Aux = nil
b.ResetControls()
b.Likely = BranchUnknown
b.Kind = BlockInvalid
}

// elimIfElse converts the two-way branch starting at dom in f to a conditional move if possible.
// loadAddr is a set of values which are used to compute the address of a load.
// Those values are exempt from CMOV generation.
func elimIfElse(f *Func, loadAddr *sparseSet, b *Block) bool {
// See if 'b' ends in an if/else: it should
// have two successors, both of which are BlockPlain
// and succeeded by the same block.
if b.Kind != BlockIf || b.Likely != BranchUnknown {
return false
}
yes, no := b.Succs[0].Block(), b.Succs[1].Block()
if !isLeafPlain(yes) || len(yes.Values) > 1 || !canSpeculativelyExecute(yes) {
return false
}
if !isLeafPlain(no) || len(no.Values) > 1 || !canSpeculativelyExecute(no) {
return false
}
if b.Succs[0].Block().Succs[0].Block() != b.Succs[1].Block().Succs[0].Block() {
return false
}
// block that postdominates the if/else
post := b.Succs[0].Block().Succs[0].Block()
if len(post.Preds) != 2 || post == b {
return false
}
hasphis := false
for _, v := range post.Values {
if v.Op == OpPhi {
hasphis = true
if !canCondSelect(v, f.Config.arch, loadAddr) {
return false
}
}
}
if !hasphis {
return false
}
// Don't generate CondSelects if branch is cheaper.
if !shouldElimIfElse(no, yes, post, f.Config.arch) {
return false
}
// now we're committed: rewrite each Phi as a CondSelect
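// As in elimIf: Args[0] of the CondSelect is selected when b's condition is
// true, i.e. when control flows to b.Succs[0] (the "yes" block), so swap the
// Phi args if post's first predecessor is not that block.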
swap := post.Preds[0].Block() != b.Succs[0].Block()
for _, v := range post.Values {
if v.Op != OpPhi {
continue
}
v.Op = OpCondSelect
if swap {
v.Args[0], v.Args[1] = v.Args[1], v.Args[0]
}
v.AddArg(b.Controls[0])
}
// Move the contents of all of these
// blocks into 'b' and update CFG edges accordingly
b.Kind = post.Kind
b.CopyControls(post)
b.Aux = post.Aux
b.Succs = append(b.Succs[:0], post.Succs...)
for i := range b.Succs {
e := b.Succs[i]
e.b.Preds[e.i].b = b
}
for i := range post.Values {
post.Values[i].Block = b
}
for i := range yes.Values {
yes.Values[i].Block = b
}
for i := range no.Values {
no.Values[i].Block = b
}
b.Values = append(b.Values, yes.Values...)
b.Values = append(b.Values, no.Values...)
b.Values = append(b.Values, post.Values...)
// trash post, yes, and no
clobberBlock(yes)
clobberBlock(no)
clobberBlock(post)
f.invalidateCFG()
return true
}

// shouldElimIfElse reports whether the estimated cost of eliminating the
// branch is lower than the threshold.
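//
// For example (a worked instance of the cost model below): a single phi costs
// 1, which is below maxcost, so the branch is eliminated; two phis whose
// arguments are computed in the yes/no blocks cost 2 + 2 = 4, so the branch
// is kept.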
func shouldElimIfElse(no, yes, post *Block, arch string) bool {
switch arch {
default:
return true
case "amd64":
const maxcost = 2
phi := 0
other := 0
for _, v := range post.Values {
if v.Op == OpPhi {
// Each phi results in a CondSelect, which lowers into a CMOV;
// CMOV has latency >1 on most CPUs.
phi++
}
for _, x := range v.Args {
if x.Block == no || x.Block == yes {
other++
}
}
}
cost := phi * 1
if phi > 1 {
// If we have more than 1 phi and some values in post have args
// in yes or no blocks, we may have to recalculate condition, because
// those args may clobber flags. For now assume that all operations clobber flags.
cost += other * 1
}
return cost < maxcost
}
}

// canSpeculativelyExecute reports whether every value in the block can
// be evaluated without causing any observable side effects (memory
// accesses, panics and so on) except for execution time changes. It
// also ensures that the block does not contain any phis which we can't
// speculatively execute.
// Warning: this function cannot currently detect values that represent
// instructions the execution of which needs to be guarded with CPU
// hardware feature checks. See issue #34950.
func canSpeculativelyExecute(b *Block) bool {
// don't fuse memory ops, Phi ops, divides (can panic),
// or anything else with side-effects
for _, v := range b.Values {
if v.Op == OpPhi || isDivMod(v.Op) || isPtrArithmetic(v.Op) || v.Type.IsMemory() ||
v.MemoryArg() != nil || opcodeTable[v.Op].hasSideEffects {
return false
}
}
return true
}

func isDivMod(op Op) bool {
switch op {
case OpDiv8, OpDiv8u, OpDiv16, OpDiv16u,
OpDiv32, OpDiv32u, OpDiv64, OpDiv64u, OpDiv128u,
OpDiv32F, OpDiv64F,
OpMod8, OpMod8u, OpMod16, OpMod16u,
OpMod32, OpMod32u, OpMod64, OpMod64u:
return true
default:
return false
}
}

func isPtrArithmetic(op Op) bool {
// Pointer arithmetic can't be speculatively executed because the result
// may be an invalid pointer (if, for example, the condition is that the
// base pointer is not nil). See issue 56990.
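// For example (SSA-level sketch): if a block guarded by a p != nil check
// contains only
//
//	v = OffPtr <*T> [8] p
//
// then speculating v while p is nil produces the small non-nil value 8 as a
// pointer, which the runtime can later report as a bad pointer.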
switch op {
case OpOffPtr, OpAddPtr, OpSubPtr:
return true
default:
return false
}
}