| // Copyright 2017 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| package ssa |
| |
| import "cmd/internal/src" |
| |
// branchelim tries to eliminate branches by
// generating CondSelect instructions.
//
// Search for basic blocks that look like
//
//	bb0            bb0
//	 | \          /   \
//	 | bb1  or  bb1   bb2    <- trivial if/else blocks
//	 | /          \   /
//	bb2            bb3
//
// where the intermediate blocks are mostly empty (with no side-effects);
// rewrite Phis in the postdominator as CondSelects.
func branchelim(f *Func) {
	// FIXME: add support for lowering CondSelects on more architectures
	switch f.Config.arch {
	case "arm64", "ppc64le", "ppc64", "amd64", "wasm", "loong64":
		// implemented
	default:
		return
	}

	// Find all the values used in computing the address of any load.
	// Typically these values have operations like AddPtr, Lsh64x64, etc.
	// Such values must stay control-dependent (not data-dependent) on the
	// branch condition; see the comment in canCondSelect and issue #26306.
	loadAddr := f.newSparseSet(f.NumValues())
	defer f.retSparseSet(loadAddr)
	for _, b := range f.Blocks {
		for _, v := range b.Values {
			switch v.Op {
			case OpLoad, OpAtomicLoad8, OpAtomicLoad32, OpAtomicLoad64, OpAtomicLoadPtr, OpAtomicLoadAcq32, OpAtomicLoadAcq64:
				// Args[0] is the address being loaded from.
				loadAddr.add(v.Args[0].ID)
			case OpMove:
				// Args[1] is the source address of the copy.
				loadAddr.add(v.Args[1].ID)
			}
		}
	}
	// Propagate membership backwards from each load address to the
	// (integer/pointer-typed) values it is computed from, iterating to a
	// fixed point. The postorder block walk and reverse value walk are
	// just to speed convergence; correctness relies only on the outer
	// loop running until the set stops growing.
	po := f.postorder()
	for {
		n := loadAddr.size()
		for _, b := range po {
			for i := len(b.Values) - 1; i >= 0; i-- {
				v := b.Values[i]
				if !loadAddr.contains(v.ID) {
					continue
				}
				for _, a := range v.Args {
					if a.Type.IsInteger() || a.Type.IsPtr() || a.Type.IsUnsafePtr() {
						loadAddr.add(a.ID)
					}
				}
			}
		}
		if loadAddr.size() == n {
			break
		}
	}

	// Repeatedly try to collapse if and if/else diamonds until no more
	// rewrites apply; one elimination can expose another (e.g. a fused
	// block becoming the trivial arm of an outer If).
	change := true
	for change {
		change = false
		for _, b := range f.Blocks {
			change = elimIf(f, loadAddr, b) || elimIfElse(f, loadAddr, b) || change
		}
	}
}
| |
| func canCondSelect(v *Value, arch string, loadAddr *sparseSet) bool { |
| if loadAddr.contains(v.ID) { |
| // The result of the soon-to-be conditional move is used to compute a load address. |
| // We want to avoid generating a conditional move in this case |
| // because the load address would now be data-dependent on the condition. |
| // Previously it would only be control-dependent on the condition, which is faster |
| // if the branch predicts well (or possibly even if it doesn't, if the load will |
| // be an expensive cache miss). |
| // See issue #26306. |
| return false |
| } |
| if arch == "loong64" { |
| // We should not generate conditional moves if neither of the arguments is constant zero, |
| // because it requires three instructions (OR, MASKEQZ, MASKNEZ) and will increase the |
| // register pressure. |
| if !(v.Args[0].isGenericIntConst() && v.Args[0].AuxInt == 0) && |
| !(v.Args[1].isGenericIntConst() && v.Args[1].AuxInt == 0) { |
| return false |
| } |
| } |
| // For now, stick to simple scalars that fit in registers |
| switch { |
| case v.Type.Size() > v.Block.Func.Config.RegSize: |
| return false |
| case v.Type.IsPtrShaped(): |
| return true |
| case v.Type.IsInteger(): |
| if arch == "amd64" && v.Type.Size() < 2 { |
| // amd64 doesn't support CMOV with byte registers |
| return false |
| } |
| return true |
| default: |
| return false |
| } |
| } |
| |
// elimIf converts the one-way branch starting at dom in f to a conditional move if possible.
// loadAddr is a set of values which are used to compute the address of a load.
// Those values are exempt from CMOV generation.
// It reports whether a rewrite was performed.
func elimIf(f *Func, loadAddr *sparseSet, dom *Block) bool {
	// See if dom is an If with one arm that
	// is trivial and succeeded by the other
	// successor of dom.
	// A branch-likely hint means someone expects the branch to predict
	// well, so leave it alone.
	if dom.Kind != BlockIf || dom.Likely != BranchUnknown {
		return false
	}
	var simple, post *Block
	for i := range dom.Succs {
		// bb is the candidate trivial arm; other (i^1 flips 0<->1) is
		// dom's opposite successor, which must be bb's sole successor.
		bb, other := dom.Succs[i].Block(), dom.Succs[i^1].Block()
		if isLeafPlain(bb) && bb.Succs[0].Block() == other {
			simple = bb
			post = other
			break
		}
	}
	// post must join exactly the two diamond edges, and must not be a
	// self-loop back to dom.
	if simple == nil || len(post.Preds) != 2 || post == dom {
		return false
	}

	// We've found our diamond CFG of blocks.
	// Now decide if fusing 'simple' into dom+post
	// looks profitable.

	// Check that there are Phis, and that all of them
	// can be safely rewritten to CondSelect.
	hasphis := false
	for _, v := range post.Values {
		if v.Op == OpPhi {
			hasphis = true
			if !canCondSelect(v, f.Config.arch, loadAddr) {
				return false
			}
		}
	}
	if !hasphis {
		return false
	}

	// Pick some upper bound for the number of instructions
	// we'd be willing to execute just to generate a dead
	// argument to CondSelect. In the worst case, this is
	// the number of useless instructions executed.
	const maxfuseinsts = 2

	if len(simple.Values) > maxfuseinsts || !canSpeculativelyExecute(simple) {
		return false
	}

	// Replace Phi instructions in b with CondSelect instructions.
	// Phi args are ordered to match post.Preds, while CondSelect expects
	// the "condition true" value first, which flows along dom.Succs[0].
	// Swap the args when those two orderings disagree.
	swap := (post.Preds[0].Block() == dom) != (dom.Succs[0].Block() == post)
	for _, v := range post.Values {
		if v.Op != OpPhi {
			continue
		}
		v.Op = OpCondSelect
		if swap {
			v.Args[0], v.Args[1] = v.Args[1], v.Args[0]
		}
		// The branch condition becomes the CondSelect's selector argument.
		v.AddArg(dom.Controls[0])
	}

	// Put all of the instructions into 'dom'
	// and update the CFG appropriately: dom takes over post's kind,
	// controls, and successors, and post's successors' pred edges are
	// redirected to dom.
	dom.Kind = post.Kind
	dom.CopyControls(post)
	dom.Aux = post.Aux
	dom.Succs = append(dom.Succs[:0], post.Succs...)
	for i := range dom.Succs {
		e := dom.Succs[i]
		e.b.Preds[e.i].b = dom
	}

	// Try really hard to preserve statement marks attached to blocks.
	simplePos := simple.Pos
	postPos := post.Pos
	simpleStmt := simplePos.IsStmt() == src.PosIsStmt
	postStmt := postPos.IsStmt() == src.PosIsStmt

	for _, v := range simple.Values {
		v.Block = dom
	}
	for _, v := range post.Values {
		v.Block = dom
	}

	// findBlockPos determines if b contains a stmt-marked value
	// that has the same line number as the Pos for b itself.
	// (i.e. is the position on b actually redundant?)
	findBlockPos := func(b *Block) bool {
		pos := b.Pos
		for _, v := range b.Values {
			// See if there is a stmt-marked value already that matches simple.Pos (and perhaps post.Pos)
			if pos.SameFileAndLine(v.Pos) && v.Pos.IsStmt() == src.PosIsStmt {
				return true
			}
		}
		return false
	}
	if simpleStmt {
		simpleStmt = !findBlockPos(simple)
		if !simpleStmt && simplePos.SameFileAndLine(postPos) {
			postStmt = false
		}

	}
	if postStmt {
		postStmt = !findBlockPos(post)
	}

	// If simpleStmt and/or postStmt are still true, then try harder
	// to find the corresponding statement marks new homes.

	// setBlockPos determines if b contains a can-be-statement value
	// that has the same line number as the Pos for b itself, and
	// puts a statement mark on it, and returns whether it succeeded
	// in this operation.
	setBlockPos := func(b *Block) bool {
		pos := b.Pos
		for _, v := range b.Values {
			if pos.SameFileAndLine(v.Pos) && !isPoorStatementOp(v.Op) {
				v.Pos = v.Pos.WithIsStmt()
				return true
			}
		}
		return false
	}
	// If necessary and possible, add a mark to a value in simple
	if simpleStmt {
		if setBlockPos(simple) && simplePos.SameFileAndLine(postPos) {
			postStmt = false
		}
	}
	// If necessary and possible, add a mark to a value in post
	if postStmt {
		postStmt = !setBlockPos(post)
	}

	// Before giving up (this was added because it helps), try the end of "dom", and if that is not available,
	// try the values in the successor block if it is uncomplicated.
	if postStmt {
		if dom.Pos.IsStmt() != src.PosIsStmt {
			dom.Pos = postPos
		} else {
			// Try the successor block
			if len(dom.Succs) == 1 && len(dom.Succs[0].Block().Preds) == 1 {
				succ := dom.Succs[0].Block()
				for _, v := range succ.Values {
					if isPoorStatementOp(v.Op) {
						continue
					}
					if postPos.SameFileAndLine(v.Pos) {
						v.Pos = v.Pos.WithIsStmt()
					}
					postStmt = false
					break
				}
				// If postStmt still true, tag the block itself if possible
				if postStmt && succ.Pos.IsStmt() != src.PosIsStmt {
					succ.Pos = postPos
				}
			}
		}
	}

	dom.Values = append(dom.Values, simple.Values...)
	dom.Values = append(dom.Values, post.Values...)

	// Trash 'post' and 'simple'
	clobberBlock(post)
	clobberBlock(simple)

	f.invalidateCFG()
	return true
}
| |
| // is this a BlockPlain with one predecessor? |
| func isLeafPlain(b *Block) bool { |
| return b.Kind == BlockPlain && len(b.Preds) == 1 |
| } |
| |
| func clobberBlock(b *Block) { |
| b.Values = nil |
| b.Preds = nil |
| b.Succs = nil |
| b.Aux = nil |
| b.ResetControls() |
| b.Likely = BranchUnknown |
| b.Kind = BlockInvalid |
| } |
| |
// elimIfElse converts the two-way branch starting at dom in f to a conditional move if possible.
// loadAddr is a set of values which are used to compute the address of a load.
// Those values are exempt from CMOV generation.
// It reports whether a rewrite was performed.
func elimIfElse(f *Func, loadAddr *sparseSet, b *Block) bool {
	// See if 'b' ends in an if/else: it should
	// have two successors, both of which are BlockPlain
	// and succeeded by the same block.
	// A branch-likely hint means someone expects the branch to predict
	// well, so leave it alone.
	if b.Kind != BlockIf || b.Likely != BranchUnknown {
		return false
	}
	yes, no := b.Succs[0].Block(), b.Succs[1].Block()
	// Each arm must be a trivial single-pred plain block holding at most
	// one value, all of whose values are safe to execute unconditionally.
	if !isLeafPlain(yes) || len(yes.Values) > 1 || !canSpeculativelyExecute(yes) {
		return false
	}
	if !isLeafPlain(no) || len(no.Values) > 1 || !canSpeculativelyExecute(no) {
		return false
	}
	// Both arms must rejoin at the same block.
	if b.Succs[0].Block().Succs[0].Block() != b.Succs[1].Block().Succs[0].Block() {
		return false
	}
	// block that postdominates the if/else
	post := b.Succs[0].Block().Succs[0].Block()
	// post must join exactly the two arms, and must not loop back to b.
	if len(post.Preds) != 2 || post == b {
		return false
	}
	// Check that there are Phis, and that all of them
	// can be safely rewritten to CondSelect.
	hasphis := false
	for _, v := range post.Values {
		if v.Op == OpPhi {
			hasphis = true
			if !canCondSelect(v, f.Config.arch, loadAddr) {
				return false
			}
		}
	}
	if !hasphis {
		return false
	}

	// Don't generate CondSelects if branch is cheaper.
	if !shouldElimIfElse(no, yes, post, f.Config.arch) {
		return false
	}

	// now we're committed: rewrite each Phi as a CondSelect.
	// Phi args are ordered to match post.Preds, while CondSelect expects
	// the "condition true" value (the one flowing through yes, i.e.
	// b.Succs[0]) first. Swap the args when the orderings disagree.
	swap := post.Preds[0].Block() != b.Succs[0].Block()
	for _, v := range post.Values {
		if v.Op != OpPhi {
			continue
		}
		v.Op = OpCondSelect
		if swap {
			v.Args[0], v.Args[1] = v.Args[1], v.Args[0]
		}
		// The branch condition becomes the CondSelect's selector argument.
		v.AddArg(b.Controls[0])
	}

	// Move the contents of all of these
	// blocks into 'b' and update CFG edges accordingly:
	// b takes over post's kind, controls, and successors, and post's
	// successors' pred edges are redirected to b.
	b.Kind = post.Kind
	b.CopyControls(post)
	b.Aux = post.Aux
	b.Succs = append(b.Succs[:0], post.Succs...)
	for i := range b.Succs {
		e := b.Succs[i]
		e.b.Preds[e.i].b = b
	}
	for i := range post.Values {
		post.Values[i].Block = b
	}
	for i := range yes.Values {
		yes.Values[i].Block = b
	}
	for i := range no.Values {
		no.Values[i].Block = b
	}
	// Arm values must precede post's values, which may use them.
	b.Values = append(b.Values, yes.Values...)
	b.Values = append(b.Values, no.Values...)
	b.Values = append(b.Values, post.Values...)

	// trash post, yes, and no
	clobberBlock(yes)
	clobberBlock(no)
	clobberBlock(post)

	f.invalidateCFG()
	return true
}
| |
| // shouldElimIfElse reports whether estimated cost of eliminating branch |
| // is lower than threshold. |
| func shouldElimIfElse(no, yes, post *Block, arch string) bool { |
| switch arch { |
| default: |
| return true |
| case "amd64": |
| const maxcost = 2 |
| phi := 0 |
| other := 0 |
| for _, v := range post.Values { |
| if v.Op == OpPhi { |
| // Each phi results in CondSelect, which lowers into CMOV, |
| // CMOV has latency >1 on most CPUs. |
| phi++ |
| } |
| for _, x := range v.Args { |
| if x.Block == no || x.Block == yes { |
| other++ |
| } |
| } |
| } |
| cost := phi * 1 |
| if phi > 1 { |
| // If we have more than 1 phi and some values in post have args |
| // in yes or no blocks, we may have to recalculate condition, because |
| // those args may clobber flags. For now assume that all operations clobber flags. |
| cost += other * 1 |
| } |
| return cost < maxcost |
| } |
| } |
| |
| // canSpeculativelyExecute reports whether every value in the block can |
| // be evaluated without causing any observable side effects (memory |
| // accesses, panics and so on) except for execution time changes. It |
| // also ensures that the block does not contain any phis which we can't |
| // speculatively execute. |
| // Warning: this function cannot currently detect values that represent |
| // instructions the execution of which need to be guarded with CPU |
| // hardware feature checks. See issue #34950. |
| func canSpeculativelyExecute(b *Block) bool { |
| // don't fuse memory ops, Phi ops, divides (can panic), |
| // or anything else with side-effects |
| for _, v := range b.Values { |
| if v.Op == OpPhi || isDivMod(v.Op) || isPtrArithmetic(v.Op) || v.Type.IsMemory() || |
| v.MemoryArg() != nil || opcodeTable[v.Op].hasSideEffects { |
| return false |
| } |
| } |
| return true |
| } |
| |
| func isDivMod(op Op) bool { |
| switch op { |
| case OpDiv8, OpDiv8u, OpDiv16, OpDiv16u, |
| OpDiv32, OpDiv32u, OpDiv64, OpDiv64u, OpDiv128u, |
| OpDiv32F, OpDiv64F, |
| OpMod8, OpMod8u, OpMod16, OpMod16u, |
| OpMod32, OpMod32u, OpMod64, OpMod64u: |
| return true |
| default: |
| return false |
| } |
| } |
| |
| func isPtrArithmetic(op Op) bool { |
| // Pointer arithmetic can't be speculatively executed because the result |
| // may be an invalid pointer (if, for example, the condition is that the |
| // base pointer is not nil). See issue 56990. |
| switch op { |
| case OpOffPtr, OpAddPtr, OpSubPtr: |
| return true |
| default: |
| return false |
| } |
| } |