cmd/compile: added some intrinsics to SSA back end
One intrinsic (Ctz) was needed to help get the very best
performance out of a future GC; since that one was being
added anyway, I also added Bswap, which is sometimes a
handy thing to have. I had intended to fill out the
bit-scan intrinsic family as well, but the mismatch
between the "bit scan reverse" instruction and "count
leading zeroes" was large enough to make me leave it
out -- it poses a dilemma that I'd rather dodge right now.
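
For context (this sketch is not part of the CL): with the
zero-input fix-up pattern used for Ctz below, Ctz maps
directly onto BSF, but a Clz intrinsic would also need an
extra subtraction, because BSR reports the index of the
highest set bit rather than a count of leading zeroes.
A rough Go illustration, with bsr64 standing in for what
the BSRQ instruction computes:

	// bsr64 mirrors what BSRQ computes: the index of the highest
	// set bit (undefined for x == 0, as on the hardware).
	func bsr64(x uint64) uint64 {
		i := uint64(0)
		for x > 1 {
			x >>= 1
			i++
		}
		return i
	}

	// clz64 shows the mismatch: on top of the zero fix-up that
	// Ctz64 also needs, it requires an extra subtraction.
	func clz64(x uint64) uint64 {
		if x == 0 {
			return 64
		}
		return 63 - bsr64(x)
	}
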
These intrinsics are not exposed for general use; they are
only recognized in the runtime/internal/sys package.
Exposing them more widely is a separate issue that would
require an API proposal (https://github.com/golang/proposal).
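
For reference, the recognized calls are to the
Ctz64/Ctz32/Ctz16 and Bswap64/Bswap32 functions in
runtime/internal/sys (see isSSAIntrinsic1 below). A
hypothetical call site; the signature of Ctz64 is assumed
here to take and return uint64, matching the result type
used in ssa.go:

	import "runtime/internal/sys"

	func firstSet(bits uint64) uint64 {
		// Compiled with this CL on amd64, the call is replaced by
		// BSF plus a CMOV for the zero case rather than a function call.
		return sys.Ctz64(bits)
	}

The substitution can also be traced or disabled via the new
"intrinsics" pseudo-phase in compile.go; assuming the usual
-d=ssa/<phase>/<flag> syntax, that would be spelled
-d=ssa/intrinsics/debug or -d=ssa/intrinsics/off.
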
All intrinsics are tested, both that they are substituted
on the appropriate architecture, and that they produce the
expected result.
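
The tests themselves are not shown in this diff; as an
illustration of the expected results (the [16]/[32]/[64]
constants in the AMD64 rules handle the zero inputs), a
minimal check along these lines -- again assuming the
runtime/internal/sys signatures used above -- would be:

	// Sketch only; sys is runtime/internal/sys as in the example above.
	func checkIntrinsics() {
		if sys.Ctz64(0) != 64 || sys.Ctz64(1<<20) != 20 {
			panic("Ctz64")
		}
		if sys.Ctz16(0) != 16 {
			panic("Ctz16")
		}
		if sys.Bswap32(0x01020304) != 0x04030201 {
			panic("Bswap32")
		}
	}
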
Change-Id: I5848037cfd97de4f75bdc33bdd89bba00af4a8ee
Reviewed-on: https://go-review.googlesource.com/20564
Reviewed-by: Keith Randall <khr@golang.org>
Run-TryBot: David Chase <drchase@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
diff --git a/src/cmd/compile/internal/amd64/prog.go b/src/cmd/compile/internal/amd64/prog.go
index 55ea7ee..91b479b 100644
--- a/src/cmd/compile/internal/amd64/prog.go
+++ b/src/cmd/compile/internal/amd64/prog.go
@@ -36,26 +36,44 @@
// NOP is an internal no-op that also stands
// for USED and SET annotations, not the Intel opcode.
- obj.ANOP: {Flags: gc.LeftRead | gc.RightWrite},
- x86.AADCL & obj.AMask: {Flags: gc.SizeL | gc.LeftRead | RightRdwr | gc.SetCarry | gc.UseCarry},
- x86.AADCQ & obj.AMask: {Flags: gc.SizeQ | gc.LeftRead | RightRdwr | gc.SetCarry | gc.UseCarry},
- x86.AADCW & obj.AMask: {Flags: gc.SizeW | gc.LeftRead | RightRdwr | gc.SetCarry | gc.UseCarry},
- x86.AADDB & obj.AMask: {Flags: gc.SizeB | gc.LeftRead | RightRdwr | gc.SetCarry},
- x86.AADDL & obj.AMask: {Flags: gc.SizeL | gc.LeftRead | RightRdwr | gc.SetCarry},
- x86.AADDW & obj.AMask: {Flags: gc.SizeW | gc.LeftRead | RightRdwr | gc.SetCarry},
- x86.AADDQ & obj.AMask: {Flags: gc.SizeQ | gc.LeftRead | RightRdwr | gc.SetCarry},
- x86.AADDSD & obj.AMask: {Flags: gc.SizeD | gc.LeftRead | RightRdwr},
- x86.AADDSS & obj.AMask: {Flags: gc.SizeF | gc.LeftRead | RightRdwr},
- x86.AANDB & obj.AMask: {Flags: gc.SizeB | gc.LeftRead | RightRdwr | gc.SetCarry},
- x86.AANDL & obj.AMask: {Flags: gc.SizeL | gc.LeftRead | RightRdwr | gc.SetCarry},
- x86.AANDQ & obj.AMask: {Flags: gc.SizeQ | gc.LeftRead | RightRdwr | gc.SetCarry},
- x86.AANDW & obj.AMask: {Flags: gc.SizeW | gc.LeftRead | RightRdwr | gc.SetCarry},
- obj.ACALL & obj.AMask: {Flags: gc.RightAddr | gc.Call | gc.KillCarry},
- x86.ACDQ & obj.AMask: {Flags: gc.OK, Reguse: AX, Regset: AX | DX},
- x86.ACQO & obj.AMask: {Flags: gc.OK, Reguse: AX, Regset: AX | DX},
- x86.ACWD & obj.AMask: {Flags: gc.OK, Reguse: AX, Regset: AX | DX},
- x86.ACLD & obj.AMask: {Flags: gc.OK},
- x86.ASTD & obj.AMask: {Flags: gc.OK},
+ obj.ANOP: {Flags: gc.LeftRead | gc.RightWrite},
+ x86.AADCL & obj.AMask: {Flags: gc.SizeL | gc.LeftRead | RightRdwr | gc.SetCarry | gc.UseCarry},
+ x86.AADCQ & obj.AMask: {Flags: gc.SizeQ | gc.LeftRead | RightRdwr | gc.SetCarry | gc.UseCarry},
+ x86.AADCW & obj.AMask: {Flags: gc.SizeW | gc.LeftRead | RightRdwr | gc.SetCarry | gc.UseCarry},
+ x86.AADDB & obj.AMask: {Flags: gc.SizeB | gc.LeftRead | RightRdwr | gc.SetCarry},
+ x86.AADDL & obj.AMask: {Flags: gc.SizeL | gc.LeftRead | RightRdwr | gc.SetCarry},
+ x86.AADDW & obj.AMask: {Flags: gc.SizeW | gc.LeftRead | RightRdwr | gc.SetCarry},
+ x86.AADDQ & obj.AMask: {Flags: gc.SizeQ | gc.LeftRead | RightRdwr | gc.SetCarry},
+ x86.AADDSD & obj.AMask: {Flags: gc.SizeD | gc.LeftRead | RightRdwr},
+ x86.AADDSS & obj.AMask: {Flags: gc.SizeF | gc.LeftRead | RightRdwr},
+ x86.AANDB & obj.AMask: {Flags: gc.SizeB | gc.LeftRead | RightRdwr | gc.SetCarry},
+ x86.AANDL & obj.AMask: {Flags: gc.SizeL | gc.LeftRead | RightRdwr | gc.SetCarry},
+ x86.AANDQ & obj.AMask: {Flags: gc.SizeQ | gc.LeftRead | RightRdwr | gc.SetCarry},
+ x86.AANDW & obj.AMask: {Flags: gc.SizeW | gc.LeftRead | RightRdwr | gc.SetCarry},
+
+ x86.ABSFL & obj.AMask: {Flags: gc.SizeL | gc.LeftRead | gc.RightWrite | gc.SetCarry},
+ x86.ABSFQ & obj.AMask: {Flags: gc.SizeQ | gc.LeftRead | gc.RightWrite | gc.SetCarry},
+ x86.ABSFW & obj.AMask: {Flags: gc.SizeW | gc.LeftRead | gc.RightWrite | gc.SetCarry},
+ x86.ABSRL & obj.AMask: {Flags: gc.SizeL | gc.LeftRead | gc.RightWrite | gc.SetCarry},
+ x86.ABSRQ & obj.AMask: {Flags: gc.SizeQ | gc.LeftRead | gc.RightWrite | gc.SetCarry},
+ x86.ABSRW & obj.AMask: {Flags: gc.SizeW | gc.LeftRead | gc.RightWrite | gc.SetCarry},
+ x86.ABSWAPL & obj.AMask: {Flags: gc.SizeL | RightRdwr},
+ x86.ABSWAPQ & obj.AMask: {Flags: gc.SizeQ | RightRdwr},
+
+ obj.ACALL & obj.AMask: {Flags: gc.RightAddr | gc.Call | gc.KillCarry},
+ x86.ACDQ & obj.AMask: {Flags: gc.OK, Reguse: AX, Regset: AX | DX},
+ x86.ACQO & obj.AMask: {Flags: gc.OK, Reguse: AX, Regset: AX | DX},
+ x86.ACWD & obj.AMask: {Flags: gc.OK, Reguse: AX, Regset: AX | DX},
+ x86.ACLD & obj.AMask: {Flags: gc.OK},
+ x86.ASTD & obj.AMask: {Flags: gc.OK},
+
+ x86.ACMOVLEQ & obj.AMask: {Flags: gc.SizeL | gc.LeftRead | RightRdwr | gc.UseCarry},
+ x86.ACMOVLNE & obj.AMask: {Flags: gc.SizeL | gc.LeftRead | RightRdwr | gc.UseCarry},
+ x86.ACMOVQEQ & obj.AMask: {Flags: gc.SizeQ | gc.LeftRead | RightRdwr | gc.UseCarry},
+ x86.ACMOVQNE & obj.AMask: {Flags: gc.SizeQ | gc.LeftRead | RightRdwr | gc.UseCarry},
+ x86.ACMOVWEQ & obj.AMask: {Flags: gc.SizeW | gc.LeftRead | RightRdwr | gc.UseCarry},
+ x86.ACMOVWNE & obj.AMask: {Flags: gc.SizeW | gc.LeftRead | RightRdwr | gc.UseCarry},
+
x86.ACMPB & obj.AMask: {Flags: gc.SizeB | gc.LeftRead | gc.RightRead | gc.SetCarry},
x86.ACMPL & obj.AMask: {Flags: gc.SizeL | gc.LeftRead | gc.RightRead | gc.SetCarry},
x86.ACMPQ & obj.AMask: {Flags: gc.SizeQ | gc.LeftRead | gc.RightRead | gc.SetCarry},
diff --git a/src/cmd/compile/internal/amd64/ssa.go b/src/cmd/compile/internal/amd64/ssa.go
index 307ba28..dfacff6 100644
--- a/src/cmd/compile/internal/amd64/ssa.go
+++ b/src/cmd/compile/internal/amd64/ssa.go
@@ -477,6 +477,33 @@
p.From.Offset = v.AuxInt2Int64()
p.To.Type = obj.TYPE_REG
p.To.Reg = r
+
+ case ssa.OpAMD64CMOVQEQconst, ssa.OpAMD64CMOVLEQconst, ssa.OpAMD64CMOVWEQconst,
+ ssa.OpAMD64CMOVQNEconst, ssa.OpAMD64CMOVLNEconst, ssa.OpAMD64CMOVWNEconst:
+ r := gc.SSARegNum(v)
+ x := gc.SSARegNum(v.Args[0])
+ // Arg0 is both input and output; copy it to the output register if they differ
+ if r != x {
+ p := gc.Prog(moveByType(v.Type))
+ p.From.Type = obj.TYPE_REG
+ p.From.Reg = x
+ p.To.Type = obj.TYPE_REG
+ p.To.Reg = r
+ }
+
+ // Load the constant into AX after the arg0 move, in case arg0 was in AX
+ p := gc.Prog(moveByType(v.Type))
+ p.From.Type = obj.TYPE_CONST
+ p.From.Offset = v.AuxInt2Int64()
+ p.To.Type = obj.TYPE_REG
+ p.To.Reg = x86.REG_AX
+
+ p = gc.Prog(v.Op.Asm())
+ p.From.Type = obj.TYPE_REG
+ p.From.Reg = x86.REG_AX
+ p.To.Type = obj.TYPE_REG
+ p.To.Reg = r
+
case ssa.OpAMD64MULQconst, ssa.OpAMD64MULLconst, ssa.OpAMD64MULWconst, ssa.OpAMD64MULBconst:
r := gc.SSARegNum(v)
x := gc.SSARegNum(v.Args[0])
@@ -955,6 +982,7 @@
gc.Maxarg = v.AuxInt
}
case ssa.OpAMD64NEGQ, ssa.OpAMD64NEGL, ssa.OpAMD64NEGW, ssa.OpAMD64NEGB,
+ ssa.OpAMD64BSWAPQ, ssa.OpAMD64BSWAPL,
ssa.OpAMD64NOTQ, ssa.OpAMD64NOTL, ssa.OpAMD64NOTW, ssa.OpAMD64NOTB:
x := gc.SSARegNum(v.Args[0])
r := gc.SSARegNum(v)
@@ -968,7 +996,9 @@
p := gc.Prog(v.Op.Asm())
p.To.Type = obj.TYPE_REG
p.To.Reg = r
- case ssa.OpAMD64SQRTSD:
+ case ssa.OpAMD64BSFQ, ssa.OpAMD64BSFL, ssa.OpAMD64BSFW,
+ ssa.OpAMD64BSRQ, ssa.OpAMD64BSRL, ssa.OpAMD64BSRW,
+ ssa.OpAMD64SQRTSD:
p := gc.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_REG
p.From.Reg = gc.SSARegNum(v.Args[0])
@@ -1008,9 +1038,9 @@
opregreg(x86.AANDL, gc.SSARegNum(v), x86.REG_AX)
case ssa.OpAMD64InvertFlags:
- v.Fatalf("InvertFlags should never make it to codegen %v", v)
+ v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
case ssa.OpAMD64FlagEQ, ssa.OpAMD64FlagLT_ULT, ssa.OpAMD64FlagLT_UGT, ssa.OpAMD64FlagGT_ULT, ssa.OpAMD64FlagGT_UGT:
- v.Fatalf("Flag* ops should never make it to codegen %v", v)
+ v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
case ssa.OpAMD64REPSTOSQ:
gc.Prog(x86.AREP)
gc.Prog(x86.ASTOSQ)
diff --git a/src/cmd/compile/internal/gc/go.go b/src/cmd/compile/internal/gc/go.go
index 78c177e..448a0fd 100644
--- a/src/cmd/compile/internal/gc/go.go
+++ b/src/cmd/compile/internal/gc/go.go
@@ -55,8 +55,8 @@
}
type Pkg struct {
- Name string // package name
- Path string // string literal used in import statement
+ Name string // package name, e.g. "sys"
+ Path string // string literal used in import statement, e.g. "runtime/internal/sys"
Pathsym *Sym
Prefix string // escaped path for use in symbol table
Imported bool // export data of this package was parsed
@@ -469,6 +469,9 @@
// Set, use, or kill of carry bit.
// Kill means we never look at the carry bit after this kind of instruction.
+ // Originally for understanding ADC, RCR, and so on, but this now
+ // also tracks set, use, and kill of the zero and overflow bits.
+ // TODO: rename to {Set,Use,Kill}Flags
SetCarry = 1 << 24
UseCarry = 1 << 25
KillCarry = 1 << 26
diff --git a/src/cmd/compile/internal/gc/inl.go b/src/cmd/compile/internal/gc/inl.go
index ff0791c..e25ce13 100644
--- a/src/cmd/compile/internal/gc/inl.go
+++ b/src/cmd/compile/internal/gc/inl.go
@@ -453,7 +453,7 @@
if Debug['m'] > 3 {
fmt.Printf("%v:call to func %v\n", n.Line(), Nconv(n.Left, FmtSign))
}
- if n.Left.Func != nil && len(n.Left.Func.Inl.Slice()) != 0 { // normal case
+ if n.Left.Func != nil && len(n.Left.Func.Inl.Slice()) != 0 && !isIntrinsicCall1(n) { // normal case
n = mkinlcall(n, n.Left, n.Isddd)
} else if n.Left.Op == ONAME && n.Left.Left != nil && n.Left.Left.Op == OTYPE && n.Left.Right != nil && n.Left.Right.Op == ONAME { // methods called as functions
if n.Left.Sym.Def != nil {
diff --git a/src/cmd/compile/internal/gc/ssa.go b/src/cmd/compile/internal/gc/ssa.go
index 93b820b..9b8ef20 100644
--- a/src/cmd/compile/internal/gc/ssa.go
+++ b/src/cmd/compile/internal/gc/ssa.go
@@ -2052,7 +2052,13 @@
p, l, c := s.slice(n.Left.Type, v, i, j, k)
return s.newValue3(ssa.OpSliceMake, n.Type, p, l, c)
- case OCALLFUNC, OCALLINTER, OCALLMETH:
+ case OCALLFUNC:
+ if isIntrinsicCall1(n) {
+ return s.intrinsicCall1(n)
+ }
+ fallthrough
+
+ case OCALLINTER, OCALLMETH:
a := s.call(n, callNormal)
return s.newValue2(ssa.OpLoad, n.Type, a, s.mem())
@@ -2373,6 +2379,75 @@
callGo
)
+// isSSAIntrinsic1 reports whether s names a recognized 1-arg intrinsic
+// that can be handled by the SSA back end.
+// SSA uses this, but so does the front end, to decide that it should
+// not inline a function because it is a candidate for intrinsic
+// substitution.
+func isSSAIntrinsic1(s *Sym) bool {
+ // The test below is not quite accurate -- in the event that
+ // a function is disabled on a per-function basis, for example
+ // because of hash-keyed binary failure search, SSA might be
+ // disabled for that function but it would not be noted here,
+ // and thus an inlining would not occur (in practice, inlining
+ // so far has only been noticed for Bswap32 and the 16-bit count
+ // leading/trailing instructions, but heuristics might change
+ // in the future or on different architectures).
+ if !ssaEnabled || ssa.IntrinsicsDisable || Thearch.Thechar != '6' {
+ return false
+ }
+ if s != nil && s.Pkg != nil && s.Pkg.Path == "runtime/internal/sys" {
+ switch s.Name {
+ case
+ "Ctz64", "Ctz32", "Ctz16",
+ "Bswap64", "Bswap32":
+ return true
+ }
+ }
+ return false
+}
+
+func isIntrinsicCall1(n *Node) bool {
+ if n == nil || n.Left == nil {
+ return false
+ }
+ return isSSAIntrinsic1(n.Left.Sym)
+}
+
+// intrinsicFirstArg extracts the first argument from n.List and evaluates it as an SSA value.
+func (s *state) intrinsicFirstArg(n *Node) *ssa.Value {
+ x := n.List.First()
+ if x.Op == OAS {
+ x = x.Right
+ }
+ return s.expr(x)
+}
+
+// intrinsicCall1 converts a call to a recognized 1-arg intrinsic
+// into the corresponding SSA operation.
+func (s *state) intrinsicCall1(n *Node) *ssa.Value {
+ var result *ssa.Value
+ switch n.Left.Sym.Name {
+ case "Ctz64":
+ result = s.newValue1(ssa.OpCtz64, Types[TUINT64], s.intrinsicFirstArg(n))
+ case "Ctz32":
+ result = s.newValue1(ssa.OpCtz32, Types[TUINT32], s.intrinsicFirstArg(n))
+ case "Ctz16":
+ result = s.newValue1(ssa.OpCtz16, Types[TUINT16], s.intrinsicFirstArg(n))
+ case "Bswap64":
+ result = s.newValue1(ssa.OpBswap64, Types[TUINT64], s.intrinsicFirstArg(n))
+ case "Bswap32":
+ result = s.newValue1(ssa.OpBswap32, Types[TUINT32], s.intrinsicFirstArg(n))
+ }
+ if result == nil {
+ Fatalf("Unknown special call: %v", n.Left.Sym)
+ }
+ if ssa.IntrinsicsDebug > 0 {
+ Warnl(n.Lineno, "intrinsic substitution for %v with %s", n.Left.Sym.Name, result.LongString())
+ }
+ return result
+}
+
// Calls the function n using the specified call type.
// Returns the address of the return value (or nil if none).
func (s *state) call(n *Node, k callKind) *ssa.Value {
diff --git a/src/cmd/compile/internal/ssa/compile.go b/src/cmd/compile/internal/ssa/compile.go
index b8e2b42..d6c2bf8 100644
--- a/src/cmd/compile/internal/ssa/compile.go
+++ b/src/cmd/compile/internal/ssa/compile.go
@@ -120,6 +120,10 @@
// Run consistency checker between each phase
var checkEnabled = false
+// Debug output
+var IntrinsicsDebug int
+var IntrinsicsDisable bool
+
// PhaseOption sets the specified flag in the specified ssa phase,
// returning empty string if this was successful or a string explaining
// the error if it was not.
@@ -157,6 +161,20 @@
}
}
+ if phase == "intrinsics" {
+ switch flag {
+ case "on":
+ IntrinsicsDisable = val == 0
+ case "off":
+ IntrinsicsDisable = val != 0
+ case "debug":
+ IntrinsicsDebug = val
+ default:
+ return fmt.Sprintf("Did not find a flag matching %s in -d=ssa/%s debug option", flag, phase)
+ }
+ return ""
+ }
+
underphase := strings.Replace(phase, "_", " ", -1)
var re *regexp.Regexp
if phase[0] == '~' {
diff --git a/src/cmd/compile/internal/ssa/gen/AMD64.rules b/src/cmd/compile/internal/ssa/gen/AMD64.rules
index b595912..cc21097 100644
--- a/src/cmd/compile/internal/ssa/gen/AMD64.rules
+++ b/src/cmd/compile/internal/ssa/gen/AMD64.rules
@@ -92,6 +92,38 @@
(Com16 x) -> (NOTW x)
(Com8 x) -> (NOTB x)
+// The CMPQconst 0 below is redundant because BSF already sets Z; it is not yet clear how to remove it.
+(Ctz64 <t> x) -> (CMOVQEQconst (BSFQ <t> x) (CMPQconst x [0]) [64])
+(Ctz32 <t> x) -> (CMOVLEQconst (BSFL <t> x) (CMPLconst x [0]) [32])
+(Ctz16 <t> x) -> (CMOVWEQconst (BSFW <t> x) (CMPWconst x [0]) [16])
+
+(CMOVQEQconst x (InvertFlags y) [c]) -> (CMOVQNEconst x y [c])
+(CMOVLEQconst x (InvertFlags y) [c]) -> (CMOVLNEconst x y [c])
+(CMOVWEQconst x (InvertFlags y) [c]) -> (CMOVWNEconst x y [c])
+
+(CMOVQEQconst _ (FlagEQ) [c]) -> (Const64 [c])
+(CMOVLEQconst _ (FlagEQ) [c]) -> (Const32 [c])
+(CMOVWEQconst _ (FlagEQ) [c]) -> (Const16 [c])
+
+(CMOVQEQconst x (FlagLT_ULT)) -> x
+(CMOVLEQconst x (FlagLT_ULT)) -> x
+(CMOVWEQconst x (FlagLT_ULT)) -> x
+
+(CMOVQEQconst x (FlagLT_UGT)) -> x
+(CMOVLEQconst x (FlagLT_UGT)) -> x
+(CMOVWEQconst x (FlagLT_UGT)) -> x
+
+(CMOVQEQconst x (FlagGT_ULT)) -> x
+(CMOVLEQconst x (FlagGT_ULT)) -> x
+(CMOVWEQconst x (FlagGT_ULT)) -> x
+
+(CMOVQEQconst x (FlagGT_UGT)) -> x
+(CMOVLEQconst x (FlagGT_UGT)) -> x
+(CMOVWEQconst x (FlagGT_UGT)) -> x
+
+(Bswap64 x) -> (BSWAPQ x)
+(Bswap32 x) -> (BSWAPL x)
+
(Sqrt x) -> (SQRTSD x)
// Note: we always extend to 64 bits even though some ops don't need that many result bits.
diff --git a/src/cmd/compile/internal/ssa/gen/AMD64Ops.go b/src/cmd/compile/internal/ssa/gen/AMD64Ops.go
index 116e3ff..9dc09aa 100644
--- a/src/cmd/compile/internal/ssa/gen/AMD64Ops.go
+++ b/src/cmd/compile/internal/ssa/gen/AMD64Ops.go
@@ -103,9 +103,13 @@
gp11mod = regInfo{inputs: []regMask{ax, gpsp &^ dx}, outputs: []regMask{dx},
clobbers: ax | flags}
- gp2flags = regInfo{inputs: []regMask{gpsp, gpsp}, outputs: flagsonly}
- gp1flags = regInfo{inputs: []regMask{gpsp}, outputs: flagsonly}
- flagsgp = regInfo{inputs: flagsonly, outputs: gponly}
+ gp2flags = regInfo{inputs: []regMask{gpsp, gpsp}, outputs: flagsonly}
+ gp1flags = regInfo{inputs: []regMask{gpsp}, outputs: flagsonly}
+ flagsgp = regInfo{inputs: flagsonly, outputs: gponly}
+
+ // for CMOV*const -- AX temporarily holds the constant; an arg0 that is in AX is moved out before the constant is loaded.
+ gp1flagsgp = regInfo{inputs: []regMask{gp, flags}, clobbers: ax | flags, outputs: []regMask{gp &^ ax}}
+
readflags = regInfo{inputs: flagsonly, outputs: gponly}
flagsgpax = regInfo{inputs: flagsonly, clobbers: ax | flags, outputs: []regMask{gp &^ ax}}
@@ -307,6 +311,25 @@
{name: "NOTW", argLength: 1, reg: gp11, asm: "NOTL", resultInArg0: true}, // ^arg0
{name: "NOTB", argLength: 1, reg: gp11, asm: "NOTL", resultInArg0: true}, // ^arg0
+ {name: "BSFQ", argLength: 1, reg: gp11, asm: "BSFQ"}, // arg0 # of low-order zeroes ; undef if zero
+ {name: "BSFL", argLength: 1, reg: gp11, asm: "BSFL"}, // arg0 # of low-order zeroes ; undef if zero
+ {name: "BSFW", argLength: 1, reg: gp11, asm: "BSFW"}, // arg0 # of low-order zeroes ; undef if zero
+
+ {name: "BSRQ", argLength: 1, reg: gp11, asm: "BSRQ"}, // arg0 # of high-order zeroes ; undef if zero
+ {name: "BSRL", argLength: 1, reg: gp11, asm: "BSRL"}, // arg0 # of high-order zeroes ; undef if zero
+ {name: "BSRW", argLength: 1, reg: gp11, asm: "BSRW"}, // arg0 # of high-order zeroes ; undef if zero
+
+ // Note: the assembly for these ops moves the whole register; the 16-bit variants use the 32-bit CMOVL forms.
+ {name: "CMOVQEQconst", argLength: 2, reg: gp1flagsgp, asm: "CMOVQEQ", typ: "UInt64", aux: "Int64", resultInArg0: true}, // replace arg0 w/ constant if Z set
+ {name: "CMOVLEQconst", argLength: 2, reg: gp1flagsgp, asm: "CMOVLEQ", typ: "UInt32", aux: "Int32", resultInArg0: true}, // replace arg0 w/ constant if Z set
+ {name: "CMOVWEQconst", argLength: 2, reg: gp1flagsgp, asm: "CMOVLEQ", typ: "UInt16", aux: "Int16", resultInArg0: true}, // replace arg0 w/ constant if Z set
+ {name: "CMOVQNEconst", argLength: 2, reg: gp1flagsgp, asm: "CMOVQNE", typ: "UInt64", aux: "Int64", resultInArg0: true}, // replace arg0 w/ constant if Z not set
+ {name: "CMOVLNEconst", argLength: 2, reg: gp1flagsgp, asm: "CMOVLNE", typ: "UInt32", aux: "Int32", resultInArg0: true}, // replace arg0 w/ constant if Z not set
+ {name: "CMOVWNEconst", argLength: 2, reg: gp1flagsgp, asm: "CMOVLNE", typ: "UInt16", aux: "Int16", resultInArg0: true}, // replace arg0 w/ constant if Z not set
+
+ {name: "BSWAPQ", argLength: 1, reg: gp11, asm: "BSWAPQ", resultInArg0: true}, // arg0 swap bytes
+ {name: "BSWAPL", argLength: 1, reg: gp11, asm: "BSWAPL", resultInArg0: true}, // arg0 swap bytes
+
{name: "SQRTSD", argLength: 1, reg: fp11, asm: "SQRTSD"}, // sqrt(arg0)
{name: "SBBQcarrymask", argLength: 1, reg: flagsgp, asm: "SBBQ"}, // (int64)(-1) if carry is set, 0 if carry is clear.
diff --git a/src/cmd/compile/internal/ssa/gen/genericOps.go b/src/cmd/compile/internal/ssa/gen/genericOps.go
index ab5e335..6d92926 100644
--- a/src/cmd/compile/internal/ssa/gen/genericOps.go
+++ b/src/cmd/compile/internal/ssa/gen/genericOps.go
@@ -237,6 +237,17 @@
{name: "Com32", argLength: 1},
{name: "Com64", argLength: 1},
+ {name: "Ctz16", argLength: 1}, // Count trailing (low order) zeroes (returns 0-16)
+ {name: "Ctz32", argLength: 1}, // Count trailing zeroes (returns 0-32)
+ {name: "Ctz64", argLength: 1}, // Count trailing zeroes (returns 0-64)
+
+ {name: "Clz16", argLength: 1}, // Count leading (high order) zeroes (returns 0-16)
+ {name: "Clz32", argLength: 1}, // Count leading zeroes (returns 0-32)
+ {name: "Clz64", argLength: 1}, // Count leading zeroes (returns 0-64)
+
+ {name: "Bswap32", argLength: 1}, // Swap bytes
+ {name: "Bswap64", argLength: 1}, // Swap bytes
+
{name: "Sqrt", argLength: 1}, // sqrt(arg0), float64 only
// Data movement, max argument length for Phi is indefinite so just pick
diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go
index 3ff2b5a..e76efd4 100644
--- a/src/cmd/compile/internal/ssa/opGen.go
+++ b/src/cmd/compile/internal/ssa/opGen.go
@@ -237,6 +237,20 @@
OpAMD64NOTL
OpAMD64NOTW
OpAMD64NOTB
+ OpAMD64BSFQ
+ OpAMD64BSFL
+ OpAMD64BSFW
+ OpAMD64BSRQ
+ OpAMD64BSRL
+ OpAMD64BSRW
+ OpAMD64CMOVQEQconst
+ OpAMD64CMOVLEQconst
+ OpAMD64CMOVWEQconst
+ OpAMD64CMOVQNEconst
+ OpAMD64CMOVLNEconst
+ OpAMD64CMOVWNEconst
+ OpAMD64BSWAPQ
+ OpAMD64BSWAPL
OpAMD64SQRTSD
OpAMD64SBBQcarrymask
OpAMD64SBBLcarrymask
@@ -521,6 +535,14 @@
OpCom16
OpCom32
OpCom64
+ OpCtz16
+ OpCtz32
+ OpCtz64
+ OpClz16
+ OpClz32
+ OpClz64
+ OpBswap32
+ OpBswap64
OpSqrt
OpPhi
OpCopy
@@ -2804,6 +2826,222 @@
},
},
{
+ name: "BSFQ",
+ argLen: 1,
+ asm: x86.ABSFQ,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ },
+ clobbers: 8589934592, // FLAGS
+ outputs: []regMask{
+ 65519, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ },
+ },
+ },
+ {
+ name: "BSFL",
+ argLen: 1,
+ asm: x86.ABSFL,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ },
+ clobbers: 8589934592, // FLAGS
+ outputs: []regMask{
+ 65519, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ },
+ },
+ },
+ {
+ name: "BSFW",
+ argLen: 1,
+ asm: x86.ABSFW,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ },
+ clobbers: 8589934592, // FLAGS
+ outputs: []regMask{
+ 65519, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ },
+ },
+ },
+ {
+ name: "BSRQ",
+ argLen: 1,
+ asm: x86.ABSRQ,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ },
+ clobbers: 8589934592, // FLAGS
+ outputs: []regMask{
+ 65519, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ },
+ },
+ },
+ {
+ name: "BSRL",
+ argLen: 1,
+ asm: x86.ABSRL,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ },
+ clobbers: 8589934592, // FLAGS
+ outputs: []regMask{
+ 65519, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ },
+ },
+ },
+ {
+ name: "BSRW",
+ argLen: 1,
+ asm: x86.ABSRW,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ },
+ clobbers: 8589934592, // FLAGS
+ outputs: []regMask{
+ 65519, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ },
+ },
+ },
+ {
+ name: "CMOVQEQconst",
+ auxType: auxInt64,
+ argLen: 2,
+ resultInArg0: true,
+ asm: x86.ACMOVQEQ,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {1, 8589934592}, // FLAGS
+ {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ },
+ clobbers: 8589934593, // AX FLAGS
+ outputs: []regMask{
+ 65518, // CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ },
+ },
+ },
+ {
+ name: "CMOVLEQconst",
+ auxType: auxInt32,
+ argLen: 2,
+ resultInArg0: true,
+ asm: x86.ACMOVLEQ,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {1, 8589934592}, // FLAGS
+ {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ },
+ clobbers: 8589934593, // AX FLAGS
+ outputs: []regMask{
+ 65518, // CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ },
+ },
+ },
+ {
+ name: "CMOVWEQconst",
+ auxType: auxInt16,
+ argLen: 2,
+ resultInArg0: true,
+ asm: x86.ACMOVLEQ,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {1, 8589934592}, // FLAGS
+ {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ },
+ clobbers: 8589934593, // AX FLAGS
+ outputs: []regMask{
+ 65518, // CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ },
+ },
+ },
+ {
+ name: "CMOVQNEconst",
+ auxType: auxInt64,
+ argLen: 2,
+ resultInArg0: true,
+ asm: x86.ACMOVQNE,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {1, 8589934592}, // FLAGS
+ {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ },
+ clobbers: 8589934593, // AX FLAGS
+ outputs: []regMask{
+ 65518, // CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ },
+ },
+ },
+ {
+ name: "CMOVLNEconst",
+ auxType: auxInt32,
+ argLen: 2,
+ resultInArg0: true,
+ asm: x86.ACMOVLNE,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {1, 8589934592}, // FLAGS
+ {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ },
+ clobbers: 8589934593, // AX FLAGS
+ outputs: []regMask{
+ 65518, // CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ },
+ },
+ },
+ {
+ name: "CMOVWNEconst",
+ auxType: auxInt16,
+ argLen: 2,
+ resultInArg0: true,
+ asm: x86.ACMOVLNE,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {1, 8589934592}, // FLAGS
+ {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ },
+ clobbers: 8589934593, // AX FLAGS
+ outputs: []regMask{
+ 65518, // CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ },
+ },
+ },
+ {
+ name: "BSWAPQ",
+ argLen: 1,
+ resultInArg0: true,
+ asm: x86.ABSWAPQ,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ },
+ clobbers: 8589934592, // FLAGS
+ outputs: []regMask{
+ 65519, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ },
+ },
+ },
+ {
+ name: "BSWAPL",
+ argLen: 1,
+ resultInArg0: true,
+ asm: x86.ABSWAPL,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ },
+ clobbers: 8589934592, // FLAGS
+ outputs: []regMask{
+ 65519, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ },
+ },
+ },
+ {
name: "SQRTSD",
argLen: 1,
asm: x86.ASQRTSD,
@@ -4982,6 +5220,46 @@
generic: true,
},
{
+ name: "Ctz16",
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "Ctz32",
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "Ctz64",
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "Clz16",
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "Clz32",
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "Clz64",
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "Bswap32",
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "Bswap64",
+ argLen: 1,
+ generic: true,
+ },
+ {
name: "Sqrt",
argLen: 1,
generic: true,
diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go
index 0469738..8dd1b15 100644
--- a/src/cmd/compile/internal/ssa/rewriteAMD64.go
+++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go
@@ -66,6 +66,16 @@
return rewriteValueAMD64_OpAnd8(v, config)
case OpAvg64u:
return rewriteValueAMD64_OpAvg64u(v, config)
+ case OpBswap32:
+ return rewriteValueAMD64_OpBswap32(v, config)
+ case OpBswap64:
+ return rewriteValueAMD64_OpBswap64(v, config)
+ case OpAMD64CMOVLEQconst:
+ return rewriteValueAMD64_OpAMD64CMOVLEQconst(v, config)
+ case OpAMD64CMOVQEQconst:
+ return rewriteValueAMD64_OpAMD64CMOVQEQconst(v, config)
+ case OpAMD64CMOVWEQconst:
+ return rewriteValueAMD64_OpAMD64CMOVWEQconst(v, config)
case OpAMD64CMPB:
return rewriteValueAMD64_OpAMD64CMPB(v, config)
case OpAMD64CMPBconst:
@@ -110,6 +120,12 @@
return rewriteValueAMD64_OpConstNil(v, config)
case OpConvert:
return rewriteValueAMD64_OpConvert(v, config)
+ case OpCtz16:
+ return rewriteValueAMD64_OpCtz16(v, config)
+ case OpCtz32:
+ return rewriteValueAMD64_OpCtz32(v, config)
+ case OpCtz64:
+ return rewriteValueAMD64_OpCtz64(v, config)
case OpCvt32Fto32:
return rewriteValueAMD64_OpCvt32Fto32(v, config)
case OpCvt32Fto64:
@@ -2119,6 +2135,307 @@
}
return false
}
+func rewriteValueAMD64_OpBswap32(v *Value, config *Config) bool {
+ b := v.Block
+ _ = b
+ // match: (Bswap32 x)
+ // cond:
+ // result: (BSWAPL x)
+ for {
+ x := v.Args[0]
+ v.reset(OpAMD64BSWAPL)
+ v.AddArg(x)
+ return true
+ }
+ return false
+}
+func rewriteValueAMD64_OpBswap64(v *Value, config *Config) bool {
+ b := v.Block
+ _ = b
+ // match: (Bswap64 x)
+ // cond:
+ // result: (BSWAPQ x)
+ for {
+ x := v.Args[0]
+ v.reset(OpAMD64BSWAPQ)
+ v.AddArg(x)
+ return true
+ }
+ return false
+}
+func rewriteValueAMD64_OpAMD64CMOVLEQconst(v *Value, config *Config) bool {
+ b := v.Block
+ _ = b
+ // match: (CMOVLEQconst x (InvertFlags y) [c])
+ // cond:
+ // result: (CMOVLNEconst x y [c])
+ for {
+ x := v.Args[0]
+ v_1 := v.Args[1]
+ if v_1.Op != OpAMD64InvertFlags {
+ break
+ }
+ y := v_1.Args[0]
+ c := v.AuxInt
+ v.reset(OpAMD64CMOVLNEconst)
+ v.AddArg(x)
+ v.AddArg(y)
+ v.AuxInt = c
+ return true
+ }
+ // match: (CMOVLEQconst _ (FlagEQ) [c])
+ // cond:
+ // result: (Const32 [c])
+ for {
+ v_1 := v.Args[1]
+ if v_1.Op != OpAMD64FlagEQ {
+ break
+ }
+ c := v.AuxInt
+ v.reset(OpConst32)
+ v.AuxInt = c
+ return true
+ }
+ // match: (CMOVLEQconst x (FlagLT_ULT))
+ // cond:
+ // result: x
+ for {
+ x := v.Args[0]
+ v_1 := v.Args[1]
+ if v_1.Op != OpAMD64FlagLT_ULT {
+ break
+ }
+ v.reset(OpCopy)
+ v.Type = x.Type
+ v.AddArg(x)
+ return true
+ }
+ // match: (CMOVLEQconst x (FlagLT_UGT))
+ // cond:
+ // result: x
+ for {
+ x := v.Args[0]
+ v_1 := v.Args[1]
+ if v_1.Op != OpAMD64FlagLT_UGT {
+ break
+ }
+ v.reset(OpCopy)
+ v.Type = x.Type
+ v.AddArg(x)
+ return true
+ }
+ // match: (CMOVLEQconst x (FlagGT_ULT))
+ // cond:
+ // result: x
+ for {
+ x := v.Args[0]
+ v_1 := v.Args[1]
+ if v_1.Op != OpAMD64FlagGT_ULT {
+ break
+ }
+ v.reset(OpCopy)
+ v.Type = x.Type
+ v.AddArg(x)
+ return true
+ }
+ // match: (CMOVLEQconst x (FlagGT_UGT))
+ // cond:
+ // result: x
+ for {
+ x := v.Args[0]
+ v_1 := v.Args[1]
+ if v_1.Op != OpAMD64FlagGT_UGT {
+ break
+ }
+ v.reset(OpCopy)
+ v.Type = x.Type
+ v.AddArg(x)
+ return true
+ }
+ return false
+}
+func rewriteValueAMD64_OpAMD64CMOVQEQconst(v *Value, config *Config) bool {
+ b := v.Block
+ _ = b
+ // match: (CMOVQEQconst x (InvertFlags y) [c])
+ // cond:
+ // result: (CMOVQNEconst x y [c])
+ for {
+ x := v.Args[0]
+ v_1 := v.Args[1]
+ if v_1.Op != OpAMD64InvertFlags {
+ break
+ }
+ y := v_1.Args[0]
+ c := v.AuxInt
+ v.reset(OpAMD64CMOVQNEconst)
+ v.AddArg(x)
+ v.AddArg(y)
+ v.AuxInt = c
+ return true
+ }
+ // match: (CMOVQEQconst _ (FlagEQ) [c])
+ // cond:
+ // result: (Const64 [c])
+ for {
+ v_1 := v.Args[1]
+ if v_1.Op != OpAMD64FlagEQ {
+ break
+ }
+ c := v.AuxInt
+ v.reset(OpConst64)
+ v.AuxInt = c
+ return true
+ }
+ // match: (CMOVQEQconst x (FlagLT_ULT))
+ // cond:
+ // result: x
+ for {
+ x := v.Args[0]
+ v_1 := v.Args[1]
+ if v_1.Op != OpAMD64FlagLT_ULT {
+ break
+ }
+ v.reset(OpCopy)
+ v.Type = x.Type
+ v.AddArg(x)
+ return true
+ }
+ // match: (CMOVQEQconst x (FlagLT_UGT))
+ // cond:
+ // result: x
+ for {
+ x := v.Args[0]
+ v_1 := v.Args[1]
+ if v_1.Op != OpAMD64FlagLT_UGT {
+ break
+ }
+ v.reset(OpCopy)
+ v.Type = x.Type
+ v.AddArg(x)
+ return true
+ }
+ // match: (CMOVQEQconst x (FlagGT_ULT))
+ // cond:
+ // result: x
+ for {
+ x := v.Args[0]
+ v_1 := v.Args[1]
+ if v_1.Op != OpAMD64FlagGT_ULT {
+ break
+ }
+ v.reset(OpCopy)
+ v.Type = x.Type
+ v.AddArg(x)
+ return true
+ }
+ // match: (CMOVQEQconst x (FlagGT_UGT))
+ // cond:
+ // result: x
+ for {
+ x := v.Args[0]
+ v_1 := v.Args[1]
+ if v_1.Op != OpAMD64FlagGT_UGT {
+ break
+ }
+ v.reset(OpCopy)
+ v.Type = x.Type
+ v.AddArg(x)
+ return true
+ }
+ return false
+}
+func rewriteValueAMD64_OpAMD64CMOVWEQconst(v *Value, config *Config) bool {
+ b := v.Block
+ _ = b
+ // match: (CMOVWEQconst x (InvertFlags y) [c])
+ // cond:
+ // result: (CMOVWNEconst x y [c])
+ for {
+ x := v.Args[0]
+ v_1 := v.Args[1]
+ if v_1.Op != OpAMD64InvertFlags {
+ break
+ }
+ y := v_1.Args[0]
+ c := v.AuxInt
+ v.reset(OpAMD64CMOVWNEconst)
+ v.AddArg(x)
+ v.AddArg(y)
+ v.AuxInt = c
+ return true
+ }
+ // match: (CMOVWEQconst _ (FlagEQ) [c])
+ // cond:
+ // result: (Const16 [c])
+ for {
+ v_1 := v.Args[1]
+ if v_1.Op != OpAMD64FlagEQ {
+ break
+ }
+ c := v.AuxInt
+ v.reset(OpConst16)
+ v.AuxInt = c
+ return true
+ }
+ // match: (CMOVWEQconst x (FlagLT_ULT))
+ // cond:
+ // result: x
+ for {
+ x := v.Args[0]
+ v_1 := v.Args[1]
+ if v_1.Op != OpAMD64FlagLT_ULT {
+ break
+ }
+ v.reset(OpCopy)
+ v.Type = x.Type
+ v.AddArg(x)
+ return true
+ }
+ // match: (CMOVWEQconst x (FlagLT_UGT))
+ // cond:
+ // result: x
+ for {
+ x := v.Args[0]
+ v_1 := v.Args[1]
+ if v_1.Op != OpAMD64FlagLT_UGT {
+ break
+ }
+ v.reset(OpCopy)
+ v.Type = x.Type
+ v.AddArg(x)
+ return true
+ }
+ // match: (CMOVWEQconst x (FlagGT_ULT))
+ // cond:
+ // result: x
+ for {
+ x := v.Args[0]
+ v_1 := v.Args[1]
+ if v_1.Op != OpAMD64FlagGT_ULT {
+ break
+ }
+ v.reset(OpCopy)
+ v.Type = x.Type
+ v.AddArg(x)
+ return true
+ }
+ // match: (CMOVWEQconst x (FlagGT_UGT))
+ // cond:
+ // result: x
+ for {
+ x := v.Args[0]
+ v_1 := v.Args[1]
+ if v_1.Op != OpAMD64FlagGT_UGT {
+ break
+ }
+ v.reset(OpCopy)
+ v.Type = x.Type
+ v.AddArg(x)
+ return true
+ }
+ return false
+}
func rewriteValueAMD64_OpAMD64CMPB(v *Value, config *Config) bool {
b := v.Block
_ = b
@@ -3026,6 +3343,72 @@
}
return false
}
+func rewriteValueAMD64_OpCtz16(v *Value, config *Config) bool {
+ b := v.Block
+ _ = b
+ // match: (Ctz16 <t> x)
+ // cond:
+ // result: (CMOVWEQconst (BSFW <t> x) (CMPWconst x [0]) [16])
+ for {
+ t := v.Type
+ x := v.Args[0]
+ v.reset(OpAMD64CMOVWEQconst)
+ v0 := b.NewValue0(v.Line, OpAMD64BSFW, t)
+ v0.AddArg(x)
+ v.AddArg(v0)
+ v1 := b.NewValue0(v.Line, OpAMD64CMPWconst, TypeFlags)
+ v1.AddArg(x)
+ v1.AuxInt = 0
+ v.AddArg(v1)
+ v.AuxInt = 16
+ return true
+ }
+ return false
+}
+func rewriteValueAMD64_OpCtz32(v *Value, config *Config) bool {
+ b := v.Block
+ _ = b
+ // match: (Ctz32 <t> x)
+ // cond:
+ // result: (CMOVLEQconst (BSFL <t> x) (CMPLconst x [0]) [32])
+ for {
+ t := v.Type
+ x := v.Args[0]
+ v.reset(OpAMD64CMOVLEQconst)
+ v0 := b.NewValue0(v.Line, OpAMD64BSFL, t)
+ v0.AddArg(x)
+ v.AddArg(v0)
+ v1 := b.NewValue0(v.Line, OpAMD64CMPLconst, TypeFlags)
+ v1.AddArg(x)
+ v1.AuxInt = 0
+ v.AddArg(v1)
+ v.AuxInt = 32
+ return true
+ }
+ return false
+}
+func rewriteValueAMD64_OpCtz64(v *Value, config *Config) bool {
+ b := v.Block
+ _ = b
+ // match: (Ctz64 <t> x)
+ // cond:
+ // result: (CMOVQEQconst (BSFQ <t> x) (CMPQconst x [0]) [64])
+ for {
+ t := v.Type
+ x := v.Args[0]
+ v.reset(OpAMD64CMOVQEQconst)
+ v0 := b.NewValue0(v.Line, OpAMD64BSFQ, t)
+ v0.AddArg(x)
+ v.AddArg(v0)
+ v1 := b.NewValue0(v.Line, OpAMD64CMPQconst, TypeFlags)
+ v1.AddArg(x)
+ v1.AuxInt = 0
+ v.AddArg(v1)
+ v.AuxInt = 64
+ return true
+ }
+ return false
+}
func rewriteValueAMD64_OpCvt32Fto32(v *Value, config *Config) bool {
b := v.Block
_ = b