cmd/internal/gc, cmd/6g: generate boolean values without jumps
Use SETcc instructions instead of Jcc to generate boolean values.
This generates shorter, jump-free code, which may in turn enable other
peephole optimizations.
For example, given
func f(i, j int) bool {
return i == j
}
Before
"".f t=1 size=32 value=0 args=0x18 locals=0x0
0x0000 00000 (x.go:3) TEXT "".f(SB), $0-24
0x0000 00000 (x.go:3) FUNCDATA $0, gclocals·b4c25e9b09fd0cf9bb429dcefe91c353(SB)
0x0000 00000 (x.go:3) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (x.go:4) MOVQ "".i+8(FP), BX
0x0005 00005 (x.go:4) MOVQ "".j+16(FP), BP
0x000a 00010 (x.go:4) CMPQ BX, BP
0x000d 00013 (x.go:4) JEQ 21
0x000f 00015 (x.go:4) MOVB $0, "".~r2+24(FP)
0x0014 00020 (x.go:4) RET
0x0015 00021 (x.go:4) MOVB $1, "".~r2+24(FP)
0x001a 00026 (x.go:4) JMP 20
After
"".f t=1 size=32 value=0 args=0x18 locals=0x0
0x0000 00000 (x.go:3) TEXT "".f(SB), $0-24
0x0000 00000 (x.go:3) FUNCDATA $0, gclocals·b4c25e9b09fd0cf9bb429dcefe91c353(SB)
0x0000 00000 (x.go:3) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (x.go:4) MOVQ "".i+8(FP), BX
0x0005 00005 (x.go:4) MOVQ "".j+16(FP), BP
0x000a 00010 (x.go:4) CMPQ BX, BP
0x000d 00013 (x.go:4) SETEQ "".~r2+24(FP)
0x0012 00018 (x.go:4) RET
regexp benchmarks, best of 12 runs:
benchmark old ns/op new ns/op delta
BenchmarkNotOnePassShortB 782 733 -6.27%
BenchmarkLiteral 180 171 -5.00%
BenchmarkNotLiteral 2855 2721 -4.69%
BenchmarkMatchHard_32 2672 2557 -4.30%
BenchmarkMatchHard_1K 80182 76732 -4.30%
BenchmarkMatchEasy1_32M 76440180 73304748 -4.10%
BenchmarkMatchEasy1_32K 68798 66350 -3.56%
BenchmarkAnchoredLongMatch 482 465 -3.53%
BenchmarkMatchEasy1_1M 2373042 2292692 -3.39%
BenchmarkReplaceAll 2776 2690 -3.10%
BenchmarkNotOnePassShortA 1397 1360 -2.65%
BenchmarkMatchClass_InRange 3842 3742 -2.60%
BenchmarkMatchEasy0_32 125 122 -2.40%
BenchmarkMatchEasy0_32K 11414 11164 -2.19%
BenchmarkMatchEasy0_1K 668 654 -2.10%
BenchmarkAnchoredShortMatch 260 255 -1.92%
BenchmarkAnchoredLiteralShortNonMatch 164 161 -1.83%
BenchmarkOnePassShortB 623 612 -1.77%
BenchmarkOnePassShortA 801 788 -1.62%
BenchmarkMatchClass 4094 4033 -1.49%
BenchmarkMatchEasy0_32M 14078800 13890704 -1.34%
BenchmarkMatchHard_32K 4095844 4045820 -1.22%
BenchmarkMatchEasy1_1K 1663 1643 -1.20%
BenchmarkMatchHard_1M 131261708 129708215 -1.18%
BenchmarkMatchHard_32M 4210112412 4169292003 -0.97%
BenchmarkMatchMedium_32K 2460752 2438611 -0.90%
BenchmarkMatchEasy0_1M 422914 419672 -0.77%
BenchmarkMatchMedium_1M 78581121 78040160 -0.69%
BenchmarkMatchMedium_32M 2515287278 2498464906 -0.67%
BenchmarkMatchMedium_32 1754 1746 -0.46%
BenchmarkMatchMedium_1K 52105 52106 +0.00%
BenchmarkAnchoredLiteralLongNonMatch 185 185 +0.00%
BenchmarkMatchEasy1_32 107 107 +0.00%
BenchmarkOnePassLongNotPrefix 505 505 +0.00%
BenchmarkOnePassLongPrefix 147 147 +0.00%
The godoc binary is ~0.12% smaller after this CL.
Updates #5729.
toolstash -cmp passes for all architectures other than amd64 and amd64p32.
Other architectures can be done in follow-up CLs.
Change-Id: I0e167e259274b722958567fc0af83a17ca002da7
Reviewed-on: https://go-review.googlesource.com/2284
Reviewed-by: Russ Cox <rsc@golang.org>
diff --git a/src/cmd/internal/gc/cgen.go b/src/cmd/internal/gc/cgen.go
index 3a3a4c6..b6691ef 100644
--- a/src/cmd/internal/gc/cgen.go
+++ b/src/cmd/internal/gc/cgen.go
@@ -345,20 +345,12 @@
Dump("cgen-res", res)
Fatal("cgen: unknown op %v", Nconv(n, obj.FmtShort|obj.FmtSign))
- // these call bgen to get a bool value
case OOROR, OANDAND,
OEQ, ONE,
OLT, OLE,
OGE, OGT,
ONOT:
- p1 := Gbranch(obj.AJMP, nil, 0)
- p2 := Pc
- Thearch.Gmove(Nodbool(true), res)
- p3 := Gbranch(obj.AJMP, nil, 0)
- Patch(p1, Pc)
- Bgen(n, true, 0, p2)
- Thearch.Gmove(Nodbool(false), res)
- Patch(p3, Pc)
+ Bvgen(n, res, true)
return
case OPLUS:
@@ -1640,10 +1632,55 @@
// goto to
// }
func Bgen(n *Node, wantTrue bool, likely int, to *obj.Prog) {
- if Debug['g'] != 0 {
- fmt.Printf("\nbgen wantTrue=%t likely=%d to=%v\n", wantTrue, likely, to)
- Dump("bgen", n)
+ bgenx(n, nil, wantTrue, likely, to)
+}
+
+// Bvgen generates code for calculating boolean values:
+// res = n == wantTrue
+func Bvgen(n, res *Node, wantTrue bool) {
+ if Thearch.Ginsboolval == nil {
+ // Direct value generation not implemented for this architecture.
+ // Implement using jumps.
+ bvgenjump(n, res, wantTrue, true)
+ return
}
+ bgenx(n, res, wantTrue, 0, nil)
+}
+
+// bvgenjump implements boolean value generation using jumps:
+// if n == wantTrue {
+// res = 1
+// } else {
+// res = 0
+// }
+// geninit controls whether n's Ninit is generated.
+func bvgenjump(n, res *Node, wantTrue, geninit bool) {
+ init := n.Ninit
+ if !geninit {
+ n.Ninit = nil
+ }
+ p1 := Gbranch(obj.AJMP, nil, 0)
+ p2 := Pc
+ Thearch.Gmove(Nodbool(true), res)
+ p3 := Gbranch(obj.AJMP, nil, 0)
+ Patch(p1, Pc)
+ Bgen(n, wantTrue, 0, p2)
+ Thearch.Gmove(Nodbool(false), res)
+ Patch(p3, Pc)
+ n.Ninit = init
+}
+
+// bgenx is the backend for Bgen and Bvgen.
+// If res is nil, it generates a branch.
+// Otherwise, it generates a boolean value.
+func bgenx(n, res *Node, wantTrue bool, likely int, to *obj.Prog) {
+ if Debug['g'] != 0 {
+ fmt.Printf("\nbgenx wantTrue=%t likely=%d to=%v\n", wantTrue, likely, to)
+ Dump("n", n)
+ Dump("res", res)
+ }
+
+ genval := res != nil
if n == nil {
n = Nodbool(true)
@@ -1659,9 +1696,7 @@
}
if n.Type.Etype != TBOOL {
- Yyerror("cgen: bad type %v for %v", n.Type, Oconv(int(n.Op), 0))
- Patch(Thearch.Gins(obj.AEND, nil, nil), to)
- return
+ Fatal("bgen: bad type %v for %v", n.Type, Oconv(int(n.Op), 0))
}
for n.Op == OCONVNOP {
@@ -1670,44 +1705,90 @@
}
if Thearch.Bgen_float != nil && n.Left != nil && Isfloat[n.Left.Type.Etype] {
+ if genval {
+ bvgenjump(n, res, wantTrue, false)
+ return
+ }
Thearch.Bgen_float(n, wantTrue, likely, to)
return
}
switch n.Op {
default:
+ if genval {
+ Cgen(n, res)
+ if !wantTrue {
+ Thearch.Gins(Thearch.Optoas(OXOR, Types[TUINT8]), Nodintconst(1), res)
+ }
+ return
+ }
+
var tmp Node
Regalloc(&tmp, n.Type, nil)
Cgen(n, &tmp)
- bgenNonZero(&tmp, wantTrue, likely, to)
+ bgenNonZero(&tmp, nil, wantTrue, likely, to)
Regfree(&tmp)
return
case ONAME:
+ if genval {
+ // 5g, 7g, and 9g might need a temporary or other help here,
+ // but they don't support direct generation of a bool value yet.
+ // We can fix that as we go.
+ switch Ctxt.Arch.Thechar {
+ case '5', '7', '9':
+ Fatal("genval 5g, 7g, 9g ONAMES not fully implemented")
+ }
+ Cgen(n, res)
+ if !wantTrue {
+ Thearch.Gins(Thearch.Optoas(OXOR, Types[TUINT8]), Nodintconst(1), res)
+ }
+ return
+ }
+
if n.Addable && Ctxt.Arch.Thechar != '5' && Ctxt.Arch.Thechar != '7' && Ctxt.Arch.Thechar != '9' {
// no need for a temporary
- bgenNonZero(n, wantTrue, likely, to)
+ bgenNonZero(n, nil, wantTrue, likely, to)
return
}
var tmp Node
Regalloc(&tmp, n.Type, nil)
Cgen(n, &tmp)
- bgenNonZero(&tmp, wantTrue, likely, to)
+ bgenNonZero(&tmp, nil, wantTrue, likely, to)
Regfree(&tmp)
return
case OLITERAL:
- // n is a constant. If n == wantTrue, jump; otherwise do nothing.
+ // n is a constant.
if !Isconst(n, CTBOOL) {
Fatal("bgen: non-bool const %v\n", Nconv(n, obj.FmtLong))
}
+ if genval {
+ Cgen(Nodbool(wantTrue == n.Val.U.Bval), res)
+ return
+ }
+ // If n == wantTrue, jump; otherwise do nothing.
if wantTrue == n.Val.U.Bval {
Patch(Gbranch(obj.AJMP, nil, likely), to)
}
return
case OANDAND, OOROR:
- if (n.Op == OANDAND) == wantTrue {
+ and := (n.Op == OANDAND) == wantTrue
+ if genval {
+ p1 := Gbranch(obj.AJMP, nil, 0)
+ p2 := Gbranch(obj.AJMP, nil, 0)
+ Patch(p2, Pc)
+ Cgen(Nodbool(!and), res)
+ p3 := Gbranch(obj.AJMP, nil, 0)
+ Patch(p1, Pc)
+ Bgen(n.Left, wantTrue != and, 0, p2)
+ Bvgen(n.Right, res, wantTrue)
+ Patch(p3, Pc)
+ return
+ }
+
+ if and {
p1 := Gbranch(obj.AJMP, nil, 0)
p2 := Gbranch(obj.AJMP, nil, 0)
Patch(p1, Pc)
@@ -1726,7 +1807,7 @@
if n.Left == nil || n.Left.Type == nil {
return
}
- Bgen(n.Left, !wantTrue, likely, to)
+ bgenx(n.Left, res, !wantTrue, likely, to)
return
case OEQ, ONE, OLT, OGT, OLE, OGE:
@@ -1743,15 +1824,21 @@
if !wantTrue {
if Isfloat[nr.Type.Etype] {
// Brcom is not valid on floats when NaN is involved.
+ ll := n.Ninit // avoid re-genning Ninit
+ n.Ninit = nil
+ if genval {
+ bgenx(n, res, true, likely, to)
+ Thearch.Gins(Thearch.Optoas(OXOR, Types[TUINT8]), Nodintconst(1), res) // res = !res
+ n.Ninit = ll
+ return
+ }
p1 := Gbranch(obj.AJMP, nil, 0)
p2 := Gbranch(obj.AJMP, nil, 0)
Patch(p1, Pc)
- ll := n.Ninit // avoid re-genning Ninit
- n.Ninit = nil
- Bgen(n, true, -likely, p2)
- n.Ninit = ll
+ bgenx(n, res, true, -likely, p2)
Patch(Gbranch(obj.AJMP, nil, 0), to)
Patch(p2, Pc)
+ n.Ninit = ll
return
}
@@ -1786,17 +1873,22 @@
Regalloc(&tmp, ptr.Type, &ptr)
Cgen(&ptr, &tmp)
Regfree(&ptr)
- bgenNonZero(&tmp, a == OEQ != wantTrue, likely, to)
+ bgenNonZero(&tmp, res, a == OEQ != wantTrue, likely, to)
Regfree(&tmp)
return
}
if Iscomplex[nl.Type.Etype] {
- complexbool(a, nl, nr, wantTrue, likely, to)
+ complexbool(a, nl, nr, res, wantTrue, likely, to)
return
}
if Ctxt.Arch.Regsize == 4 && Is64(nr.Type) {
+ if genval {
+ // TODO: Teach Cmp64 to generate boolean values and remove this.
+ bvgenjump(n, res, wantTrue, false)
+ return
+ }
if !nl.Addable || Isconst(nl, CTINT) {
nl = CgenTemp(nl)
}
@@ -1838,7 +1930,7 @@
if Smallintconst(nr) && Ctxt.Arch.Thechar != '9' {
Thearch.Gins(Thearch.Optoas(OCMP, nr.Type), nl, nr)
- Patch(Gbranch(Thearch.Optoas(a, nr.Type), nr.Type, likely), to)
+ bins(nr.Type, res, a, likely, to)
return
}
@@ -1869,6 +1961,9 @@
if Isfloat[nl.Type.Etype] {
switch Ctxt.Arch.Thechar {
case '5':
+ if genval {
+ Fatal("genval 5g Isfloat special cases not implemented")
+ }
switch n.Op {
case ONE:
Patch(Gbranch(Thearch.Optoas(OPS, nr.Type), nr.Type, likely), to)
@@ -1883,19 +1978,40 @@
switch n.Op {
case OEQ:
// neither NE nor P
- p1 := Gbranch(Thearch.Optoas(ONE, nr.Type), nil, -likely)
- p2 := Gbranch(Thearch.Optoas(OPS, nr.Type), nil, -likely)
- Patch(Gbranch(obj.AJMP, nil, 0), to)
- Patch(p1, Pc)
- Patch(p2, Pc)
+ if genval {
+ var reg Node
+ Regalloc(®, Types[TBOOL], nil)
+ Thearch.Ginsboolval(Thearch.Optoas(OEQ, nr.Type), ®)
+ Thearch.Ginsboolval(Thearch.Optoas(OPC, nr.Type), res)
+ Thearch.Gins(Thearch.Optoas(OAND, Types[TBOOL]), ®, res)
+ Regfree(®)
+ } else {
+ p1 := Gbranch(Thearch.Optoas(ONE, nr.Type), nil, -likely)
+ p2 := Gbranch(Thearch.Optoas(OPS, nr.Type), nil, -likely)
+ Patch(Gbranch(obj.AJMP, nil, 0), to)
+ Patch(p1, Pc)
+ Patch(p2, Pc)
+ }
return
case ONE:
// either NE or P
- Patch(Gbranch(Thearch.Optoas(ONE, nr.Type), nil, likely), to)
- Patch(Gbranch(Thearch.Optoas(OPS, nr.Type), nil, likely), to)
+ if genval {
+ var reg Node
+ Regalloc(®, Types[TBOOL], nil)
+ Thearch.Ginsboolval(Thearch.Optoas(ONE, nr.Type), ®)
+ Thearch.Ginsboolval(Thearch.Optoas(OPS, nr.Type), res)
+ Thearch.Gins(Thearch.Optoas(OOR, Types[TBOOL]), ®, res)
+ Regfree(®)
+ } else {
+ Patch(Gbranch(Thearch.Optoas(ONE, nr.Type), nil, likely), to)
+ Patch(Gbranch(Thearch.Optoas(OPS, nr.Type), nil, likely), to)
+ }
return
}
case '7', '9':
+ if genval {
+ Fatal("genval 7g, 9g Isfloat special cases not implemented")
+ }
switch n.Op {
// On arm64 and ppc64, <= and >= mishandle NaN. Must decompose into < or > and =.
// TODO(josh): Convert a <= b to b > a instead?
@@ -1912,11 +2028,11 @@
}
}
- // Not a special case. Insert an appropriate conditional jump.
- Patch(Gbranch(Thearch.Optoas(a, nr.Type), nr.Type, likely), to)
+ // Not a special case. Insert the conditional jump or value gen.
+ bins(nr.Type, res, a, likely, to)
}
-func bgenNonZero(n *Node, wantTrue bool, likely int, to *obj.Prog) {
+func bgenNonZero(n, res *Node, wantTrue bool, likely int, to *obj.Prog) {
// TODO: Optimize on systems that can compare to zero easily.
a := ONE
if !wantTrue {
@@ -1925,7 +2041,21 @@
var zero Node
Nodconst(&zero, n.Type, 0)
Thearch.Gins(Thearch.Optoas(OCMP, n.Type), n, &zero)
- Patch(Gbranch(Thearch.Optoas(a, n.Type), n.Type, likely), to)
+ bins(n.Type, res, a, likely, to)
+}
+
+// bins inserts an instruction to handle the result of a compare.
+// If res is non-nil, it inserts appropriate value generation instructions.
+// If res is nil, it inserts a branch to to.
+func bins(typ *Type, res *Node, a, likely int, to *obj.Prog) {
+ a = Thearch.Optoas(a, typ)
+ if res != nil {
+ // value gen
+ Thearch.Ginsboolval(a, res)
+ } else {
+ // jump
+ Patch(Gbranch(a, typ, likely), to)
+ }
}
/*
diff --git a/src/cmd/internal/gc/cplx.go b/src/cmd/internal/gc/cplx.go
index c457bbf..cf48c92 100644
--- a/src/cmd/internal/gc/cplx.go
+++ b/src/cmd/internal/gc/cplx.go
@@ -14,7 +14,7 @@
return f.Op == OINDREG && t.Op == OINDREG && f.Xoffset+f.Type.Width >= t.Xoffset && t.Xoffset+t.Type.Width >= f.Xoffset
}
-func complexbool(op int, nl, nr *Node, wantTrue bool, likely int, to *obj.Prog) {
+func complexbool(op int, nl, nr, res *Node, wantTrue bool, likely int, to *obj.Prog) {
// make both sides addable in ullman order
if nr != nil {
if nl.Ullman > nr.Ullman && !nl.Addable {
@@ -35,7 +35,10 @@
subnode(&rreal, &rimag, nr)
// build tree
- // real(l) == real(r) && imag(l) == imag(r)
+ // if branching:
+ // real(l) == real(r) && imag(l) == imag(r)
+ // if generating a value, use a branch-free version:
+ // real(l) == real(r) & imag(l) == imag(r)
realeq := Node{
Op: OEQ,
Left: &lreal,
@@ -55,6 +58,19 @@
Type: Types[TBOOL],
}
+ if res != nil {
+ // generating a value
+ and.Op = OAND
+ if op == ONE {
+ and.Op = OOR
+ realeq.Op = ONE
+ imageq.Op = ONE
+ }
+ Bvgen(&and, res, true)
+ return
+ }
+
+ // generating a branch
if op == ONE {
wantTrue = !wantTrue
}
diff --git a/src/cmd/internal/gc/go.go b/src/cmd/internal/gc/go.go
index c0ec7b5..d399ebb 100644
--- a/src/cmd/internal/gc/go.go
+++ b/src/cmd/internal/gc/go.go
@@ -791,6 +791,13 @@
Expandchecks func(*obj.Prog)
Getg func(*Node)
Gins func(int, *Node, *Node) *obj.Prog
+ // Ginsboolval inserts instructions to convert the result
+ // of a just-completed comparison to a boolean value.
+ // The first argument is the conditional jump instruction
+ // corresponding to the desired value.
+ // The second argument is the destination.
+ // If not present, Ginsboolval will be emulated with jumps.
+ Ginsboolval func(int, *Node)
Ginscon func(int, int64, *Node)
Ginsnop func()
Gmove func(*Node, *Node)
diff --git a/src/cmd/internal/gc/syntax.go b/src/cmd/internal/gc/syntax.go
index 1012c66..d448188 100644
--- a/src/cmd/internal/gc/syntax.go
+++ b/src/cmd/internal/gc/syntax.go
@@ -306,6 +306,7 @@
ORROTC // right rotate-carry: ARCR.
ORETJMP // return to other function
OPS // compare parity set (for x86 NaN check)
+ OPC // compare parity clear (for x86 NaN check)
OSQRT // sqrt(float64), on systems that have hw support
OGETG // runtime.getg() (read g pointer)