cmd/internal/gc, cmd/6g: generate boolean values without jumps

Use SETcc instructions instead of Jcc to generate boolean values.
This generates shorter, jump-free code, which may in turn enable other
peephole optimizations.

For example, given

func f(i, j int) bool {
	return i == j
}

Before

"".f t=1 size=32 value=0 args=0x18 locals=0x0
	0x0000 00000 (x.go:3)	TEXT	"".f(SB), $0-24
	0x0000 00000 (x.go:3)	FUNCDATA	$0, gclocals·b4c25e9b09fd0cf9bb429dcefe91c353(SB)
	0x0000 00000 (x.go:3)	FUNCDATA	$1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
	0x0000 00000 (x.go:4)	MOVQ	"".i+8(FP), BX
	0x0005 00005 (x.go:4)	MOVQ	"".j+16(FP), BP
	0x000a 00010 (x.go:4)	CMPQ	BX, BP
	0x000d 00013 (x.go:4)	JEQ	21
	0x000f 00015 (x.go:4)	MOVB	$0, "".~r2+24(FP)
	0x0014 00020 (x.go:4)	RET
	0x0015 00021 (x.go:4)	MOVB	$1, "".~r2+24(FP)
	0x001a 00026 (x.go:4)	JMP	20

After

"".f t=1 size=32 value=0 args=0x18 locals=0x0
	0x0000 00000 (x.go:3)	TEXT	"".f(SB), $0-24
	0x0000 00000 (x.go:3)	FUNCDATA	$0, gclocals·b4c25e9b09fd0cf9bb429dcefe91c353(SB)
	0x0000 00000 (x.go:3)	FUNCDATA	$1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
	0x0000 00000 (x.go:4)	MOVQ	"".i+8(FP), BX
	0x0005 00005 (x.go:4)	MOVQ	"".j+16(FP), BP
	0x000a 00010 (x.go:4)	CMPQ	BX, BP
	0x000d 00013 (x.go:4)	SETEQ	"".~r2+24(FP)
	0x0012 00018 (x.go:4)	RET

regexp benchmarks, best of 12 runs:

benchmark                                 old ns/op      new ns/op      delta
BenchmarkNotOnePassShortB                 782            733            -6.27%
BenchmarkLiteral                          180            171            -5.00%
BenchmarkNotLiteral                       2855           2721           -4.69%
BenchmarkMatchHard_32                     2672           2557           -4.30%
BenchmarkMatchHard_1K                     80182          76732          -4.30%
BenchmarkMatchEasy1_32M                   76440180       73304748       -4.10%
BenchmarkMatchEasy1_32K                   68798          66350          -3.56%
BenchmarkAnchoredLongMatch                482            465            -3.53%
BenchmarkMatchEasy1_1M                    2373042        2292692        -3.39%
BenchmarkReplaceAll                       2776           2690           -3.10%
BenchmarkNotOnePassShortA                 1397           1360           -2.65%
BenchmarkMatchClass_InRange               3842           3742           -2.60%
BenchmarkMatchEasy0_32                    125            122            -2.40%
BenchmarkMatchEasy0_32K                   11414          11164          -2.19%
BenchmarkMatchEasy0_1K                    668            654            -2.10%
BenchmarkAnchoredShortMatch               260            255            -1.92%
BenchmarkAnchoredLiteralShortNonMatch     164            161            -1.83%
BenchmarkOnePassShortB                    623            612            -1.77%
BenchmarkOnePassShortA                    801            788            -1.62%
BenchmarkMatchClass                       4094           4033           -1.49%
BenchmarkMatchEasy0_32M                   14078800       13890704       -1.34%
BenchmarkMatchHard_32K                    4095844        4045820        -1.22%
BenchmarkMatchEasy1_1K                    1663           1643           -1.20%
BenchmarkMatchHard_1M                     131261708      129708215      -1.18%
BenchmarkMatchHard_32M                    4210112412     4169292003     -0.97%
BenchmarkMatchMedium_32K                  2460752        2438611        -0.90%
BenchmarkMatchEasy0_1M                    422914         419672         -0.77%
BenchmarkMatchMedium_1M                   78581121       78040160       -0.69%
BenchmarkMatchMedium_32M                  2515287278     2498464906     -0.67%
BenchmarkMatchMedium_32                   1754           1746           -0.46%
BenchmarkMatchMedium_1K                   52105          52106          +0.00%
BenchmarkAnchoredLiteralLongNonMatch      185            185            +0.00%
BenchmarkMatchEasy1_32                    107            107            +0.00%
BenchmarkOnePassLongNotPrefix             505            505            +0.00%
BenchmarkOnePassLongPrefix                147            147            +0.00%

The godoc binary is ~0.12% smaller after this CL.

Updates #5729.

toolstash -cmp passes for all architectures other than amd64 and amd64p32.

Other architectures can be done in follow-up CLs.

Change-Id: I0e167e259274b722958567fc0af83a17ca002da7
Reviewed-on: https://go-review.googlesource.com/2284
Reviewed-by: Russ Cox <rsc@golang.org>
diff --git a/src/cmd/6g/galign.go b/src/cmd/6g/galign.go
index 8ba8dd4..5b41a67 100644
--- a/src/cmd/6g/galign.go
+++ b/src/cmd/6g/galign.go
@@ -10,11 +10,11 @@
 	"cmd/internal/obj/x86"
 )
 
-var thechar int = '6'
-
-var thestring string = "amd64"
-
-var thelinkarch *obj.LinkArch = &x86.Linkamd64
+var (
+	thechar     int           = '6'
+	thestring   string        = "amd64"
+	thelinkarch *obj.LinkArch = &x86.Linkamd64
+)
 
 func linkarchinit() {
 	if obj.Getgoarch() == "amd64p32" {
@@ -27,13 +27,12 @@
 
 var MAXWIDTH int64 = 1 << 50
 
-var addptr int = x86.AADDQ
-
-var movptr int = x86.AMOVQ
-
-var leaptr int = x86.ALEAQ
-
-var cmpptr int = x86.ACMPQ
+var (
+	addptr int = x86.AADDQ
+	movptr int = x86.AMOVQ
+	leaptr int = x86.ALEAQ
+	cmpptr int = x86.ACMPQ
+)
 
 /*
  * go declares several platform-specific type aliases:
@@ -101,6 +100,7 @@
 	gc.Thearch.Expandchecks = expandchecks
 	gc.Thearch.Getg = getg
 	gc.Thearch.Gins = gins
+	gc.Thearch.Ginsboolval = ginsboolval
 	gc.Thearch.Ginscon = ginscon
 	gc.Thearch.Ginsnop = ginsnop
 	gc.Thearch.Gmove = gmove
diff --git a/src/cmd/6g/gsubr.go b/src/cmd/6g/gsubr.go
index 0ac103e..53d0f03 100644
--- a/src/cmd/6g/gsubr.go
+++ b/src/cmd/6g/gsubr.go
@@ -99,6 +99,10 @@
 	gins(as, &n1, n2)
 }
 
+func ginsboolval(a int, n *gc.Node) {
+	gins(jmptoset(a), nil, n)
+}
+
 /*
  * set up nodes representing 2^63
  */
@@ -698,6 +702,21 @@
 		gc.OPS<<16 | gc.TFLOAT64:
 		a = x86.AJPS
 
+	case gc.OPC<<16 | gc.TBOOL,
+		gc.OPC<<16 | gc.TINT8,
+		gc.OPC<<16 | gc.TUINT8,
+		gc.OPC<<16 | gc.TINT16,
+		gc.OPC<<16 | gc.TUINT16,
+		gc.OPC<<16 | gc.TINT32,
+		gc.OPC<<16 | gc.TUINT32,
+		gc.OPC<<16 | gc.TINT64,
+		gc.OPC<<16 | gc.TUINT64,
+		gc.OPC<<16 | gc.TPTR32,
+		gc.OPC<<16 | gc.TPTR64,
+		gc.OPC<<16 | gc.TFLOAT32,
+		gc.OPC<<16 | gc.TFLOAT64:
+		a = x86.AJPC
+
 	case gc.OLT<<16 | gc.TINT8,
 		gc.OLT<<16 | gc.TINT16,
 		gc.OLT<<16 | gc.TINT32,
@@ -902,7 +921,8 @@
 		gc.OMINUS<<16 | gc.TPTR64:
 		a = x86.ANEGQ
 
-	case gc.OAND<<16 | gc.TINT8,
+	case gc.OAND<<16 | gc.TBOOL,
+		gc.OAND<<16 | gc.TINT8,
 		gc.OAND<<16 | gc.TUINT8:
 		a = x86.AANDB
 
@@ -920,7 +940,8 @@
 		gc.OAND<<16 | gc.TPTR64:
 		a = x86.AANDQ
 
-	case gc.OOR<<16 | gc.TINT8,
+	case gc.OOR<<16 | gc.TBOOL,
+		gc.OOR<<16 | gc.TINT8,
 		gc.OOR<<16 | gc.TUINT8:
 		a = x86.AORB
 
@@ -1134,6 +1155,46 @@
 	return a
 }
 
+// jmptoset returns ASETxx for AJxx.
+func jmptoset(jmp int) int {
+	switch jmp {
+	case x86.AJEQ:
+		return x86.ASETEQ
+	case x86.AJNE:
+		return x86.ASETNE
+	case x86.AJLT:
+		return x86.ASETLT
+	case x86.AJCS:
+		return x86.ASETCS
+	case x86.AJLE:
+		return x86.ASETLE
+	case x86.AJLS:
+		return x86.ASETLS
+	case x86.AJGT:
+		return x86.ASETGT
+	case x86.AJHI:
+		return x86.ASETHI
+	case x86.AJGE:
+		return x86.ASETGE
+	case x86.AJCC:
+		return x86.ASETCC
+	case x86.AJMI:
+		return x86.ASETMI
+	case x86.AJOC:
+		return x86.ASETOC
+	case x86.AJOS:
+		return x86.ASETOS
+	case x86.AJPC:
+		return x86.ASETPC
+	case x86.AJPL:
+		return x86.ASETPL
+	case x86.AJPS:
+		return x86.ASETPS
+	}
+	gc.Fatal("jmptoset: no entry for %v", gc.Oconv(jmp, 0))
+	panic("unreachable")
+}
+
 const (
 	ODynam   = 1 << 0
 	OAddable = 1 << 1
diff --git a/src/cmd/6g/prog.go b/src/cmd/6g/prog.go
index fe9f013..4c41d87 100644
--- a/src/cmd/6g/prog.go
+++ b/src/cmd/6g/prog.go
@@ -196,6 +196,22 @@
 	x86.ASBBL:     {gc.SizeL | gc.LeftRead | RightRdwr | gc.SetCarry | gc.UseCarry, 0, 0, 0},
 	x86.ASBBQ:     {gc.SizeQ | gc.LeftRead | RightRdwr | gc.SetCarry | gc.UseCarry, 0, 0, 0},
 	x86.ASBBW:     {gc.SizeW | gc.LeftRead | RightRdwr | gc.SetCarry | gc.UseCarry, 0, 0, 0},
+	x86.ASETCC:    {gc.SizeB | gc.RightWrite | gc.UseCarry, 0, 0, 0},
+	x86.ASETCS:    {gc.SizeB | gc.RightWrite | gc.UseCarry, 0, 0, 0},
+	x86.ASETEQ:    {gc.SizeB | gc.RightWrite | gc.UseCarry, 0, 0, 0},
+	x86.ASETGE:    {gc.SizeB | gc.RightWrite | gc.UseCarry, 0, 0, 0},
+	x86.ASETGT:    {gc.SizeB | gc.RightWrite | gc.UseCarry, 0, 0, 0},
+	x86.ASETHI:    {gc.SizeB | gc.RightWrite | gc.UseCarry, 0, 0, 0},
+	x86.ASETLE:    {gc.SizeB | gc.RightWrite | gc.UseCarry, 0, 0, 0},
+	x86.ASETLS:    {gc.SizeB | gc.RightWrite | gc.UseCarry, 0, 0, 0},
+	x86.ASETLT:    {gc.SizeB | gc.RightWrite | gc.UseCarry, 0, 0, 0},
+	x86.ASETMI:    {gc.SizeB | gc.RightWrite | gc.UseCarry, 0, 0, 0},
+	x86.ASETNE:    {gc.SizeB | gc.RightWrite | gc.UseCarry, 0, 0, 0},
+	x86.ASETOC:    {gc.SizeB | gc.RightWrite | gc.UseCarry, 0, 0, 0},
+	x86.ASETOS:    {gc.SizeB | gc.RightWrite | gc.UseCarry, 0, 0, 0},
+	x86.ASETPC:    {gc.SizeB | gc.RightWrite | gc.UseCarry, 0, 0, 0},
+	x86.ASETPL:    {gc.SizeB | gc.RightWrite | gc.UseCarry, 0, 0, 0},
+	x86.ASETPS:    {gc.SizeB | gc.RightWrite | gc.UseCarry, 0, 0, 0},
 	x86.ASHLB:     {gc.SizeB | gc.LeftRead | RightRdwr | gc.ShiftCX | gc.SetCarry, 0, 0, 0},
 	x86.ASHLL:     {gc.SizeL | gc.LeftRead | RightRdwr | gc.ShiftCX | gc.SetCarry, 0, 0, 0},
 	x86.ASHLQ:     {gc.SizeQ | gc.LeftRead | RightRdwr | gc.ShiftCX | gc.SetCarry, 0, 0, 0},
diff --git a/src/cmd/internal/gc/cgen.go b/src/cmd/internal/gc/cgen.go
index 3a3a4c6..b6691ef 100644
--- a/src/cmd/internal/gc/cgen.go
+++ b/src/cmd/internal/gc/cgen.go
@@ -345,20 +345,12 @@
 		Dump("cgen-res", res)
 		Fatal("cgen: unknown op %v", Nconv(n, obj.FmtShort|obj.FmtSign))
 
-	// these call bgen to get a bool value
 	case OOROR, OANDAND,
 		OEQ, ONE,
 		OLT, OLE,
 		OGE, OGT,
 		ONOT:
-		p1 := Gbranch(obj.AJMP, nil, 0)
-		p2 := Pc
-		Thearch.Gmove(Nodbool(true), res)
-		p3 := Gbranch(obj.AJMP, nil, 0)
-		Patch(p1, Pc)
-		Bgen(n, true, 0, p2)
-		Thearch.Gmove(Nodbool(false), res)
-		Patch(p3, Pc)
+		Bvgen(n, res, true)
 		return
 
 	case OPLUS:
@@ -1640,10 +1632,55 @@
 // 		goto to
 // 	}
 func Bgen(n *Node, wantTrue bool, likely int, to *obj.Prog) {
-	if Debug['g'] != 0 {
-		fmt.Printf("\nbgen wantTrue=%t likely=%d to=%v\n", wantTrue, likely, to)
-		Dump("bgen", n)
+	bgenx(n, nil, wantTrue, likely, to)
+}
+
+// Bvgen generates code for calculating boolean values:
+// 	res = n == wantTrue
+func Bvgen(n, res *Node, wantTrue bool) {
+	if Thearch.Ginsboolval == nil {
+		// Direct value generation not implemented for this architecture.
+		// Implement using jumps.
+		bvgenjump(n, res, wantTrue, true)
+		return
 	}
+	bgenx(n, res, wantTrue, 0, nil)
+}
+
+// bvgenjump implements boolean value generation using jumps:
+// 	if n == wantTrue {
+// 		res = 1
+// 	} else {
+// 		res = 0
+// 	}
+// geninit controls whether n's Ninit is generated.
+func bvgenjump(n, res *Node, wantTrue, geninit bool) {
+	init := n.Ninit
+	if !geninit {
+		n.Ninit = nil
+	}
+	p1 := Gbranch(obj.AJMP, nil, 0)
+	p2 := Pc
+	Thearch.Gmove(Nodbool(true), res)
+	p3 := Gbranch(obj.AJMP, nil, 0)
+	Patch(p1, Pc)
+	Bgen(n, wantTrue, 0, p2)
+	Thearch.Gmove(Nodbool(false), res)
+	Patch(p3, Pc)
+	n.Ninit = init
+}
+
+// bgenx is the backend for Bgen and Bvgen.
+// If res is nil, it generates a branch.
+// Otherwise, it generates a boolean value.
+func bgenx(n, res *Node, wantTrue bool, likely int, to *obj.Prog) {
+	if Debug['g'] != 0 {
+		fmt.Printf("\nbgenx wantTrue=%t likely=%d to=%v\n", wantTrue, likely, to)
+		Dump("n", n)
+		Dump("res", res)
+	}
+
+	genval := res != nil
 
 	if n == nil {
 		n = Nodbool(true)
@@ -1659,9 +1696,7 @@
 	}
 
 	if n.Type.Etype != TBOOL {
-		Yyerror("cgen: bad type %v for %v", n.Type, Oconv(int(n.Op), 0))
-		Patch(Thearch.Gins(obj.AEND, nil, nil), to)
-		return
+		Fatal("bgen: bad type %v for %v", n.Type, Oconv(int(n.Op), 0))
 	}
 
 	for n.Op == OCONVNOP {
@@ -1670,44 +1705,90 @@
 	}
 
 	if Thearch.Bgen_float != nil && n.Left != nil && Isfloat[n.Left.Type.Etype] {
+		if genval {
+			bvgenjump(n, res, wantTrue, false)
+			return
+		}
 		Thearch.Bgen_float(n, wantTrue, likely, to)
 		return
 	}
 
 	switch n.Op {
 	default:
+		if genval {
+			Cgen(n, res)
+			if !wantTrue {
+				Thearch.Gins(Thearch.Optoas(OXOR, Types[TUINT8]), Nodintconst(1), res)
+			}
+			return
+		}
+
 		var tmp Node
 		Regalloc(&tmp, n.Type, nil)
 		Cgen(n, &tmp)
-		bgenNonZero(&tmp, wantTrue, likely, to)
+		bgenNonZero(&tmp, nil, wantTrue, likely, to)
 		Regfree(&tmp)
 		return
 
 	case ONAME:
+		if genval {
+			// 5g, 7g, and 9g might need a temporary or other help here,
+			// but they don't support direct generation of a bool value yet.
+			// We can fix that as we go.
+			switch Ctxt.Arch.Thechar {
+			case '5', '7', '9':
+				Fatal("genval 5g, 7g, 9g ONAMES not fully implemented")
+			}
+			Cgen(n, res)
+			if !wantTrue {
+				Thearch.Gins(Thearch.Optoas(OXOR, Types[TUINT8]), Nodintconst(1), res)
+			}
+			return
+		}
+
 		if n.Addable && Ctxt.Arch.Thechar != '5' && Ctxt.Arch.Thechar != '7' && Ctxt.Arch.Thechar != '9' {
 			// no need for a temporary
-			bgenNonZero(n, wantTrue, likely, to)
+			bgenNonZero(n, nil, wantTrue, likely, to)
 			return
 		}
 		var tmp Node
 		Regalloc(&tmp, n.Type, nil)
 		Cgen(n, &tmp)
-		bgenNonZero(&tmp, wantTrue, likely, to)
+		bgenNonZero(&tmp, nil, wantTrue, likely, to)
 		Regfree(&tmp)
 		return
 
 	case OLITERAL:
-		// n is a constant. If n == wantTrue, jump; otherwise do nothing.
+		// n is a constant.
 		if !Isconst(n, CTBOOL) {
 			Fatal("bgen: non-bool const %v\n", Nconv(n, obj.FmtLong))
 		}
+		if genval {
+			Cgen(Nodbool(wantTrue == n.Val.U.Bval), res)
+			return
+		}
+		// If n == wantTrue, jump; otherwise do nothing.
 		if wantTrue == n.Val.U.Bval {
 			Patch(Gbranch(obj.AJMP, nil, likely), to)
 		}
 		return
 
 	case OANDAND, OOROR:
-		if (n.Op == OANDAND) == wantTrue {
+		and := (n.Op == OANDAND) == wantTrue
+		if genval {
+			p1 := Gbranch(obj.AJMP, nil, 0)
+			p2 := Gbranch(obj.AJMP, nil, 0)
+			Patch(p2, Pc)
+			Cgen(Nodbool(!and), res)
+			p3 := Gbranch(obj.AJMP, nil, 0)
+			Patch(p1, Pc)
+			Bgen(n.Left, wantTrue != and, 0, p2)
+			Bvgen(n.Right, res, wantTrue)
+			Patch(p3, Pc)
+			return
+		}
+
+		if and {
 			p1 := Gbranch(obj.AJMP, nil, 0)
 			p2 := Gbranch(obj.AJMP, nil, 0)
 			Patch(p1, Pc)
@@ -1726,7 +1807,7 @@
 		if n.Left == nil || n.Left.Type == nil {
 			return
 		}
-		Bgen(n.Left, !wantTrue, likely, to)
+		bgenx(n.Left, res, !wantTrue, likely, to)
 		return
 
 	case OEQ, ONE, OLT, OGT, OLE, OGE:
@@ -1743,15 +1824,21 @@
 	if !wantTrue {
 		if Isfloat[nr.Type.Etype] {
 			// Brcom is not valid on floats when NaN is involved.
+			ll := n.Ninit // avoid re-genning Ninit
+			n.Ninit = nil
+			if genval {
+				bgenx(n, res, true, likely, to)
+				Thearch.Gins(Thearch.Optoas(OXOR, Types[TUINT8]), Nodintconst(1), res) // res = !res
+				n.Ninit = ll
+				return
+			}
 			p1 := Gbranch(obj.AJMP, nil, 0)
 			p2 := Gbranch(obj.AJMP, nil, 0)
 			Patch(p1, Pc)
-			ll := n.Ninit // avoid re-genning Ninit
-			n.Ninit = nil
-			Bgen(n, true, -likely, p2)
-			n.Ninit = ll
+			bgenx(n, res, true, -likely, p2)
 			Patch(Gbranch(obj.AJMP, nil, 0), to)
 			Patch(p2, Pc)
+			n.Ninit = ll
 			return
 		}
 
@@ -1786,17 +1873,22 @@
 		Regalloc(&tmp, ptr.Type, &ptr)
 		Cgen(&ptr, &tmp)
 		Regfree(&ptr)
-		bgenNonZero(&tmp, a == OEQ != wantTrue, likely, to)
+		bgenNonZero(&tmp, res, a == OEQ != wantTrue, likely, to)
 		Regfree(&tmp)
 		return
 	}
 
 	if Iscomplex[nl.Type.Etype] {
-		complexbool(a, nl, nr, wantTrue, likely, to)
+		complexbool(a, nl, nr, res, wantTrue, likely, to)
 		return
 	}
 
 	if Ctxt.Arch.Regsize == 4 && Is64(nr.Type) {
+		if genval {
+			// TODO: Teach Cmp64 to generate boolean values and remove this.
+			bvgenjump(n, res, wantTrue, false)
+			return
+		}
 		if !nl.Addable || Isconst(nl, CTINT) {
 			nl = CgenTemp(nl)
 		}
@@ -1838,7 +1930,7 @@
 
 		if Smallintconst(nr) && Ctxt.Arch.Thechar != '9' {
 			Thearch.Gins(Thearch.Optoas(OCMP, nr.Type), nl, nr)
-			Patch(Gbranch(Thearch.Optoas(a, nr.Type), nr.Type, likely), to)
+			bins(nr.Type, res, a, likely, to)
 			return
 		}
 
@@ -1869,6 +1961,9 @@
 	if Isfloat[nl.Type.Etype] {
 		switch Ctxt.Arch.Thechar {
 		case '5':
+			if genval {
+				Fatal("genval 5g Isfloat special cases not implemented")
+			}
 			switch n.Op {
 			case ONE:
 				Patch(Gbranch(Thearch.Optoas(OPS, nr.Type), nr.Type, likely), to)
@@ -1883,19 +1978,40 @@
 			switch n.Op {
 			case OEQ:
 				// neither NE nor P
-				p1 := Gbranch(Thearch.Optoas(ONE, nr.Type), nil, -likely)
-				p2 := Gbranch(Thearch.Optoas(OPS, nr.Type), nil, -likely)
-				Patch(Gbranch(obj.AJMP, nil, 0), to)
-				Patch(p1, Pc)
-				Patch(p2, Pc)
+				if genval {
+					var reg Node
+					Regalloc(&reg, Types[TBOOL], nil)
+					Thearch.Ginsboolval(Thearch.Optoas(OEQ, nr.Type), &reg)
+					Thearch.Ginsboolval(Thearch.Optoas(OPC, nr.Type), res)
+					Thearch.Gins(Thearch.Optoas(OAND, Types[TBOOL]), &reg, res)
+					Regfree(&reg)
+				} else {
+					p1 := Gbranch(Thearch.Optoas(ONE, nr.Type), nil, -likely)
+					p2 := Gbranch(Thearch.Optoas(OPS, nr.Type), nil, -likely)
+					Patch(Gbranch(obj.AJMP, nil, 0), to)
+					Patch(p1, Pc)
+					Patch(p2, Pc)
+				}
 				return
 			case ONE:
 				// either NE or P
-				Patch(Gbranch(Thearch.Optoas(ONE, nr.Type), nil, likely), to)
-				Patch(Gbranch(Thearch.Optoas(OPS, nr.Type), nil, likely), to)
+				if genval {
+					var reg Node
+					Regalloc(&reg, Types[TBOOL], nil)
+					Thearch.Ginsboolval(Thearch.Optoas(ONE, nr.Type), &reg)
+					Thearch.Ginsboolval(Thearch.Optoas(OPS, nr.Type), res)
+					Thearch.Gins(Thearch.Optoas(OOR, Types[TBOOL]), &reg, res)
+					Regfree(&reg)
+				} else {
+					Patch(Gbranch(Thearch.Optoas(ONE, nr.Type), nil, likely), to)
+					Patch(Gbranch(Thearch.Optoas(OPS, nr.Type), nil, likely), to)
+				}
 				return
 			}
 		case '7', '9':
+			if genval {
+				Fatal("genval 7g, 9g Isfloat special cases not implemented")
+			}
 			switch n.Op {
 			// On arm64 and ppc64, <= and >= mishandle NaN. Must decompose into < or > and =.
 			// TODO(josh): Convert a <= b to b > a instead?
@@ -1912,11 +2028,11 @@
 		}
 	}
 
-	// Not a special case. Insert an appropriate conditional jump.
-	Patch(Gbranch(Thearch.Optoas(a, nr.Type), nr.Type, likely), to)
+	// Not a special case. Insert the conditional jump or value gen.
+	bins(nr.Type, res, a, likely, to)
 }
 
-func bgenNonZero(n *Node, wantTrue bool, likely int, to *obj.Prog) {
+func bgenNonZero(n, res *Node, wantTrue bool, likely int, to *obj.Prog) {
 	// TODO: Optimize on systems that can compare to zero easily.
 	a := ONE
 	if !wantTrue {
@@ -1925,7 +2041,21 @@
 	var zero Node
 	Nodconst(&zero, n.Type, 0)
 	Thearch.Gins(Thearch.Optoas(OCMP, n.Type), n, &zero)
-	Patch(Gbranch(Thearch.Optoas(a, n.Type), n.Type, likely), to)
+	bins(n.Type, res, a, likely, to)
+}
+
+// bins inserts an instruction to handle the result of a compare.
+// If res is non-nil, it inserts appropriate value generation instructions.
+// If res is nil, it inserts a branch to to.
+func bins(typ *Type, res *Node, a, likely int, to *obj.Prog) {
+	a = Thearch.Optoas(a, typ)
+	if res != nil {
+		// value gen
+		Thearch.Ginsboolval(a, res)
+	} else {
+		// jump
+		Patch(Gbranch(a, typ, likely), to)
+	}
 }
 
 /*
diff --git a/src/cmd/internal/gc/cplx.go b/src/cmd/internal/gc/cplx.go
index c457bbf..cf48c92 100644
--- a/src/cmd/internal/gc/cplx.go
+++ b/src/cmd/internal/gc/cplx.go
@@ -14,7 +14,7 @@
 	return f.Op == OINDREG && t.Op == OINDREG && f.Xoffset+f.Type.Width >= t.Xoffset && t.Xoffset+t.Type.Width >= f.Xoffset
 }
 
-func complexbool(op int, nl, nr *Node, wantTrue bool, likely int, to *obj.Prog) {
+func complexbool(op int, nl, nr, res *Node, wantTrue bool, likely int, to *obj.Prog) {
 	// make both sides addable in ullman order
 	if nr != nil {
 		if nl.Ullman > nr.Ullman && !nl.Addable {
@@ -35,7 +35,10 @@
 	subnode(&rreal, &rimag, nr)
 
 	// build tree
-	// real(l) == real(r) && imag(l) == imag(r)
+	// if branching:
+	// 	real(l) == real(r) && imag(l) == imag(r)
+	// if generating a value, use a branch-free version:
+	// 	real(l) == real(r) & imag(l) == imag(r)
 	realeq := Node{
 		Op:    OEQ,
 		Left:  &lreal,
@@ -55,6 +58,19 @@
 		Type:  Types[TBOOL],
 	}
 
+	if res != nil {
+		// generating a value
+		and.Op = OAND
+		if op == ONE {
+			and.Op = OOR
+			realeq.Op = ONE
+			imageq.Op = ONE
+		}
+		Bvgen(&and, res, true)
+		return
+	}
+
+	// generating a branch
 	if op == ONE {
 		wantTrue = !wantTrue
 	}
diff --git a/src/cmd/internal/gc/go.go b/src/cmd/internal/gc/go.go
index c0ec7b5..d399ebb 100644
--- a/src/cmd/internal/gc/go.go
+++ b/src/cmd/internal/gc/go.go
@@ -791,6 +791,13 @@
 	Expandchecks func(*obj.Prog)
 	Getg         func(*Node)
 	Gins         func(int, *Node, *Node) *obj.Prog
+	// Ginsboolval inserts instructions to convert the result
+	// of a just-completed comparison to a boolean value.
+	// The first argument is the conditional jump instruction
+	// corresponding to the desired value.
+	// The second argument is the destination.
+	// If not present, Ginsboolval will be emulated with jumps.
+	Ginsboolval  func(int, *Node)
 	Ginscon      func(int, int64, *Node)
 	Ginsnop      func()
 	Gmove        func(*Node, *Node)
diff --git a/src/cmd/internal/gc/syntax.go b/src/cmd/internal/gc/syntax.go
index 1012c66..d448188 100644
--- a/src/cmd/internal/gc/syntax.go
+++ b/src/cmd/internal/gc/syntax.go
@@ -306,6 +306,7 @@
 	ORROTC  // right rotate-carry: ARCR.
 	ORETJMP // return to other function
 	OPS     // compare parity set (for x86 NaN check)
+	OPC     // compare parity clear (for x86 NaN check)
 	OSQRT   // sqrt(float64), on systems that have hw support
 	OGETG   // runtime.getg() (read g pointer)