cmd/compile: change riscv64 Eq32/Neq32 to zero extend before subtraction

As is done for other equality tests, zero extend before subtraction
rather than after (or in this case, at the same time, since SUBW sign
extends its result as part of the subtraction). While at face value
this appears to require more instructions, in reality it allows most
of the zero extensions to be completely eliminated, since correctly
typed loads (such as LWU) already produce zero extended values.
Existing optimisations (such as subtraction of zero) then become more
effective.
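
To illustrate (this example is not part of the CL), a 32 bit equality
such as:

	func eq32(x, y uint32) bool {
		return x == y
	}

previously lowered to (SEQZ (SUBW x y)). It now lowers to
(SEQZ (SUB (ZeroExt32to64 x) (ZeroExt32to64 y))), and when x and y
come from correctly typed loads (LWU already zero extends), the
ZeroExt32to64 ops are elided, leaving just SUB and SEQZ.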

This removes more than 10,000 instructions from the Go binary. In
particular, a writeBarrier check now requires only three instructions
(AUIPC, LWU, BNEZ) instead of the previous four (AUIPC, LWU, NEGW,
BNEZ).
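
A sketch of how the rewrites compose for the writeBarrier case (the
intermediate steps shown are illustrative, not taken from this CL):

	Neq32 wb (Const32 [0])
	=> SNEZ (SUB (ZeroExt32to64 wb) (ZeroExt32to64 (Const32 [0])))  // this CL
	=> SNEZ (SUB wb (MOVDconst [0]))  // wb comes from an LWU load, extension elided
	=> SNEZ wb                        // subtraction of zero
	=> BNEZ wb, ...                   // SNEZ folds into the conditional branch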

Change-Id: I7afdc1921c4916ddbd414c3b3f5c2089107ec016
Reviewed-on: https://go-review.googlesource.com/c/go/+/274066
Trust: Joel Sing <joel@sing.id.au>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
Run-TryBot: Cherry Zhang <cherryyz@google.com>
diff --git a/src/cmd/compile/internal/ssa/gen/RISCV64.rules b/src/cmd/compile/internal/ssa/gen/RISCV64.rules
index 4380a5e..15361fd 100644
--- a/src/cmd/compile/internal/ssa/gen/RISCV64.rules
+++ b/src/cmd/compile/internal/ssa/gen/RISCV64.rules
@@ -251,7 +251,7 @@
 
 (EqPtr x y) => (SEQZ (SUB <x.Type> x y))
 (Eq64  x y) => (SEQZ (SUB <x.Type> x y))
-(Eq32  x y) => (SEQZ (SUBW <x.Type> x y))
+(Eq32  x y) => (SEQZ (SUB <x.Type> (ZeroExt32to64 x) (ZeroExt32to64 y)))
 (Eq16  x y) => (SEQZ (SUB <x.Type> (ZeroExt16to64 x) (ZeroExt16to64 y)))
 (Eq8   x y) => (SEQZ (SUB <x.Type> (ZeroExt8to64  x) (ZeroExt8to64  y)))
 (Eq64F ...) => (FEQD ...)
@@ -259,7 +259,7 @@
 
 (NeqPtr x y) => (SNEZ (SUB <x.Type> x y))
 (Neq64  x y) => (SNEZ (SUB <x.Type> x y))
-(Neq32  x y) => (SNEZ (SUBW <x.Type> x y))
+(Neq32  x y) => (SNEZ (SUB <x.Type> (ZeroExt32to64 x) (ZeroExt32to64 y)))
 (Neq16  x y) => (SNEZ (SUB <x.Type> (ZeroExt16to64 x) (ZeroExt16to64 y)))
 (Neq8   x y) => (SNEZ (SUB <x.Type> (ZeroExt8to64  x) (ZeroExt8to64  y)))
 (Neq64F ...) => (FNED ...)
diff --git a/src/cmd/compile/internal/ssa/rewriteRISCV64.go b/src/cmd/compile/internal/ssa/rewriteRISCV64.go
index fb507b6..1abdf1a 100644
--- a/src/cmd/compile/internal/ssa/rewriteRISCV64.go
+++ b/src/cmd/compile/internal/ssa/rewriteRISCV64.go
@@ -890,14 +890,19 @@
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	b := v.Block
+	typ := &b.Func.Config.Types
 	// match: (Eq32 x y)
-	// result: (SEQZ (SUBW <x.Type> x y))
+	// result: (SEQZ (SUB <x.Type> (ZeroExt32to64 x) (ZeroExt32to64 y)))
 	for {
 		x := v_0
 		y := v_1
 		v.reset(OpRISCV64SEQZ)
-		v0 := b.NewValue0(v.Pos, OpRISCV64SUBW, x.Type)
-		v0.AddArg2(x, y)
+		v0 := b.NewValue0(v.Pos, OpRISCV64SUB, x.Type)
+		v1 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
+		v1.AddArg(x)
+		v2 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
+		v2.AddArg(y)
+		v0.AddArg2(v1, v2)
 		v.AddArg(v0)
 		return true
 	}
@@ -2466,14 +2471,19 @@
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	b := v.Block
+	typ := &b.Func.Config.Types
 	// match: (Neq32 x y)
-	// result: (SNEZ (SUBW <x.Type> x y))
+	// result: (SNEZ (SUB <x.Type> (ZeroExt32to64 x) (ZeroExt32to64 y)))
 	for {
 		x := v_0
 		y := v_1
 		v.reset(OpRISCV64SNEZ)
-		v0 := b.NewValue0(v.Pos, OpRISCV64SUBW, x.Type)
-		v0.AddArg2(x, y)
+		v0 := b.NewValue0(v.Pos, OpRISCV64SUB, x.Type)
+		v1 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
+		v1.AddArg(x)
+		v2 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
+		v2.AddArg(y)
+		v0.AddArg2(v1, v2)
 		v.AddArg(v0)
 		return true
 	}
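
For regression purposes (not included in this CL), a codegen test along
these lines could verify that SUBW is no longer emitted, assuming the
standard test/codegen asmcheck harness:

	// asmcheck

	package codegen

	// 32 bit equality should no longer require SUBW on riscv64.
	func eq32(x, y uint32) bool {
		// riscv64:-"SUBW"
		return x == y
	}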