[dev.ssa] cmd/compile: handle floating point on ARM

Machine supports (or the runtime simulates in soft float mode)
(u)int32<->float conversions. The frontend rewrites int64<->float
conversions to call to runtime function.

For int64->float32 conversion, the frontend generates

.   .   AS u(100) l(10) tc(1)
.   .   .   NAME-main.~r1 u(1) a(true) g(1) l(9) x(8+0) class(PPARAMOUT) f(1) float32
.   .   .   CALLFUNC u(100) l(10) tc(1) float32
.   .   .   .   NAME-runtime.int64tofloat64 u(1) a(true) x(0+0) class(PFUNC) tc(1) used(true) FUNC-func(int64) float64

The CALLFUNC node has type float32, whereas runtime.int64tofloat64
returns float64. The legacy backend implicitly makes a float64->float32
conversion. The SSA backend does not do implicit conversion, so we
insert an explicit CONV here.

All cmd/compile/internal/gc/testdata/*_ssa.go tests passed.

Progress on SSA for ARM. Still not complete.

Update #15365.

Change-Id: I30937c8ff977271246b068f48224693776804339
Reviewed-on: https://go-review.googlesource.com/23652
Reviewed-by: Keith Randall <khr@golang.org>
diff --git a/src/cmd/compile/internal/ssa/gen/ARM.rules b/src/cmd/compile/internal/ssa/gen/ARM.rules
index f36cf6a..79d1868 100644
--- a/src/cmd/compile/internal/ssa/gen/ARM.rules
+++ b/src/cmd/compile/internal/ssa/gen/ARM.rules
@@ -6,6 +6,8 @@
 (Add32 x y) -> (ADD x y)
 (Add16 x y) -> (ADD x y)
 (Add8 x y) -> (ADD x y)
+(Add32F x y) -> (ADDF x y)
+(Add64F x y) -> (ADDD x y)
 
 (Add32carry x y) -> (ADDS x y)
 (Add32withcarry x y c) -> (ADC x y c)
@@ -14,6 +16,8 @@
 (Sub32 x y) -> (SUB x y)
 (Sub16 x y) -> (SUB x y)
 (Sub8 x y) -> (SUB x y)
+(Sub32F x y) -> (SUBF x y)
+(Sub64F x y) -> (SUBD x y)
 
 (Sub32carry x y) -> (SUBS x y)
 (Sub32withcarry x y c) -> (SBC x y c)
@@ -21,6 +25,8 @@
 (Mul32 x y) -> (MUL x y)
 (Mul16 x y) -> (MUL x y)
 (Mul8 x y) -> (MUL x y)
+(Mul32F x y) -> (MULF x y)
+(Mul64F x y) -> (MULD x y)
 
 (Hmul32 x y) -> (HMUL x y)
 (Hmul32u x y) -> (HMULU x y)
@@ -37,6 +43,8 @@
 (Div16u x y) -> (DIVU (ZeroExt16to32 x) (ZeroExt16to32 y))
 (Div8 x y) -> (DIV (SignExt8to32 x) (SignExt8to32 y))
 (Div8u x y) -> (DIVU (ZeroExt8to32 x) (ZeroExt8to32 y))
+(Div32F x y) -> (DIVF x y)
+(Div64F x y) -> (DIVD x y)
 
 (Mod32 x y) -> (MOD x y)
 (Mod32u x y) -> (MODU x y)
@@ -61,11 +69,16 @@
 (Neg32 x) -> (RSBconst [0] x)
 (Neg16 x) -> (RSBconst [0] x)
 (Neg8 x) -> (RSBconst [0] x)
+//TODO: implement NEGF, NEGD in assembler and soft float simulator, and use them.
+(Neg32F x) -> (MULF (MOVFconst [int64(math.Float64bits(-1))]) x)
+(Neg64F x) -> (MULD (MOVDconst [int64(math.Float64bits(-1))]) x)
 
 (Com32 x) -> (MVN x)
 (Com16 x) -> (MVN x)
 (Com8 x) -> (MVN x)
 
+(Sqrt x) -> (SQRTD x)
+
 // boolean ops -- booleans are represented with 0=false, 1=true
 (AndB x y) -> (AND x y)
 (OrB x y) -> (OR x y)
@@ -143,6 +156,8 @@
 (Const8 [val]) -> (MOVWconst [val])
 (Const16 [val]) -> (MOVWconst [val])
 (Const32 [val]) -> (MOVWconst [val])
+(Const32F [val]) -> (MOVFconst [val])
+(Const64F [val]) -> (MOVDconst [val])
 (ConstNil) -> (MOVWconst [0])
 (ConstBool [b]) -> (MOVWconst [b])
 
@@ -164,20 +179,38 @@
 (Signmask x) -> (SRAconst x [31])
 (Zeromask x) -> (LoweredZeromask x)
 
+// float <-> int conversion
+(Cvt32to32F x) -> (MOVWF x)
+(Cvt32to64F x) -> (MOVWD x)
+(Cvt32Uto32F x) -> (MOVWUF x)
+(Cvt32Uto64F x) -> (MOVWUD x)
+(Cvt32Fto32 x) -> (MOVFW x)
+(Cvt64Fto32 x) -> (MOVDW x)
+(Cvt32Fto32U x) -> (MOVFWU x)
+(Cvt64Fto32U x) -> (MOVDWU x)
+(Cvt32Fto64F x) -> (MOVFD x)
+(Cvt64Fto32F x) -> (MOVDF x)
+
 // comparisons
 (Eq8 x y)  -> (Equal (CMP (ZeroExt8to32 x) (ZeroExt8to32 y)))
 (Eq16 x y) -> (Equal (CMP (ZeroExt16to32 x) (ZeroExt16to32 y)))
 (Eq32 x y) -> (Equal (CMP x y))
 (EqPtr x y) -> (Equal (CMP x y))
+(Eq32F x y) -> (Equal (CMPF x y))
+(Eq64F x y) -> (Equal (CMPD x y))
 
 (Neq8 x y)  -> (NotEqual (CMP (ZeroExt8to32 x) (ZeroExt8to32 y)))
 (Neq16 x y) -> (NotEqual (CMP (ZeroExt16to32 x) (ZeroExt16to32 y)))
 (Neq32 x y) -> (NotEqual (CMP x y))
 (NeqPtr x y) -> (NotEqual (CMP x y))
+(Neq32F x y) -> (NotEqual (CMPF x y))
+(Neq64F x y) -> (NotEqual (CMPD x y))
 
 (Less8 x y)  -> (LessThan (CMP (SignExt8to32 x) (SignExt8to32 y)))
 (Less16 x y) -> (LessThan (CMP (SignExt16to32 x) (SignExt16to32 y)))
 (Less32 x y) -> (LessThan (CMP x y))
+(Less32F x y) -> (GreaterThan (CMPF y x)) // reverse operands to work around NaN
+(Less64F x y) -> (GreaterThan (CMPD y x)) // reverse operands to work around NaN
 
 (Less8U x y)  -> (LessThanU (CMP (ZeroExt8to32 x) (ZeroExt8to32 y)))
 (Less16U x y) -> (LessThanU (CMP (ZeroExt16to32 x) (ZeroExt16to32 y)))
@@ -186,6 +219,8 @@
 (Leq8 x y)  -> (LessEqual (CMP (SignExt8to32 x) (SignExt8to32 y)))
 (Leq16 x y) -> (LessEqual (CMP (SignExt16to32 x) (SignExt16to32 y)))
 (Leq32 x y) -> (LessEqual (CMP x y))
+(Leq32F x y) -> (GreaterEqual (CMPF y x)) // reverse operands to work around NaN
+(Leq64F x y) -> (GreaterEqual (CMPD y x)) // reverse operands to work around NaN
 
 (Leq8U x y)  -> (LessEqualU (CMP (ZeroExt8to32 x) (ZeroExt8to32 y)))
 (Leq16U x y) -> (LessEqualU (CMP (ZeroExt16to32 x) (ZeroExt16to32 y)))
@@ -194,6 +229,8 @@
 (Greater8 x y)  -> (GreaterThan (CMP (SignExt8to32 x) (SignExt8to32 y)))
 (Greater16 x y) -> (GreaterThan (CMP (SignExt16to32 x) (SignExt16to32 y)))
 (Greater32 x y) -> (GreaterThan (CMP x y))
+(Greater32F x y) -> (GreaterThan (CMPF x y))
+(Greater64F x y) -> (GreaterThan (CMPD x y))
 
 (Greater8U x y)  -> (GreaterThanU (CMP (ZeroExt8to32 x) (ZeroExt8to32 y)))
 (Greater16U x y) -> (GreaterThanU (CMP (ZeroExt16to32 x) (ZeroExt16to32 y)))
@@ -202,6 +239,8 @@
 (Geq8 x y)  -> (GreaterEqual (CMP (SignExt8to32 x) (SignExt8to32 y)))
 (Geq16 x y) -> (GreaterEqual (CMP (SignExt16to32 x) (SignExt16to32 y)))
 (Geq32 x y) -> (GreaterEqual (CMP x y))
+(Geq32F x y) -> (GreaterEqual (CMPF x y))
+(Geq64F x y) -> (GreaterEqual (CMPD x y))
 
 (Geq8U x y)  -> (GreaterEqualU (CMP (ZeroExt8to32 x) (ZeroExt8to32 y)))
 (Geq16U x y) -> (GreaterEqualU (CMP (ZeroExt16to32 x) (ZeroExt16to32 y)))
@@ -218,11 +257,15 @@
 (Load <t> ptr mem) && (is16BitInt(t) && isSigned(t)) -> (MOVHload ptr mem)
 (Load <t> ptr mem) && (is16BitInt(t) && !isSigned(t)) -> (MOVHUload ptr mem)
 (Load <t> ptr mem) && (is32BitInt(t) || isPtr(t)) -> (MOVWload ptr mem)
+(Load <t> ptr mem) && is32BitFloat(t) -> (MOVFload ptr mem)
+(Load <t> ptr mem) && is64BitFloat(t) -> (MOVDload ptr mem)
 
 // stores
 (Store [1] ptr val mem) -> (MOVBstore ptr val mem)
 (Store [2] ptr val mem) -> (MOVHstore ptr val mem)
-(Store [4] ptr val mem) -> (MOVWstore ptr val mem)
+(Store [4] ptr val mem) && !is32BitFloat(val.Type) -> (MOVWstore ptr val mem)
+(Store [4] ptr val mem) && is32BitFloat(val.Type) -> (MOVFstore ptr val mem)
+(Store [8] ptr val mem) && is64BitFloat(val.Type) -> (MOVDstore ptr val mem)
 
 // zero instructions
 //TODO: check alignment?
@@ -336,6 +379,10 @@
   (MOVHUload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
 (MOVWload [off1] {sym1} (ADDconst [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) ->
   (MOVWload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+(MOVFload [off1] {sym1} (ADDconst [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) ->
+  (MOVFload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+(MOVDload [off1] {sym1} (ADDconst [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) ->
+  (MOVDload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
 
 (MOVBstore [off1] {sym1} (ADDconst [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2) ->
   (MOVBstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
@@ -343,6 +390,10 @@
   (MOVHstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
 (MOVWstore [off1] {sym1} (ADDconst [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2) ->
   (MOVWstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
+(MOVFstore [off1] {sym1} (ADDconst [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2) ->
+  (MOVFstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
+(MOVDstore [off1] {sym1} (ADDconst [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2) ->
+  (MOVDstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
 
 (ADD (MUL x y) a) -> (MULA x y a)
 (ADD a (MUL x y)) -> (MULA x y a)
diff --git a/src/cmd/compile/internal/ssa/gen/ARMOps.go b/src/cmd/compile/internal/ssa/gen/ARMOps.go
index 4fc7238..6a76b0a 100644
--- a/src/cmd/compile/internal/ssa/gen/ARMOps.go
+++ b/src/cmd/compile/internal/ssa/gen/ARMOps.go
@@ -22,6 +22,8 @@
 // HU            = 16 bit unsigned
 // B (byte)      = 8 bit
 // BU            = 8 bit unsigned
+// F (float)     = 32 bit float
+// D (double)    = 64 bit float
 
 var regNamesARM = []string{
 	"R0",
@@ -41,6 +43,23 @@
 	"R14", // link
 	"R15", // pc
 
+	"F0",
+	"F1",
+	"F2",
+	"F3",
+	"F4",
+	"F5",
+	"F6",
+	"F7",
+	"F8",
+	"F9",
+	"F10",
+	"F11",
+	"F12",
+	"F13",
+	"F14",
+	"F15", // tmp
+
 	// pseudo-registers
 	"FLAGS",
 	"SB",
@@ -73,7 +92,8 @@
 		gpsp       = gp | buildReg("SP")
 		gpspsb     = gpsp | buildReg("SB")
 		flags      = buildReg("FLAGS")
-		callerSave = gp | flags
+		fp         = buildReg("F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15")
+		callerSave = gp | fp | flags
 	)
 	// Common regInfo
 	var (
@@ -88,6 +108,14 @@
 		gp31      = regInfo{inputs: []regMask{gp, gp, gp}, outputs: []regMask{gp}}
 		gpload    = regInfo{inputs: []regMask{gpspsb}, outputs: []regMask{gp}}
 		gpstore   = regInfo{inputs: []regMask{gpspsb, gp}, outputs: []regMask{}}
+		fp01      = regInfo{inputs: []regMask{}, outputs: []regMask{fp}}
+		fp11      = regInfo{inputs: []regMask{fp}, outputs: []regMask{fp}}
+		fpgp      = regInfo{inputs: []regMask{fp}, outputs: []regMask{gp}}
+		gpfp      = regInfo{inputs: []regMask{gp}, outputs: []regMask{fp}}
+		fp21      = regInfo{inputs: []regMask{fp, fp}, outputs: []regMask{fp}}
+		fp2flags  = regInfo{inputs: []regMask{fp, fp}, outputs: []regMask{flags}}
+		fpload    = regInfo{inputs: []regMask{gpspsb}, outputs: []regMask{fp}}
+		fpstore   = regInfo{inputs: []regMask{gpspsb, fp}, outputs: []regMask{}}
 		readflags = regInfo{inputs: []regMask{flags}, outputs: []regMask{gp}}
 	)
 	ops := []opData{
@@ -114,6 +142,15 @@
 		{name: "MULLU", argLength: 2, reg: regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{gp &^ buildReg("R0")}, clobbers: buildReg("R0")}, asm: "MULLU", commutative: true}, // arg0 * arg1, results 64-bit, high 32-bit in R0
 		{name: "MULA", argLength: 3, reg: gp31, asm: "MULA"},                                                                                                                        // arg0 * arg1 + arg2
 
+		{name: "ADDF", argLength: 2, reg: fp21, asm: "ADDF", commutative: true}, // arg0 + arg1
+		{name: "ADDD", argLength: 2, reg: fp21, asm: "ADDD", commutative: true}, // arg0 + arg1
+		{name: "SUBF", argLength: 2, reg: fp21, asm: "SUBF"},                    // arg0 - arg1
+		{name: "SUBD", argLength: 2, reg: fp21, asm: "SUBD"},                    // arg0 - arg1
+		{name: "MULF", argLength: 2, reg: fp21, asm: "MULF", commutative: true}, // arg0 * arg1
+		{name: "MULD", argLength: 2, reg: fp21, asm: "MULD", commutative: true}, // arg0 * arg1
+		{name: "DIVF", argLength: 2, reg: fp21, asm: "DIVF"},                    // arg0 / arg1
+		{name: "DIVD", argLength: 2, reg: fp21, asm: "DIVD"},                    // arg0 / arg1
+
 		{name: "AND", argLength: 2, reg: gp21, asm: "AND", commutative: true}, // arg0 & arg1
 		{name: "ANDconst", argLength: 1, reg: gp11, asm: "AND", aux: "Int32"}, // arg0 & auxInt
 		{name: "OR", argLength: 2, reg: gp21, asm: "ORR", commutative: true},  // arg0 | arg1
@@ -126,6 +163,8 @@
 		// unary ops
 		{name: "MVN", argLength: 1, reg: gp11, asm: "MVN"}, // ^arg0
 
+		{name: "SQRTD", argLength: 1, reg: fp11, asm: "SQRTD"}, // sqrt(arg0), float64
+
 		// shifts
 		{name: "SLL", argLength: 2, reg: gp21cf, asm: "SLL"},                  // arg0 << arg1, results 0 for large shift
 		{name: "SLLconst", argLength: 1, reg: gp11, asm: "SLL", aux: "Int32"}, // arg0 << auxInt
@@ -143,24 +182,43 @@
 		{name: "TSTconst", argLength: 1, reg: gp1flags, asm: "TST", aux: "Int32", typ: "Flags"}, // arg0 & auxInt compare to 0
 		{name: "TEQ", argLength: 2, reg: gp2flags, asm: "TEQ", typ: "Flags", commutative: true}, // arg0 ^ arg1 compare to 0
 		{name: "TEQconst", argLength: 1, reg: gp1flags, asm: "TEQ", aux: "Int32", typ: "Flags"}, // arg0 ^ auxInt compare to 0
+		{name: "CMPF", argLength: 2, reg: fp2flags, asm: "CMPF", typ: "Flags"},                  // arg0 compare to arg1, float32
+		{name: "CMPD", argLength: 2, reg: fp2flags, asm: "CMPD", typ: "Flags"},                  // arg0 compare to arg1, float64
 
-		{name: "MOVWconst", argLength: 0, reg: gp01, aux: "Int32", asm: "MOVW", typ: "UInt32", rematerializeable: true}, // 32 low bits of auxint
+		{name: "MOVWconst", argLength: 0, reg: gp01, aux: "Int32", asm: "MOVW", typ: "UInt32", rematerializeable: true},    // 32 low bits of auxint
+		{name: "MOVFconst", argLength: 0, reg: fp01, aux: "Float64", asm: "MOVF", typ: "Float32", rematerializeable: true}, // auxint as 64-bit float, convert to 32-bit float
+		{name: "MOVDconst", argLength: 0, reg: fp01, aux: "Float64", asm: "MOVD", typ: "Float64", rematerializeable: true}, // auxint as 64-bit float
 
 		{name: "MOVBload", argLength: 2, reg: gpload, aux: "SymOff", asm: "MOVB", typ: "Int8"},     // load from arg0 + auxInt + aux.  arg1=mem.
 		{name: "MOVBUload", argLength: 2, reg: gpload, aux: "SymOff", asm: "MOVBU", typ: "UInt8"},  // load from arg0 + auxInt + aux.  arg1=mem.
 		{name: "MOVHload", argLength: 2, reg: gpload, aux: "SymOff", asm: "MOVH", typ: "Int16"},    // load from arg0 + auxInt + aux.  arg1=mem.
 		{name: "MOVHUload", argLength: 2, reg: gpload, aux: "SymOff", asm: "MOVHU", typ: "UInt16"}, // load from arg0 + auxInt + aux.  arg1=mem.
 		{name: "MOVWload", argLength: 2, reg: gpload, aux: "SymOff", asm: "MOVW", typ: "UInt32"},   // load from arg0 + auxInt + aux.  arg1=mem.
+		{name: "MOVFload", argLength: 2, reg: fpload, aux: "SymOff", asm: "MOVF", typ: "Float32"},  // load from arg0 + auxInt + aux.  arg1=mem.
+		{name: "MOVDload", argLength: 2, reg: fpload, aux: "SymOff", asm: "MOVD", typ: "Float64"},  // load from arg0 + auxInt + aux.  arg1=mem.
 
 		{name: "MOVBstore", argLength: 3, reg: gpstore, aux: "SymOff", asm: "MOVB", typ: "Mem"}, // store 1 byte of arg1 to arg0 + auxInt + aux.  arg2=mem.
 		{name: "MOVHstore", argLength: 3, reg: gpstore, aux: "SymOff", asm: "MOVH", typ: "Mem"}, // store 2 bytes of arg1 to arg0 + auxInt + aux.  arg2=mem.
 		{name: "MOVWstore", argLength: 3, reg: gpstore, aux: "SymOff", asm: "MOVW", typ: "Mem"}, // store 4 bytes of arg1 to arg0 + auxInt + aux.  arg2=mem.
+		{name: "MOVFstore", argLength: 3, reg: fpstore, aux: "SymOff", asm: "MOVF", typ: "Mem"}, // store 4 bytes of arg1 to arg0 + auxInt + aux.  arg2=mem.
+		{name: "MOVDstore", argLength: 3, reg: fpstore, aux: "SymOff", asm: "MOVD", typ: "Mem"}, // store 8 bytes of arg1 to arg0 + auxInt + aux.  arg2=mem.
 
 		{name: "MOVBreg", argLength: 1, reg: gp11, asm: "MOVBS"},  // move from arg0, sign-extended from byte
 		{name: "MOVBUreg", argLength: 1, reg: gp11, asm: "MOVBU"}, // move from arg0, unsign-extended from byte
 		{name: "MOVHreg", argLength: 1, reg: gp11, asm: "MOVHS"},  // move from arg0, sign-extended from half
 		{name: "MOVHUreg", argLength: 1, reg: gp11, asm: "MOVHU"}, // move from arg0, unsign-extended from half
 
+		{name: "MOVWF", argLength: 1, reg: gpfp, asm: "MOVWF"},  // int32 -> float32
+		{name: "MOVWD", argLength: 1, reg: gpfp, asm: "MOVWD"},  // int32 -> float64
+		{name: "MOVWUF", argLength: 1, reg: gpfp, asm: "MOVWF"}, // uint32 -> float32, set U bit in the instruction
+		{name: "MOVWUD", argLength: 1, reg: gpfp, asm: "MOVWD"}, // uint32 -> float64, set U bit in the instruction
+		{name: "MOVFW", argLength: 1, reg: fpgp, asm: "MOVFW"},  // float32 -> int32
+		{name: "MOVDW", argLength: 1, reg: fpgp, asm: "MOVDW"},  // float64 -> int32
+		{name: "MOVFWU", argLength: 1, reg: fpgp, asm: "MOVFW"}, // float32 -> uint32, set U bit in the instruction
+		{name: "MOVDWU", argLength: 1, reg: fpgp, asm: "MOVDW"}, // float64 -> uint32, set U bit in the instruction
+		{name: "MOVFD", argLength: 1, reg: fp11, asm: "MOVFD"},  // float32 -> float64
+		{name: "MOVDF", argLength: 1, reg: fp11, asm: "MOVDF"},  // float64 -> float32
+
 		{name: "CALLstatic", argLength: 1, reg: regInfo{clobbers: callerSave}, aux: "SymOff"},                                // call static function aux.(*gc.Sym).  arg0=mem, auxint=argsize, returns mem
 		{name: "CALLclosure", argLength: 3, reg: regInfo{[]regMask{gpsp, buildReg("R7"), 0}, callerSave, nil}, aux: "Int64"}, // call function via closure.  arg0=codeptr, arg1=closure, arg2=mem, auxint=argsize, returns mem
 		{name: "CALLdefer", argLength: 1, reg: regInfo{clobbers: callerSave}, aux: "Int64"},                                  // call deferproc.  arg0=mem, auxint=argsize, returns mem
@@ -290,7 +348,7 @@
 		blocks:          blocks,
 		regnames:        regNamesARM,
 		gpregmask:       gp,
-		fpregmask:       0, // fp not implemented yet
+		fpregmask:       fp,
 		flagmask:        flags,
 		framepointerreg: -1, // not used
 	})
diff --git a/src/cmd/compile/internal/ssa/gen/genericOps.go b/src/cmd/compile/internal/ssa/gen/genericOps.go
index 72b7f6f..e35da2b 100644
--- a/src/cmd/compile/internal/ssa/gen/genericOps.go
+++ b/src/cmd/compile/internal/ssa/gen/genericOps.go
@@ -433,6 +433,11 @@
 	{name: "Signmask", argLength: 1, typ: "Int32"},  // 0 if arg0 >= 0, -1 if arg0 < 0
 	{name: "Zeromask", argLength: 1, typ: "UInt32"}, // 0 if arg0 == 0, 0xffffffff if arg0 != 0
 
+	{name: "Cvt32Uto32F", argLength: 1}, // uint32 -> float32, only used on 32-bit arch
+	{name: "Cvt32Uto64F", argLength: 1}, // uint32 -> float64, only used on 32-bit arch
+	{name: "Cvt32Fto32U", argLength: 1}, // float32 -> uint32, only used on 32-bit arch
+	{name: "Cvt64Fto32U", argLength: 1}, // float64 -> uint32, only used on 32-bit arch
+
 	// pseudo-ops for breaking Tuple
 	{name: "Select0", argLength: 1}, // the first component of a tuple
 	{name: "Select1", argLength: 1}, // the second component of a tuple