[dev.ssa] cmd/compile/internal/ssa: fix shift operations

Convert shift ops to also encode the size of the shift amount.
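
For example, with this encoding Lsh32x8 shifts a 32-bit value by an
8-bit amount. A rough Go model of the semantics such an op has to
provide (the helper name is mine, for illustration only): the spec
makes an oversized left shift yield 0, so the lowering must know the
amount's width to generate the right comparison.

    // lsh32x8 models the generic Lsh32x8 op: a 32-bit value shifted
    // left by an 8-bit amount. Per the Go spec, s >= 32 must yield 0.
    func lsh32x8(x int32, s uint8) int32 {
        if s >= 32 {
            return 0
        }
        return x << s
    }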

Change signed right shift from using CMOV to using bit twiddles.
The new sequence is a little better: 5 instructions instead of 4,
but fewer bytes and slightly faster code. It's also a bit faster
than the 4-instruction branch version, even with a very predictable
branch. Tested on my machine; YMMV.
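
The twiddle, sketched in Go (my reconstruction of the generated
amd64 sequence, not the rewrite rule itself): an oversized signed
shift must keep filling with the sign bit, i.e. act like a shift
by 63.

    // rsh64x64 models the branchless lowering of signed x >> s
    // where s may be >= 64.
    func rsh64x64(x int64, s uint64) int64 {
        // mask = ^0 if s < 64, else 0; on amd64 this is CMPQ plus
        // a carry-mask SBBQ, computed without a branch.
        var mask uint64
        if s < 64 {
            mask = ^uint64(0)
        }
        // s | ^mask is s when s < 64 and all ones otherwise (NOTQ,
        // ORQ); SARQ then truncates an all-ones count to 63, giving
        // pure sign fill.
        eff := (s | ^mask) & 63
        return x >> eff
    }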

Implement OCOM while we are here.
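
(OCOM is the front end's op for Go's unary bitwise complement; it
lowers to the new Com8-Com64 generic ops added below.)

    // ^x parses as OCOM and becomes a ComN SSA op, Com64 here.
    func com64(x int64) int64 {
        return ^x
    }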

Change-Id: I8ca12dd62fae5d626dc0e6da5d4bbd34fd9640d2
Reviewed-on: https://go-review.googlesource.com/12867
Reviewed-by: Josh Bleecher Snyder <josharian@gmail.com>
diff --git a/src/cmd/compile/internal/ssa/gen/genericOps.go b/src/cmd/compile/internal/ssa/gen/genericOps.go
index 9e71dbd..bc1fdc8 100644
--- a/src/cmd/compile/internal/ssa/gen/genericOps.go
+++ b/src/cmd/compile/internal/ssa/gen/genericOps.go
@@ -42,19 +42,57 @@
 	{name: "Xor32"},
 	{name: "Xor64"},
 
-	{name: "Lsh8"}, // arg0 << arg1
-	{name: "Lsh16"},
-	{name: "Lsh32"},
-	{name: "Lsh64"},
+	// For shifts, AxB means the shifted value has A bits and the shift amount has B bits.
+	{name: "Lsh8x8"}, // arg0 << arg1
+	{name: "Lsh8x16"},
+	{name: "Lsh8x32"},
+	{name: "Lsh8x64"},
+	{name: "Lsh16x8"},
+	{name: "Lsh16x16"},
+	{name: "Lsh16x32"},
+	{name: "Lsh16x64"},
+	{name: "Lsh32x8"},
+	{name: "Lsh32x16"},
+	{name: "Lsh32x32"},
+	{name: "Lsh32x64"},
+	{name: "Lsh64x8"},
+	{name: "Lsh64x16"},
+	{name: "Lsh64x32"},
+	{name: "Lsh64x64"},
 
-	{name: "Rsh8"}, // arg0 >> arg1
-	{name: "Rsh8U"},
-	{name: "Rsh16"},
-	{name: "Rsh16U"},
-	{name: "Rsh32"},
-	{name: "Rsh32U"},
-	{name: "Rsh64"},
-	{name: "Rsh64U"},
+	{name: "Rsh8x8"}, // arg0 >> arg1, signed
+	{name: "Rsh8x16"},
+	{name: "Rsh8x32"},
+	{name: "Rsh8x64"},
+	{name: "Rsh16x8"},
+	{name: "Rsh16x16"},
+	{name: "Rsh16x32"},
+	{name: "Rsh16x64"},
+	{name: "Rsh32x8"},
+	{name: "Rsh32x16"},
+	{name: "Rsh32x32"},
+	{name: "Rsh32x64"},
+	{name: "Rsh64x8"},
+	{name: "Rsh64x16"},
+	{name: "Rsh64x32"},
+	{name: "Rsh64x64"},
+
+	{name: "Rsh8Ux8"}, // arg0 >> arg1, unsigned
+	{name: "Rsh8Ux16"},
+	{name: "Rsh8Ux32"},
+	{name: "Rsh8Ux64"},
+	{name: "Rsh16Ux8"},
+	{name: "Rsh16Ux16"},
+	{name: "Rsh16Ux32"},
+	{name: "Rsh16Ux64"},
+	{name: "Rsh32Ux8"},
+	{name: "Rsh32Ux16"},
+	{name: "Rsh32Ux32"},
+	{name: "Rsh32Ux64"},
+	{name: "Rsh64Ux8"},
+	{name: "Rsh64Ux16"},
+	{name: "Rsh64Ux32"},
+	{name: "Rsh64Ux64"},
 
 	// 2-input comparisons
 	{name: "Eq8"}, // arg0 == arg1
@@ -110,11 +148,16 @@
 	// 1-input ops
 	{name: "Not"}, // !arg0
 
-	{name: "Neg8"}, // - arg0
+	{name: "Neg8"}, // -arg0
 	{name: "Neg16"},
 	{name: "Neg32"},
 	{name: "Neg64"},
 
+	{name: "Com8"}, // ^arg0
+	{name: "Com16"},
+	{name: "Com32"},
+	{name: "Com64"},
+
 	// Data movement
 	{name: "Phi"},  // select an argument based on which predecessor block we came from
 	{name: "Copy"}, // output = arg0