[dev.ssa] cmd/compile: promote byte/word operation

Writing to the low 8 or 16 bits of a register creates a false dependency.
Generate 32-bit operations instead when possible.

Change-Id: I8eb6c1c43a66424eec6baa91a660bceb6b80d1d3
Reviewed-on: https://go-review.googlesource.com/19506
Reviewed-by: Keith Randall <khr@golang.org>
Run-TryBot: Ilya Tocar <ilya.tocar@intel.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
diff --git a/src/cmd/compile/internal/gc/ssa.go b/src/cmd/compile/internal/gc/ssa.go
index 0081146..a2454e1 100644
--- a/src/cmd/compile/internal/gc/ssa.go
+++ b/src/cmd/compile/internal/gc/ssa.go
@@ -3793,7 +3793,7 @@
 			case ssa.OpAMD64ADDL:
 				asm = x86.ALEAL
 			case ssa.OpAMD64ADDW:
-				asm = x86.ALEAW
+				asm = x86.ALEAL
 			}
 			p := Prog(asm)
 			p.From.Type = obj.TYPE_MEM
@@ -3843,9 +3843,15 @@
 		opregreg(v.Op.Asm(), r, y)
 
 		if neg {
-			p := Prog(x86.ANEGQ) // TODO: use correct size?  This is mostly a hack until regalloc does 2-address correctly
-			p.To.Type = obj.TYPE_REG
-			p.To.Reg = r
+			if v.Op == ssa.OpAMD64SUBQ {
+				p := Prog(x86.ANEGQ)
+				p.To.Type = obj.TYPE_REG
+				p.To.Reg = r
+			} else { // Avoids partial registers write
+				p := Prog(x86.ANEGL)
+				p.To.Type = obj.TYPE_REG
+				p.To.Reg = r
+			}
 		}
 	case ssa.OpAMD64SUBSS, ssa.OpAMD64SUBSD, ssa.OpAMD64DIVSS, ssa.OpAMD64DIVSD:
 		r := regnum(v)
@@ -4035,7 +4041,7 @@
 				case ssa.OpAMD64ADDLconst:
 					asm = x86.AINCL
 				case ssa.OpAMD64ADDWconst:
-					asm = x86.AINCW
+					asm = x86.AINCL
 				}
 				p := Prog(asm)
 				p.To.Type = obj.TYPE_REG
@@ -4049,7 +4055,7 @@
 				case ssa.OpAMD64ADDLconst:
 					asm = x86.ADECL
 				case ssa.OpAMD64ADDWconst:
-					asm = x86.ADECW
+					asm = x86.ADECL
 				}
 				p := Prog(asm)
 				p.To.Type = obj.TYPE_REG
@@ -4071,7 +4077,7 @@
 		case ssa.OpAMD64ADDLconst:
 			asm = x86.ALEAL
 		case ssa.OpAMD64ADDWconst:
-			asm = x86.ALEAW
+			asm = x86.ALEAL
 		}
 		p := Prog(asm)
 		p.From.Type = obj.TYPE_MEM
@@ -4131,7 +4137,7 @@
 			case ssa.OpAMD64SUBLconst:
 				asm = x86.AINCL
 			case ssa.OpAMD64SUBWconst:
-				asm = x86.AINCW
+				asm = x86.AINCL
 			}
 			p := Prog(asm)
 			p.To.Type = obj.TYPE_REG
@@ -4144,7 +4150,7 @@
 			case ssa.OpAMD64SUBLconst:
 				asm = x86.ADECL
 			case ssa.OpAMD64SUBWconst:
-				asm = x86.ADECW
+				asm = x86.ADECL
 			}
 			p := Prog(asm)
 			p.To.Type = obj.TYPE_REG
@@ -4157,7 +4163,7 @@
 			case ssa.OpAMD64SUBLconst:
 				asm = x86.ALEAL
 			case ssa.OpAMD64SUBWconst:
-				asm = x86.ALEAW
+				asm = x86.ALEAL
 			}
 			p := Prog(asm)
 			p.From.Type = obj.TYPE_MEM
@@ -4596,8 +4602,8 @@
 		q := Prog(x86.ASETPS)
 		q.To.Type = obj.TYPE_REG
 		q.To.Reg = x86.REG_AX
-		// TODO AORQ copied from old code generator, why not AORB?
-		opregreg(x86.AORQ, regnum(v), x86.REG_AX)
+		// ORL avoids a partial register write and is smaller than ORQ, which the old compiler used.
+		opregreg(x86.AORL, regnum(v), x86.REG_AX)
 
 	case ssa.OpAMD64SETEQF:
 		p := Prog(v.Op.Asm())
@@ -4606,8 +4612,8 @@
 		q := Prog(x86.ASETPC)
 		q.To.Type = obj.TYPE_REG
 		q.To.Reg = x86.REG_AX
-		// TODO AANDQ copied from old code generator, why not AANDB?
-		opregreg(x86.AANDQ, regnum(v), x86.REG_AX)
+		// ANDL avoids a partial register write and is smaller than ANDQ, which the old compiler used.
+		opregreg(x86.AANDL, regnum(v), x86.REG_AX)
 
 	case ssa.OpAMD64InvertFlags:
 		v.Fatalf("InvertFlags should never make it to codegen %v", v)
@@ -5019,7 +5025,15 @@
 
 // loadByType returns the load instruction of the given type.
 func loadByType(t ssa.Type) int {
-	// For x86, there's no difference between load and store opcodes.
+	// Avoid partial register write
+	if !t.IsFloat() && t.Size() <= 2 {
+		if t.Size() == 1 {
+			return x86.AMOVBLZX
+		} else {
+			return x86.AMOVWLZX
+		}
+	}
+	// Otherwise, there's no difference between load and store opcodes.
 	return storeByType(t)
 }
 
@@ -5059,9 +5073,10 @@
 	} else {
 		switch t.Size() {
 		case 1:
-			return x86.AMOVB
+			// Avoids partial register write
+			return x86.AMOVL
 		case 2:
-			return x86.AMOVW
+			return x86.AMOVL
 		case 4:
 			return x86.AMOVL
 		case 8: