[dev.ssa] cmd/compile: use 32-bit load to read writebarrier

Avoid targeting a partial register with load;
ensure source of load (writebarrier) is aligned.

Better yet would be "CMPB $1,writebarrier" but that requires
wrestling with flagalloc (mem operand complicates moving
instruction around).

Didn't see a change in time for
   benchcmd -n 10 Build go build net/http

Verified that we clean the code up properly:
   0x20a8 <main.main+104>:	mov    0xc30a2(%rip),%eax
                            # 0xc5150 <runtime.writeBarrier>
   0x20ae <main.main+110>:	test   %al,%al

Change-Id: Id5fb8c260eaec27bd727cb0ae1476c60343b0986
Reviewed-on: https://go-review.googlesource.com/19998
Reviewed-by: Keith Randall <khr@golang.org>
diff --git a/src/cmd/compile/internal/gc/ssa.go b/src/cmd/compile/internal/gc/ssa.go
index 9847806..8e68c20 100644
--- a/src/cmd/compile/internal/gc/ssa.go
+++ b/src/cmd/compile/internal/gc/ssa.go
@@ -2718,9 +2718,11 @@
 	bEnd := s.f.NewBlock(ssa.BlockPlain)
 
 	aux := &ssa.ExternSymbol{Types[TBOOL], syslook("writeBarrier", 0).Sym}
-	flagaddr := s.newValue1A(ssa.OpAddr, Ptrto(Types[TBOOL]), aux, s.sb)
+	flagaddr := s.newValue1A(ssa.OpAddr, Ptrto(Types[TUINT32]), aux, s.sb)
 	// TODO: select the .enabled field.  It is currently first, so not needed for now.
-	flag := s.newValue2(ssa.OpLoad, Types[TBOOL], flagaddr, s.mem())
+	// Load word, test byte, avoiding partial register write from load byte.
+	flag := s.newValue2(ssa.OpLoad, Types[TUINT32], flagaddr, s.mem())
+	flag = s.newValue1(ssa.OpTrunc64to8, Types[TBOOL], flag)
 	b := s.endBlock()
 	b.Kind = ssa.BlockIf
 	b.Likely = ssa.BranchUnlikely
@@ -2761,9 +2763,11 @@
 	bEnd := s.f.NewBlock(ssa.BlockPlain)
 
 	aux := &ssa.ExternSymbol{Types[TBOOL], syslook("writeBarrier", 0).Sym}
-	flagaddr := s.newValue1A(ssa.OpAddr, Ptrto(Types[TBOOL]), aux, s.sb)
+	flagaddr := s.newValue1A(ssa.OpAddr, Ptrto(Types[TUINT32]), aux, s.sb)
 	// TODO: select the .enabled field.  It is currently first, so not needed for now.
-	flag := s.newValue2(ssa.OpLoad, Types[TBOOL], flagaddr, s.mem())
+	// Load word, test byte, avoiding partial register write from load byte.
+	flag := s.newValue2(ssa.OpLoad, Types[TUINT32], flagaddr, s.mem())
+	flag = s.newValue1(ssa.OpTrunc64to8, Types[TBOOL], flag)
 	b := s.endBlock()
 	b.Kind = ssa.BlockIf
 	b.Likely = ssa.BranchUnlikely