[dev.ssa] cmd/compile: simplify 386+PIC+globals a bit

We shouldn't issue instructions like MOVL foo(SB), AX directly from the
SSA backend.  Instead we should do LEAL foo(SB), AX; MOVL (AX), AX.

This simplifies obj logic because now only LEAL needs to be treated
specially.  The register allocator uses the LEAL to in effect allocate
the temporary register required for the shared library thunk calls.

Also, the LEALs can now be CSEd.  So code like
    var g int
    func f() { g += 5 }
Requires only one thunk call instead of 2.

Change-Id: Ib87d465f617f73af437445871d0ea91a630b2355
Reviewed-on: https://go-review.googlesource.com/26814
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: David Chase <drchase@google.com>
diff --git a/src/cmd/compile/internal/ssa/gen/386.rules b/src/cmd/compile/internal/ssa/gen/386.rules
index 0609d3b..6a0990d 100644
--- a/src/cmd/compile/internal/ssa/gen/386.rules
+++ b/src/cmd/compile/internal/ssa/gen/386.rules
@@ -673,51 +673,57 @@
 // what variables are being read/written by the ops.
 // Note: we turn off this merging for operations on globals when building
 // position-independent code (when Flag_shared is set).
-// PIC needs a spare register to load the PC into. For loads from globals into integer registers we use
-// the target register, but for other loads and all stores, we need a free register. Having the LEAL be
-// a separate instruction gives us that register.
-(MOVLload  [off1] {sym1} (LEAL [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
+// PIC needs a spare register to load the PC into.  Having the LEAL be
+// a separate instruction gives us that register.  Having the LEAL be
+// a separate instruction also allows it to be CSEd (which is good because
+// it compiles to a thunk call).
+(MOVLload  [off1] {sym1} (LEAL [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+  && (base.Op != OpSB || !config.ctxt.Flag_shared) ->
 	(MOVLload  [off1+off2] {mergeSym(sym1,sym2)} base mem)
-(MOVWload  [off1] {sym1} (LEAL [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
+(MOVWload  [off1] {sym1} (LEAL [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+  && (base.Op != OpSB || !config.ctxt.Flag_shared) ->
 	(MOVWload  [off1+off2] {mergeSym(sym1,sym2)} base mem)
-(MOVBload  [off1] {sym1} (LEAL [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
+(MOVBload  [off1] {sym1} (LEAL [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+  && (base.Op != OpSB || !config.ctxt.Flag_shared) ->
 	(MOVBload  [off1+off2] {mergeSym(sym1,sym2)} base mem)
 (MOVSSload [off1] {sym1} (LEAL [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-  && (base.Op != OpSB || !b.Func.Config.ctxt.Flag_shared) ->
+  && (base.Op != OpSB || !config.ctxt.Flag_shared) ->
 	(MOVSSload [off1+off2] {mergeSym(sym1,sym2)} base mem)
 (MOVSDload [off1] {sym1} (LEAL [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-  && (base.Op != OpSB || !b.Func.Config.ctxt.Flag_shared) ->
+  && (base.Op != OpSB || !config.ctxt.Flag_shared) ->
 	(MOVSDload [off1+off2] {mergeSym(sym1,sym2)} base mem)
 
-(MOVBLSXload [off1] {sym1} (LEAL [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
+(MOVBLSXload [off1] {sym1} (LEAL [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+  && (base.Op != OpSB || !config.ctxt.Flag_shared) ->
 	(MOVBLSXload [off1+off2] {mergeSym(sym1,sym2)} base mem)
-(MOVWLSXload [off1] {sym1} (LEAL [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
+(MOVWLSXload [off1] {sym1} (LEAL [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+  && (base.Op != OpSB || !config.ctxt.Flag_shared) ->
 	(MOVWLSXload [off1+off2] {mergeSym(sym1,sym2)} base mem)
 
 (MOVLstore  [off1] {sym1} (LEAL [off2] {sym2} base) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-  && (base.Op != OpSB || !b.Func.Config.ctxt.Flag_shared) ->
+  && (base.Op != OpSB || !config.ctxt.Flag_shared) ->
 	(MOVLstore  [off1+off2] {mergeSym(sym1,sym2)} base val mem)
 (MOVWstore  [off1] {sym1} (LEAL [off2] {sym2} base) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-  && (base.Op != OpSB || !b.Func.Config.ctxt.Flag_shared) ->
+  && (base.Op != OpSB || !config.ctxt.Flag_shared) ->
 	(MOVWstore  [off1+off2] {mergeSym(sym1,sym2)} base val mem)
 (MOVBstore  [off1] {sym1} (LEAL [off2] {sym2} base) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-  && (base.Op != OpSB || !b.Func.Config.ctxt.Flag_shared) ->
+  && (base.Op != OpSB || !config.ctxt.Flag_shared) ->
 	(MOVBstore  [off1+off2] {mergeSym(sym1,sym2)} base val mem)
 (MOVSSstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-  && (base.Op != OpSB || !b.Func.Config.ctxt.Flag_shared) ->
+  && (base.Op != OpSB || !config.ctxt.Flag_shared) ->
 	(MOVSSstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
 (MOVSDstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-  && (base.Op != OpSB || !b.Func.Config.ctxt.Flag_shared) ->
+  && (base.Op != OpSB || !config.ctxt.Flag_shared) ->
 	(MOVSDstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
 
 (MOVLstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem) && canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)
-  && (ptr.Op != OpSB || !b.Func.Config.ctxt.Flag_shared) ->
+  && (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
 	(MOVLstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
 (MOVWstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem) && canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)
-  && (ptr.Op != OpSB || !b.Func.Config.ctxt.Flag_shared) ->
+  && (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
 	(MOVWstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
 (MOVBstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem) && canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)
-  && (ptr.Op != OpSB || !b.Func.Config.ctxt.Flag_shared) ->
+  && (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
 	(MOVBstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
 
 // generating indexed loads and stores
diff --git a/src/cmd/compile/internal/ssa/rewrite386.go b/src/cmd/compile/internal/ssa/rewrite386.go
index b791c44..eda37e7 100644
--- a/src/cmd/compile/internal/ssa/rewrite386.go
+++ b/src/cmd/compile/internal/ssa/rewrite386.go
@@ -2502,7 +2502,7 @@
 	b := v.Block
 	_ = b
 	// match: (MOVBLSXload [off1] {sym1} (LEAL [off2] {sym2} base) mem)
-	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)   && (base.Op != OpSB || !config.ctxt.Flag_shared)
 	// result: (MOVBLSXload [off1+off2] {mergeSym(sym1,sym2)} base mem)
 	for {
 		off1 := v.AuxInt
@@ -2515,7 +2515,7 @@
 		sym2 := v_0.Aux
 		base := v_0.Args[0]
 		mem := v.Args[1]
-		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) {
 			break
 		}
 		v.reset(Op386MOVBLSXload)
@@ -2649,7 +2649,7 @@
 		return true
 	}
 	// match: (MOVBload  [off1] {sym1} (LEAL [off2] {sym2} base) mem)
-	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)   && (base.Op != OpSB || !config.ctxt.Flag_shared)
 	// result: (MOVBload  [off1+off2] {mergeSym(sym1,sym2)} base mem)
 	for {
 		off1 := v.AuxInt
@@ -2662,7 +2662,7 @@
 		sym2 := v_0.Aux
 		base := v_0.Args[0]
 		mem := v.Args[1]
-		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) {
 			break
 		}
 		v.reset(Op386MOVBload)
@@ -2867,7 +2867,7 @@
 		return true
 	}
 	// match: (MOVBstore  [off1] {sym1} (LEAL [off2] {sym2} base) val mem)
-	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)   && (base.Op != OpSB || !b.Func.Config.ctxt.Flag_shared)
+	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)   && (base.Op != OpSB || !config.ctxt.Flag_shared)
 	// result: (MOVBstore  [off1+off2] {mergeSym(sym1,sym2)} base val mem)
 	for {
 		off1 := v.AuxInt
@@ -2881,7 +2881,7 @@
 		base := v_0.Args[0]
 		val := v.Args[1]
 		mem := v.Args[2]
-		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !b.Func.Config.ctxt.Flag_shared)) {
+		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) {
 			break
 		}
 		v.reset(Op386MOVBstore)
@@ -3066,7 +3066,7 @@
 		return true
 	}
 	// match: (MOVBstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem)
-	// cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)   && (ptr.Op != OpSB || !b.Func.Config.ctxt.Flag_shared)
+	// cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)   && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
 	// result: (MOVBstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
 	for {
 		sc := v.AuxInt
@@ -3079,7 +3079,7 @@
 		sym2 := v_0.Aux
 		ptr := v_0.Args[0]
 		mem := v.Args[1]
-		if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) && (ptr.Op != OpSB || !b.Func.Config.ctxt.Flag_shared)) {
+		if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
 			break
 		}
 		v.reset(Op386MOVBstoreconst)
@@ -3454,7 +3454,7 @@
 		return true
 	}
 	// match: (MOVLload  [off1] {sym1} (LEAL [off2] {sym2} base) mem)
-	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)   && (base.Op != OpSB || !config.ctxt.Flag_shared)
 	// result: (MOVLload  [off1+off2] {mergeSym(sym1,sym2)} base mem)
 	for {
 		off1 := v.AuxInt
@@ -3467,7 +3467,7 @@
 		sym2 := v_0.Aux
 		base := v_0.Args[0]
 		mem := v.Args[1]
-		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) {
 			break
 		}
 		v.reset(Op386MOVLload)
@@ -3729,7 +3729,7 @@
 		return true
 	}
 	// match: (MOVLstore  [off1] {sym1} (LEAL [off2] {sym2} base) val mem)
-	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)   && (base.Op != OpSB || !b.Func.Config.ctxt.Flag_shared)
+	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)   && (base.Op != OpSB || !config.ctxt.Flag_shared)
 	// result: (MOVLstore  [off1+off2] {mergeSym(sym1,sym2)} base val mem)
 	for {
 		off1 := v.AuxInt
@@ -3743,7 +3743,7 @@
 		base := v_0.Args[0]
 		val := v.Args[1]
 		mem := v.Args[2]
-		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !b.Func.Config.ctxt.Flag_shared)) {
+		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) {
 			break
 		}
 		v.reset(Op386MOVLstore)
@@ -3865,7 +3865,7 @@
 		return true
 	}
 	// match: (MOVLstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem)
-	// cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)   && (ptr.Op != OpSB || !b.Func.Config.ctxt.Flag_shared)
+	// cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)   && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
 	// result: (MOVLstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
 	for {
 		sc := v.AuxInt
@@ -3878,7 +3878,7 @@
 		sym2 := v_0.Aux
 		ptr := v_0.Args[0]
 		mem := v.Args[1]
-		if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) && (ptr.Op != OpSB || !b.Func.Config.ctxt.Flag_shared)) {
+		if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
 			break
 		}
 		v.reset(Op386MOVLstoreconst)
@@ -4263,7 +4263,7 @@
 		return true
 	}
 	// match: (MOVSDload [off1] {sym1} (LEAL [off2] {sym2} base) mem)
-	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)   && (base.Op != OpSB || !b.Func.Config.ctxt.Flag_shared)
+	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)   && (base.Op != OpSB || !config.ctxt.Flag_shared)
 	// result: (MOVSDload [off1+off2] {mergeSym(sym1,sym2)} base mem)
 	for {
 		off1 := v.AuxInt
@@ -4276,7 +4276,7 @@
 		sym2 := v_0.Aux
 		base := v_0.Args[0]
 		mem := v.Args[1]
-		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !b.Func.Config.ctxt.Flag_shared)) {
+		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) {
 			break
 		}
 		v.reset(Op386MOVSDload)
@@ -4491,7 +4491,7 @@
 		return true
 	}
 	// match: (MOVSDstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem)
-	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)   && (base.Op != OpSB || !b.Func.Config.ctxt.Flag_shared)
+	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)   && (base.Op != OpSB || !config.ctxt.Flag_shared)
 	// result: (MOVSDstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
 	for {
 		off1 := v.AuxInt
@@ -4505,7 +4505,7 @@
 		base := v_0.Args[0]
 		val := v.Args[1]
 		mem := v.Args[2]
-		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !b.Func.Config.ctxt.Flag_shared)) {
+		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) {
 			break
 		}
 		v.reset(Op386MOVSDstore)
@@ -4752,7 +4752,7 @@
 		return true
 	}
 	// match: (MOVSSload [off1] {sym1} (LEAL [off2] {sym2} base) mem)
-	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)   && (base.Op != OpSB || !b.Func.Config.ctxt.Flag_shared)
+	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)   && (base.Op != OpSB || !config.ctxt.Flag_shared)
 	// result: (MOVSSload [off1+off2] {mergeSym(sym1,sym2)} base mem)
 	for {
 		off1 := v.AuxInt
@@ -4765,7 +4765,7 @@
 		sym2 := v_0.Aux
 		base := v_0.Args[0]
 		mem := v.Args[1]
-		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !b.Func.Config.ctxt.Flag_shared)) {
+		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) {
 			break
 		}
 		v.reset(Op386MOVSSload)
@@ -4980,7 +4980,7 @@
 		return true
 	}
 	// match: (MOVSSstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem)
-	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)   && (base.Op != OpSB || !b.Func.Config.ctxt.Flag_shared)
+	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)   && (base.Op != OpSB || !config.ctxt.Flag_shared)
 	// result: (MOVSSstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
 	for {
 		off1 := v.AuxInt
@@ -4994,7 +4994,7 @@
 		base := v_0.Args[0]
 		val := v.Args[1]
 		mem := v.Args[2]
-		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !b.Func.Config.ctxt.Flag_shared)) {
+		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) {
 			break
 		}
 		v.reset(Op386MOVSSstore)
@@ -5247,7 +5247,7 @@
 	b := v.Block
 	_ = b
 	// match: (MOVWLSXload [off1] {sym1} (LEAL [off2] {sym2} base) mem)
-	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)   && (base.Op != OpSB || !config.ctxt.Flag_shared)
 	// result: (MOVWLSXload [off1+off2] {mergeSym(sym1,sym2)} base mem)
 	for {
 		off1 := v.AuxInt
@@ -5260,7 +5260,7 @@
 		sym2 := v_0.Aux
 		base := v_0.Args[0]
 		mem := v.Args[1]
-		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) {
 			break
 		}
 		v.reset(Op386MOVWLSXload)
@@ -5421,7 +5421,7 @@
 		return true
 	}
 	// match: (MOVWload  [off1] {sym1} (LEAL [off2] {sym2} base) mem)
-	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)   && (base.Op != OpSB || !config.ctxt.Flag_shared)
 	// result: (MOVWload  [off1+off2] {mergeSym(sym1,sym2)} base mem)
 	for {
 		off1 := v.AuxInt
@@ -5434,7 +5434,7 @@
 		sym2 := v_0.Aux
 		base := v_0.Args[0]
 		mem := v.Args[1]
-		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) {
 			break
 		}
 		v.reset(Op386MOVWload)
@@ -5738,7 +5738,7 @@
 		return true
 	}
 	// match: (MOVWstore  [off1] {sym1} (LEAL [off2] {sym2} base) val mem)
-	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)   && (base.Op != OpSB || !b.Func.Config.ctxt.Flag_shared)
+	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)   && (base.Op != OpSB || !config.ctxt.Flag_shared)
 	// result: (MOVWstore  [off1+off2] {mergeSym(sym1,sym2)} base val mem)
 	for {
 		off1 := v.AuxInt
@@ -5752,7 +5752,7 @@
 		base := v_0.Args[0]
 		val := v.Args[1]
 		mem := v.Args[2]
-		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !b.Func.Config.ctxt.Flag_shared)) {
+		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) {
 			break
 		}
 		v.reset(Op386MOVWstore)
@@ -5965,7 +5965,7 @@
 		return true
 	}
 	// match: (MOVWstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem)
-	// cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)   && (ptr.Op != OpSB || !b.Func.Config.ctxt.Flag_shared)
+	// cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)   && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
 	// result: (MOVWstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
 	for {
 		sc := v.AuxInt
@@ -5978,7 +5978,7 @@
 		sym2 := v_0.Aux
 		ptr := v_0.Args[0]
 		mem := v.Args[1]
-		if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) && (ptr.Op != OpSB || !b.Func.Config.ctxt.Flag_shared)) {
+		if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
 			break
 		}
 		v.reset(Op386MOVWstoreconst)
diff --git a/src/cmd/internal/obj/x86/obj6.go b/src/cmd/internal/obj/x86/obj6.go
index 7b868ba..c4b0d89 100644
--- a/src/cmd/internal/obj/x86/obj6.go
+++ b/src/cmd/internal/obj/x86/obj6.go
@@ -334,15 +334,12 @@
 		lea = ALEAL
 		mov = AMOVL
 		reg = REG_CX
-		if p.To.Type == obj.TYPE_REG && p.To.Reg != p.From.Reg && p.To.Reg != p.From.Index {
-			switch p.As {
-			case ALEAL, AMOVL, AMOVWLZX, AMOVBLZX, AMOVWLSX, AMOVBLSX:
-				// Special case: clobber the destination register with
-				// the PC so we don't have to clobber CX.
-				// The SSA backend depends on CX not being clobbered across these instructions.
-				// See cmd/compile/internal/ssa/gen/386.rules (search for Flag_shared).
-				reg = p.To.Reg
-			}
+		if p.As == ALEAL && p.To.Reg != p.From.Reg && p.To.Reg != p.From.Index {
+			// Special case: clobber the destination register with
+			// the PC so we don't have to clobber CX.
+			// The SSA backend depends on CX not being clobbered across LEAL.
+			// See cmd/compile/internal/ssa/gen/386.rules (search for Flag_shared).
+			reg = p.To.Reg
 		}
 	}
 
@@ -554,12 +551,10 @@
 		return
 	}
 	var dst int16 = REG_CX
-	if p.To.Type == obj.TYPE_REG && p.To.Reg != p.From.Reg && p.To.Reg != p.From.Index {
-		switch p.As {
-		case ALEAL, AMOVL, AMOVWLZX, AMOVBLZX, AMOVWLSX, AMOVBLSX:
-			dst = p.To.Reg
-			// Why?  See the comment near the top of rewriteToUseGot above.
-		}
+	if (p.As == ALEAL || p.As == AMOVL) && p.To.Reg != p.From.Reg && p.To.Reg != p.From.Index {
+		dst = p.To.Reg
+		// Why?  See the comment near the top of rewriteToUseGot above.
+		// AMOVLs might be introduced by the GOT rewrites.
 	}
 	q := obj.Appendp(ctxt, p)
 	q.RegTo2 = 1