cmd/compile: convert 386 port to use addressing modes pass

Update #36468

Change-Id: Idfdb845d097994689be450d6e8a57fa9adb57166
Reviewed-on: https://go-review.googlesource.com/c/go/+/222782
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Josh Bleecher Snyder <josharian@gmail.com>
diff --git a/src/cmd/compile/internal/ssa/addressingmodes.go b/src/cmd/compile/internal/ssa/addressingmodes.go
index 8874b56..2af8a4d 100644
--- a/src/cmd/compile/internal/ssa/addressingmodes.go
+++ b/src/cmd/compile/internal/ssa/addressingmodes.go
@@ -11,8 +11,8 @@
 	default:
 		// Most architectures can't do this.
 		return
-	case "amd64":
-		// TODO: 386, s390x?
+	case "amd64", "386":
+		// TODO: s390x?
 	}
 
 	var tmp []*Value
@@ -21,7 +21,17 @@
 			if !combineFirst[v.Op] {
 				continue
 			}
-			p := v.Args[0]
+			// Matched operations normally have the pointer in arg[0], and
+			// their results have the pointer in arg[0] and the index in arg[1].
+			// The exception is operations which update a register,
+			// which are marked with resultInArg0. Those have
+			// the pointer in arg[1], and the corresponding result op
+			// has the pointer in arg[1] and the index in arg[2].
+			ptrIndex := 0
+			if opcodeTable[v.Op].resultInArg0 {
+				ptrIndex = 1
+			}
+			p := v.Args[ptrIndex]
 			c, ok := combine[[2]Op{v.Op, p.Op}]
 			if !ok {
 				continue
@@ -71,10 +81,11 @@
 				f.Fatalf("unknown aux combining for %s and %s\n", v.Op, p.Op)
 			}
 			// Combine the operations.
-			tmp = append(tmp[:0], v.Args[1:]...)
+			tmp = append(tmp[:0], v.Args[:ptrIndex]...)
+			tmp = append(tmp, p.Args...)
+			tmp = append(tmp, v.Args[ptrIndex+1:]...)
 			v.resetArgs()
 			v.Op = c
-			v.AddArgs(p.Args...)
 			v.AddArgs(tmp...)
 		}
 	}
@@ -97,6 +108,7 @@
 //   x.Args[0].Args + x.Args[1:]
 // Additionally, the Aux/AuxInt from x.Args[0] is merged into x.
 var combine = map[[2]Op]Op{
+	// amd64
 	[2]Op{OpAMD64MOVBload, OpAMD64ADDQ}:  OpAMD64MOVBloadidx1,
 	[2]Op{OpAMD64MOVWload, OpAMD64ADDQ}:  OpAMD64MOVWloadidx1,
 	[2]Op{OpAMD64MOVLload, OpAMD64ADDQ}:  OpAMD64MOVLloadidx1,
@@ -150,5 +162,64 @@
 	[2]Op{OpAMD64MOVQstoreconst, OpAMD64LEAQ1}: OpAMD64MOVQstoreconstidx1,
 	[2]Op{OpAMD64MOVQstoreconst, OpAMD64LEAQ8}: OpAMD64MOVQstoreconstidx8,
 
-	// TODO: 386
+	// 386
+	[2]Op{Op386MOVBload, Op386ADDL}:  Op386MOVBloadidx1,
+	[2]Op{Op386MOVWload, Op386ADDL}:  Op386MOVWloadidx1,
+	[2]Op{Op386MOVLload, Op386ADDL}:  Op386MOVLloadidx1,
+	[2]Op{Op386MOVSSload, Op386ADDL}: Op386MOVSSloadidx1,
+	[2]Op{Op386MOVSDload, Op386ADDL}: Op386MOVSDloadidx1,
+
+	[2]Op{Op386MOVBstore, Op386ADDL}:  Op386MOVBstoreidx1,
+	[2]Op{Op386MOVWstore, Op386ADDL}:  Op386MOVWstoreidx1,
+	[2]Op{Op386MOVLstore, Op386ADDL}:  Op386MOVLstoreidx1,
+	[2]Op{Op386MOVSSstore, Op386ADDL}: Op386MOVSSstoreidx1,
+	[2]Op{Op386MOVSDstore, Op386ADDL}: Op386MOVSDstoreidx1,
+
+	[2]Op{Op386MOVBstoreconst, Op386ADDL}: Op386MOVBstoreconstidx1,
+	[2]Op{Op386MOVWstoreconst, Op386ADDL}: Op386MOVWstoreconstidx1,
+	[2]Op{Op386MOVLstoreconst, Op386ADDL}: Op386MOVLstoreconstidx1,
+
+	[2]Op{Op386MOVBload, Op386LEAL1}:  Op386MOVBloadidx1,
+	[2]Op{Op386MOVWload, Op386LEAL1}:  Op386MOVWloadidx1,
+	[2]Op{Op386MOVWload, Op386LEAL2}:  Op386MOVWloadidx2,
+	[2]Op{Op386MOVLload, Op386LEAL1}:  Op386MOVLloadidx1,
+	[2]Op{Op386MOVLload, Op386LEAL4}:  Op386MOVLloadidx4,
+	[2]Op{Op386MOVSSload, Op386LEAL1}: Op386MOVSSloadidx1,
+	[2]Op{Op386MOVSSload, Op386LEAL4}: Op386MOVSSloadidx4,
+	[2]Op{Op386MOVSDload, Op386LEAL1}: Op386MOVSDloadidx1,
+	[2]Op{Op386MOVSDload, Op386LEAL8}: Op386MOVSDloadidx8,
+
+	[2]Op{Op386MOVBstore, Op386LEAL1}:  Op386MOVBstoreidx1,
+	[2]Op{Op386MOVWstore, Op386LEAL1}:  Op386MOVWstoreidx1,
+	[2]Op{Op386MOVWstore, Op386LEAL2}:  Op386MOVWstoreidx2,
+	[2]Op{Op386MOVLstore, Op386LEAL1}:  Op386MOVLstoreidx1,
+	[2]Op{Op386MOVLstore, Op386LEAL4}:  Op386MOVLstoreidx4,
+	[2]Op{Op386MOVSSstore, Op386LEAL1}: Op386MOVSSstoreidx1,
+	[2]Op{Op386MOVSSstore, Op386LEAL4}: Op386MOVSSstoreidx4,
+	[2]Op{Op386MOVSDstore, Op386LEAL1}: Op386MOVSDstoreidx1,
+	[2]Op{Op386MOVSDstore, Op386LEAL8}: Op386MOVSDstoreidx8,
+
+	[2]Op{Op386MOVBstoreconst, Op386LEAL1}: Op386MOVBstoreconstidx1,
+	[2]Op{Op386MOVWstoreconst, Op386LEAL1}: Op386MOVWstoreconstidx1,
+	[2]Op{Op386MOVWstoreconst, Op386LEAL2}: Op386MOVWstoreconstidx2,
+	[2]Op{Op386MOVLstoreconst, Op386LEAL1}: Op386MOVLstoreconstidx1,
+	[2]Op{Op386MOVLstoreconst, Op386LEAL4}: Op386MOVLstoreconstidx4,
+
+	[2]Op{Op386ADDLload, Op386LEAL4}: Op386ADDLloadidx4,
+	[2]Op{Op386SUBLload, Op386LEAL4}: Op386SUBLloadidx4,
+	[2]Op{Op386MULLload, Op386LEAL4}: Op386MULLloadidx4,
+	[2]Op{Op386ANDLload, Op386LEAL4}: Op386ANDLloadidx4,
+	[2]Op{Op386ORLload, Op386LEAL4}:  Op386ORLloadidx4,
+	[2]Op{Op386XORLload, Op386LEAL4}: Op386XORLloadidx4,
+
+	[2]Op{Op386ADDLmodify, Op386LEAL4}: Op386ADDLmodifyidx4,
+	[2]Op{Op386SUBLmodify, Op386LEAL4}: Op386SUBLmodifyidx4,
+	[2]Op{Op386ANDLmodify, Op386LEAL4}: Op386ANDLmodifyidx4,
+	[2]Op{Op386ORLmodify, Op386LEAL4}:  Op386ORLmodifyidx4,
+	[2]Op{Op386XORLmodify, Op386LEAL4}: Op386XORLmodifyidx4,
+
+	[2]Op{Op386ADDLconstmodify, Op386LEAL4}: Op386ADDLconstmodifyidx4,
+	[2]Op{Op386ANDLconstmodify, Op386LEAL4}: Op386ANDLconstmodifyidx4,
+	[2]Op{Op386ORLconstmodify, Op386LEAL4}:  Op386ORLconstmodifyidx4,
+	[2]Op{Op386XORLconstmodify, Op386LEAL4}: Op386XORLconstmodifyidx4,
 }
diff --git a/src/cmd/compile/internal/ssa/gen/386.rules b/src/cmd/compile/internal/ssa/gen/386.rules
index 64a6cba..4798473 100644
--- a/src/cmd/compile/internal/ssa/gen/386.rules
+++ b/src/cmd/compile/internal/ssa/gen/386.rules
@@ -588,10 +588,6 @@
 (MOVWLSX x:(MOVWload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWLSXload <v.Type> [off] {sym} ptr mem)
 (MOVWLZX x:(MOVWload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWload <v.Type> [off] {sym} ptr mem)
 
-(MOVBLZX x:(MOVBloadidx1 [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBloadidx1 <v.Type> [off] {sym} ptr idx mem)
-(MOVWLZX x:(MOVWloadidx1 [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWloadidx1 <v.Type> [off] {sym} ptr idx mem)
-(MOVWLZX x:(MOVWloadidx2 [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWloadidx2 <v.Type> [off] {sym} ptr idx mem)
-
 // replace load from same location as preceding store with zero/sign extension (or copy in case of full width)
 (MOVBload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVBLZX x)
 (MOVWload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVWLZX x)
@@ -611,34 +607,22 @@
 
 // fold constants into memory operations
 // Note that this is not always a good idea because if not all the uses of
-// the ADDQconst get eliminated, we still have to compute the ADDQconst and we now
-// have potentially two live values (ptr and (ADDQconst [off] ptr)) instead of one.
+// the ADDLconst get eliminated, we still have to compute the ADDLconst and we now
+// have potentially two live values (ptr and (ADDLconst [off] ptr)) instead of one.
 // Nevertheless, let's do it!
 (MOV(L|W|B|SS|SD)load  [off1] {sym} (ADDLconst [off2] ptr) mem) && is32Bit(off1+off2) -> (MOV(L|W|B|SS|SD)load  [off1+off2] {sym} ptr mem)
 (MOV(L|W|B|SS|SD)store  [off1] {sym} (ADDLconst [off2] ptr) val mem) && is32Bit(off1+off2) -> (MOV(L|W|B|SS|SD)store  [off1+off2] {sym} ptr val mem)
 
 ((ADD|SUB|MUL|AND|OR|XOR)Lload [off1] {sym} val (ADDLconst [off2] base) mem) && is32Bit(off1+off2) ->
 	((ADD|SUB|MUL|AND|OR|XOR)Lload [off1+off2] {sym} val base mem)
-((ADD|SUB|MUL|AND|OR|XOR)Lloadidx4 [off1] {sym} val (ADDLconst [off2] base) idx mem) && is32Bit(off1+off2) ->
-	((ADD|SUB|MUL|AND|OR|XOR)Lloadidx4 [off1+off2] {sym} val base idx mem)
-((ADD|SUB|MUL|AND|OR|XOR)Lloadidx4 [off1] {sym} val base (ADDLconst [off2] idx) mem) && is32Bit(off1+off2*4) ->
-	((ADD|SUB|MUL|AND|OR|XOR)Lloadidx4 [off1+off2*4] {sym} val base idx mem)
 ((ADD|SUB|MUL|DIV)SSload [off1] {sym} val (ADDLconst [off2] base) mem) && is32Bit(off1+off2) ->
 	((ADD|SUB|MUL|DIV)SSload [off1+off2] {sym} val base mem)
 ((ADD|SUB|MUL|DIV)SDload [off1] {sym} val (ADDLconst [off2] base) mem) && is32Bit(off1+off2) ->
 	((ADD|SUB|MUL|DIV)SDload [off1+off2] {sym} val base mem)
 ((ADD|SUB|AND|OR|XOR)Lmodify [off1] {sym} (ADDLconst [off2] base) val mem) && is32Bit(off1+off2) ->
 	((ADD|SUB|AND|OR|XOR)Lmodify [off1+off2] {sym} base val mem)
-((ADD|SUB|AND|OR|XOR)Lmodifyidx4 [off1] {sym} (ADDLconst [off2] base) idx val mem) && is32Bit(off1+off2) ->
-	((ADD|SUB|AND|OR|XOR)Lmodifyidx4 [off1+off2] {sym} base idx val mem)
-((ADD|SUB|AND|OR|XOR)Lmodifyidx4 [off1] {sym} base (ADDLconst [off2] idx) val mem) && is32Bit(off1+off2*4) ->
-	((ADD|SUB|AND|OR|XOR)Lmodifyidx4 [off1+off2*4] {sym} base idx val mem)
 ((ADD|AND|OR|XOR)Lconstmodify [valoff1] {sym} (ADDLconst [off2] base) mem) && ValAndOff(valoff1).canAdd(off2) ->
 	((ADD|AND|OR|XOR)Lconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem)
-((ADD|AND|OR|XOR)Lconstmodifyidx4 [valoff1] {sym} (ADDLconst [off2] base) idx mem) && ValAndOff(valoff1).canAdd(off2) ->
-	((ADD|AND|OR|XOR)Lconstmodifyidx4 [ValAndOff(valoff1).add(off2)] {sym} base idx mem)
-((ADD|AND|OR|XOR)Lconstmodifyidx4 [valoff1] {sym} base (ADDLconst [off2] idx) mem) && ValAndOff(valoff1).canAdd(off2*4) ->
-	((ADD|AND|OR|XOR)Lconstmodifyidx4 [ValAndOff(valoff1).add(off2*4)] {sym} base idx mem)
 
 // Fold constants into stores.
 (MOVLstore [off] {sym} ptr (MOVLconst [c]) mem) && validOff(off) ->
@@ -652,7 +636,7 @@
 (MOV(L|W|B)storeconst [sc] {s} (ADDLconst [off] ptr) mem) && ValAndOff(sc).canAdd(off) ->
 	(MOV(L|W|B)storeconst [ValAndOff(sc).add(off)] {s} ptr mem)
 
-// We need to fold LEAQ into the MOVx ops so that the live variable analysis knows
+// We need to fold LEAL into the MOVx ops so that the live variable analysis knows
 // what variables are being read/written by the ops.
 // Note: we turn off this merging for operations on globals when building
 // position-independent code (when Flag_shared is set).
@@ -672,31 +656,9 @@
   && (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
 	(MOV(L|W|B)storeconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
 
-// generating indexed loads and stores
-(MOV(B|W|L|SS|SD)load [off1] {sym1} (LEAL1 [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
-	(MOV(B|W|L|SS|SD)loadidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
-(MOVWload [off1] {sym1} (LEAL2 [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
-	(MOVWloadidx2 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
-(MOV(L|SS)load [off1] {sym1} (LEAL4 [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
-	(MOV(L|SS)loadidx4 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
-(MOVSDload [off1] {sym1} (LEAL8 [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
-	(MOVSDloadidx8 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
-
-(MOV(B|W|L|SS|SD)store [off1] {sym1} (LEAL1 [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
-	(MOV(B|W|L|SS|SD)storeidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
-(MOVWstore [off1] {sym1} (LEAL2 [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
-	(MOVWstoreidx2 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
-(MOV(L|SS)store [off1] {sym1} (LEAL4 [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
-	(MOV(L|SS)storeidx4 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
-(MOVSDstore [off1] {sym1} (LEAL8 [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
-	(MOVSDstoreidx8 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
-
 ((ADD|SUB|MUL|AND|OR|XOR)Lload [off1] {sym1} val (LEAL [off2] {sym2} base) mem)
 	&& is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) ->
 	((ADD|SUB|MUL|AND|OR|XOR)Lload [off1+off2] {mergeSym(sym1,sym2)} val base mem)
-((ADD|SUB|MUL|AND|OR|XOR)Lloadidx4 [off1] {sym1} val (LEAL [off2] {sym2} base) idx mem)
-	&& is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) ->
-	((ADD|SUB|MUL|AND|OR|XOR)Lloadidx4 [off1+off2] {mergeSym(sym1,sym2)} val base idx mem)
 ((ADD|SUB|MUL|DIV)SSload [off1] {sym1} val (LEAL [off2] {sym2} base) mem)
 	&& is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) ->
 	((ADD|SUB|MUL|DIV)SSload [off1+off2] {mergeSym(sym1,sym2)} val base mem)
@@ -706,97 +668,20 @@
 ((ADD|SUB|AND|OR|XOR)Lmodify [off1] {sym1} (LEAL [off2] {sym2} base) val mem)
 	&& is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) ->
 	((ADD|SUB|AND|OR|XOR)Lmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem)
-((ADD|SUB|AND|OR|XOR)Lmodifyidx4 [off1] {sym1} (LEAL [off2] {sym2} base) idx val mem)
-	&& is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) ->
-	((ADD|SUB|AND|OR|XOR)Lmodifyidx4 [off1+off2] {mergeSym(sym1,sym2)} base idx val mem)
 ((ADD|AND|OR|XOR)Lconstmodify [valoff1] {sym1} (LEAL [off2] {sym2} base) mem)
 	&& ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) ->
 	((ADD|AND|OR|XOR)Lconstmodify [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem)
-((ADD|AND|OR|XOR)Lconstmodifyidx4 [valoff1] {sym1} (LEAL [off2] {sym2} base) idx mem)
-	&& ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) ->
-	((ADD|AND|OR|XOR)Lconstmodifyidx4 [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base idx mem)
-
-(MOV(B|W|L|SS|SD)load [off] {sym} (ADDL ptr idx) mem) && ptr.Op != OpSB -> (MOV(B|W|L|SS|SD)loadidx1 [off] {sym} ptr idx mem)
-(MOV(B|W|L|SS|SD)store [off] {sym} (ADDL ptr idx) val mem) && ptr.Op != OpSB -> (MOV(B|W|L|SS|SD)storeidx1 [off] {sym} ptr idx val mem)
-
-(MOV(B|W|L)storeconst [x] {sym1} (LEAL1 [off] {sym2} ptr idx) mem) && canMergeSym(sym1, sym2) ->
-	(MOV(B|W|L)storeconstidx1 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
-(MOVWstoreconst [x] {sym1} (LEAL2 [off] {sym2} ptr idx) mem) && canMergeSym(sym1, sym2) ->
-	(MOVWstoreconstidx2 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
-(MOVLstoreconst [x] {sym1} (LEAL4 [off] {sym2} ptr idx) mem) && canMergeSym(sym1, sym2) ->
-	(MOVLstoreconstidx4 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
-
-(MOV(B|W|L)storeconst [x] {sym} (ADDL ptr idx) mem) -> (MOV(B|W|L)storeconstidx1 [x] {sym} ptr idx mem)
-
-// combine SHLL into indexed loads and stores
-(MOVWloadidx1 [c] {sym} ptr (SHLLconst [1] idx) mem) -> (MOVWloadidx2 [c] {sym} ptr idx mem)
-(MOVLloadidx1 [c] {sym} ptr (SHLLconst [2] idx) mem) -> (MOVLloadidx4 [c] {sym} ptr idx mem)
-(MOVWstoreidx1 [c] {sym} ptr (SHLLconst [1] idx) val mem) -> (MOVWstoreidx2 [c] {sym} ptr idx val mem)
-(MOVLstoreidx1 [c] {sym} ptr (SHLLconst [2] idx) val mem) -> (MOVLstoreidx4 [c] {sym} ptr idx val mem)
-(MOVWstoreconstidx1 [c] {sym} ptr (SHLLconst [1] idx) mem) -> (MOVWstoreconstidx2 [c] {sym} ptr idx mem)
-(MOVLstoreconstidx1 [c] {sym} ptr (SHLLconst [2] idx) mem) -> (MOVLstoreconstidx4 [c] {sym} ptr idx mem)
-
-// combine ADDL into indexed loads and stores
-(MOV(B|W|L|SS|SD)loadidx1 [c] {sym} (ADDLconst [d] ptr) idx mem) -> (MOV(B|W|L|SS|SD)loadidx1 [int64(int32(c+d))] {sym} ptr idx mem)
-(MOVWloadidx2 [c] {sym} (ADDLconst [d] ptr) idx mem) -> (MOVWloadidx2 [int64(int32(c+d))] {sym} ptr idx mem)
-(MOV(L|SS)loadidx4 [c] {sym} (ADDLconst [d] ptr) idx mem) -> (MOV(L|SS)loadidx4 [int64(int32(c+d))] {sym} ptr idx mem)
-(MOVSDloadidx8 [c] {sym} (ADDLconst [d] ptr) idx mem) -> (MOVSDloadidx8 [int64(int32(c+d))] {sym} ptr idx mem)
-
-(MOV(B|W|L|SS|SD)storeidx1 [c] {sym} (ADDLconst [d] ptr) idx val mem) -> (MOV(B|W|L|SS|SD)storeidx1 [int64(int32(c+d))] {sym} ptr idx val mem)
-(MOVWstoreidx2 [c] {sym} (ADDLconst [d] ptr) idx val mem) -> (MOVWstoreidx2 [int64(int32(c+d))] {sym} ptr idx val mem)
-(MOV(L|SS)storeidx4 [c] {sym} (ADDLconst [d] ptr) idx val mem) -> (MOV(L|SS)storeidx4 [int64(int32(c+d))] {sym} ptr idx val mem)
-(MOVSDstoreidx8 [c] {sym} (ADDLconst [d] ptr) idx val mem) -> (MOVSDstoreidx8 [int64(int32(c+d))] {sym} ptr idx val mem)
-
-(MOV(B|W|L|SS|SD)loadidx1 [c] {sym} ptr (ADDLconst [d] idx) mem) -> (MOV(B|W|L|SS|SD)loadidx1 [int64(int32(c+d))] {sym} ptr idx mem)
-(MOVWloadidx2 [c] {sym} ptr (ADDLconst [d] idx) mem) -> (MOVWloadidx2 [int64(int32(c+2*d))] {sym} ptr idx mem)
-(MOV(L|SS)loadidx4 [c] {sym} ptr (ADDLconst [d] idx) mem) -> (MOV(L|SS)loadidx4 [int64(int32(c+4*d))] {sym} ptr idx mem)
-(MOVSDloadidx8 [c] {sym} ptr (ADDLconst [d] idx) mem) -> (MOVSDloadidx8 [int64(int32(c+8*d))] {sym} ptr idx mem)
-
-(MOV(B|W|L|SS|SD)storeidx1 [c] {sym} ptr (ADDLconst [d] idx) val mem) -> (MOV(B|W|L|SS|SD)storeidx1  [int64(int32(c+d))]   {sym} ptr idx val mem)
-(MOVWstoreidx2 [c] {sym} ptr (ADDLconst [d] idx) val mem) -> (MOVWstoreidx2  [int64(int32(c+2*d))] {sym} ptr idx val mem)
-(MOV(L|SS)storeidx4 [c] {sym} ptr (ADDLconst [d] idx) val mem) -> (MOV(L|SS)storeidx4 [int64(int32(c+4*d))] {sym} ptr idx val mem)
-(MOVSDstoreidx8 [c] {sym} ptr (ADDLconst [d] idx) val mem) -> (MOVSDstoreidx8 [int64(int32(c+8*d))] {sym} ptr idx val mem)
 
 // Merge load/store to op
 ((ADD|AND|OR|XOR|SUB|MUL)L x l:(MOVLload [off] {sym} ptr mem)) && canMergeLoadClobber(v, l, x) && clobber(l) -> ((ADD|AND|OR|XOR|SUB|MUL)Lload x [off] {sym} ptr mem)
-((ADD|AND|OR|XOR|SUB|MUL)L x l:(MOVLloadidx4 [off] {sym} ptr idx mem)) && canMergeLoadClobber(v, l, x) && clobber(l) ->
-	((ADD|AND|OR|XOR|SUB|MUL)Lloadidx4 x [off] {sym} ptr idx mem)
-((ADD|SUB|MUL|AND|OR|XOR)Lload [off1] {sym1} val (LEAL4 [off2] {sym2} ptr idx) mem)
-	&& is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
-	((ADD|SUB|MUL|AND|OR|XOR)Lloadidx4 [off1+off2] {mergeSym(sym1,sym2)} val ptr idx mem)
 ((ADD|SUB|MUL|DIV)SD x l:(MOVSDload [off] {sym} ptr mem)) && canMergeLoadClobber(v, l, x) && !config.use387 && clobber(l) -> ((ADD|SUB|MUL|DIV)SDload x [off] {sym} ptr mem)
 ((ADD|SUB|MUL|DIV)SS x l:(MOVSSload [off] {sym} ptr mem)) && canMergeLoadClobber(v, l, x) && !config.use387 && clobber(l) -> ((ADD|SUB|MUL|DIV)SSload x [off] {sym} ptr mem)
 (MOVLstore {sym} [off] ptr y:((ADD|AND|OR|XOR)Lload x [off] {sym} ptr mem) mem) && y.Uses==1 && clobber(y) -> ((ADD|AND|OR|XOR)Lmodify [off] {sym} ptr x mem)
 (MOVLstore {sym} [off] ptr y:((ADD|SUB|AND|OR|XOR)L l:(MOVLload [off] {sym} ptr mem) x) mem) && y.Uses==1 && l.Uses==1 && clobber(y, l) ->
 	((ADD|SUB|AND|OR|XOR)Lmodify [off] {sym} ptr x mem)
-(MOVLstoreidx4 {sym} [off] ptr idx y:((ADD|AND|OR|XOR)Lloadidx4 x [off] {sym} ptr idx mem) mem) && y.Uses==1 && clobber(y) ->
-	((ADD|AND|OR|XOR)Lmodifyidx4 [off] {sym} ptr idx x mem)
-(MOVLstoreidx4 {sym} [off] ptr idx y:((ADD|SUB|AND|OR|XOR)L l:(MOVLloadidx4 [off] {sym} ptr idx mem) x) mem) && y.Uses==1 && l.Uses==1 && clobber(y, l) ->
-	((ADD|SUB|AND|OR|XOR)Lmodifyidx4 [off] {sym} ptr idx x mem)
 (MOVLstore {sym} [off] ptr y:((ADD|AND|OR|XOR)Lconst [c] l:(MOVLload [off] {sym} ptr mem)) mem)
 	&& y.Uses==1 && l.Uses==1 && clobber(y, l) && validValAndOff(c,off) ->
 	((ADD|AND|OR|XOR)Lconstmodify [makeValAndOff(c,off)] {sym} ptr mem)
-(MOVLstoreidx4 {sym} [off] ptr idx y:((ADD|AND|OR|XOR)Lconst [c] l:(MOVLloadidx4 [off] {sym} ptr idx mem)) mem)
-	&& y.Uses==1 && l.Uses==1 && clobber(y, l) && validValAndOff(c,off) ->
-	((ADD|AND|OR|XOR)Lconstmodifyidx4 [makeValAndOff(c,off)] {sym} ptr idx mem)
-((ADD|AND|OR|XOR)Lmodifyidx4 [off] {sym} ptr idx (MOVLconst [c]) mem) && validValAndOff(c,off) ->
-	((ADD|AND|OR|XOR)Lconstmodifyidx4 [makeValAndOff(c,off)] {sym} ptr idx mem)
-(SUBLmodifyidx4 [off] {sym} ptr idx (MOVLconst [c]) mem) && validValAndOff(-c,off) ->
-	(ADDLconstmodifyidx4 [makeValAndOff(-c,off)] {sym} ptr idx mem)
-
-(MOV(B|W|L)storeconstidx1 [x] {sym} (ADDLconst [c] ptr) idx mem) ->
-	(MOV(B|W|L)storeconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
-(MOVWstoreconstidx2 [x] {sym} (ADDLconst [c] ptr) idx mem) ->
-	(MOVWstoreconstidx2 [ValAndOff(x).add(c)] {sym} ptr idx mem)
-(MOVLstoreconstidx4 [x] {sym} (ADDLconst [c] ptr) idx mem) ->
-	(MOVLstoreconstidx4 [ValAndOff(x).add(c)] {sym} ptr idx mem)
-
-(MOV(B|W|L)storeconstidx1 [x] {sym} ptr (ADDLconst [c] idx) mem) ->
-	(MOV(B|W|L)storeconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
-(MOVWstoreconstidx2 [x] {sym} ptr (ADDLconst [c] idx) mem) ->
-	(MOVWstoreconstidx2 [ValAndOff(x).add(2*c)] {sym} ptr idx mem)
-(MOVLstoreconstidx4 [x] {sym} ptr (ADDLconst [c] idx) mem) ->
-	(MOVLstoreconstidx4 [ValAndOff(x).add(4*c)] {sym} ptr idx mem)
 
 // fold LEALs together
 (LEAL [off1] {sym1} (LEAL [off2] {sym2} x)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
@@ -826,6 +711,16 @@
 (LEAL [off1] {sym1} (LEAL8 [off2] {sym2} x y)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
       (LEAL8 [off1+off2] {mergeSym(sym1,sym2)} x y)
 
+// Fold LEAL[1248] into LEAL[1248]. Only some such merges are possible.
+(LEAL1 [off1] {sym1} x (LEAL1 [off2] {sym2} y y)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
+      (LEAL2 [off1+off2] {mergeSym(sym1, sym2)} x y)
+(LEAL1 [off1] {sym1} x (LEAL1 [off2] {sym2} x y)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
+      (LEAL2 [off1+off2] {mergeSym(sym1, sym2)} y x)
+(LEAL2 [off1] {sym1} x (LEAL1 [off2] {sym2} y y)) && is32Bit(off1+2*off2) && sym2 == nil ->
+      (LEAL4 [off1+2*off2] {sym1} x y)
+(LEAL4 [off1] {sym1} x (LEAL1 [off2] {sym2} y y)) && is32Bit(off1+4*off2) && sym2 == nil ->
+      (LEAL8 [off1+4*off2] {sym1} x y)
+
 // Absorb InvertFlags into branches.
 (LT (InvertFlags cmp) yes no) -> (GT cmp yes no)
 (GT (InvertFlags cmp) yes no) -> (LT cmp yes no)
@@ -1039,23 +934,27 @@
 // TEST %reg,%reg is shorter than CMP
 (CMP(L|W|B)const x [0]) -> (TEST(L|W|B) x x)
 
+// Convert LEAL1 back to ADDL if we can
+(LEAL1 [0] x y) && v.Aux == nil -> (ADDL x y)
+
 // Combining byte loads into larger (unaligned) loads.
 // There are many ways these combinations could occur.  This is
 // designed to match the way encoding/binary.LittleEndian does it.
-(ORL                  x0:(MOVBload [i0] {s} p mem)
-    s0:(SHLLconst [8] x1:(MOVBload [i1] {s} p mem)))
+(ORL                  x0:(MOVBload [i0] {s} p0 mem)
+    s0:(SHLLconst [8] x1:(MOVBload [i1] {s} p1 mem)))
   && i1 == i0+1
   && x0.Uses == 1
   && x1.Uses == 1
   && s0.Uses == 1
+  && same(p0, p1, 1)
   && mergePoint(b,x0,x1) != nil
   && clobber(x0, x1, s0)
-  -> @mergePoint(b,x0,x1) (MOVWload [i0] {s} p mem)
+  -> @mergePoint(b,x0,x1) (MOVWload [i0] {s} p0 mem)
 
 (ORL o0:(ORL
-                       x0:(MOVWload [i0] {s} p mem)
-    s0:(SHLLconst [16] x1:(MOVBload [i2] {s} p mem)))
-    s1:(SHLLconst [24] x2:(MOVBload [i3] {s} p mem)))
+                       x0:(MOVWload [i0] {s} p0 mem)
+    s0:(SHLLconst [16] x1:(MOVBload [i2] {s} p1 mem)))
+    s1:(SHLLconst [24] x2:(MOVBload [i3] {s} p2 mem)))
   && i2 == i0+2
   && i3 == i0+3
   && x0.Uses == 1
@@ -1064,126 +963,84 @@
   && s0.Uses == 1
   && s1.Uses == 1
   && o0.Uses == 1
+  && same(p0, p1, 1)
+  && same(p1, p2, 1)
   && mergePoint(b,x0,x1,x2) != nil
   && clobber(x0, x1, x2, s0, s1, o0)
-  -> @mergePoint(b,x0,x1,x2) (MOVLload [i0] {s} p mem)
-
-(ORL                  x0:(MOVBloadidx1 [i0] {s} p idx mem)
-    s0:(SHLLconst [8] x1:(MOVBloadidx1 [i1] {s} p idx mem)))
-  && i1==i0+1
-  && x0.Uses == 1
-  && x1.Uses == 1
-  && s0.Uses == 1
-  && mergePoint(b,x0,x1) != nil
-  && clobber(x0, x1, s0)
-  -> @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
-
-(ORL o0:(ORL
-                       x0:(MOVWloadidx1 [i0] {s} p idx mem)
-    s0:(SHLLconst [16] x1:(MOVBloadidx1 [i2] {s} p idx mem)))
-    s1:(SHLLconst [24] x2:(MOVBloadidx1 [i3] {s} p idx mem)))
-  && i2 == i0+2
-  && i3 == i0+3
-  && x0.Uses == 1
-  && x1.Uses == 1
-  && x2.Uses == 1
-  && s0.Uses == 1
-  && s1.Uses == 1
-  && o0.Uses == 1
-  && mergePoint(b,x0,x1,x2) != nil
-  && clobber(x0, x1, x2, s0, s1, o0)
-  -> @mergePoint(b,x0,x1,x2) (MOVLloadidx1 <v.Type> [i0] {s} p idx mem)
+  -> @mergePoint(b,x0,x1,x2) (MOVLload [i0] {s} p0 mem)
 
 // Combine constant stores into larger (unaligned) stores.
-(MOVBstoreconst [c] {s} p x:(MOVBstoreconst [a] {s} p mem))
+(MOVBstoreconst [c] {s} p1 x:(MOVBstoreconst [a] {s} p0 mem))
   && x.Uses == 1
   && ValAndOff(a).Off() + 1 == ValAndOff(c).Off()
+  && same(p0, p1, 1)
   && clobber(x)
-  -> (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p mem)
-(MOVBstoreconst [a] {s} p x:(MOVBstoreconst [c] {s} p mem))
+  -> (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p0 mem)
+(MOVBstoreconst [a] {s} p1 x:(MOVBstoreconst [c] {s} p0 mem))
   && x.Uses == 1
   && ValAndOff(a).Off() + 1 == ValAndOff(c).Off()
+  && same(p0, p1, 1)
   && clobber(x)
-  -> (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p mem)
-(MOVWstoreconst [c] {s} p x:(MOVWstoreconst [a] {s} p mem))
+  -> (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p0 mem)
+(MOVWstoreconst [c] {s} p1 x:(MOVWstoreconst [a] {s} p0 mem))
   && x.Uses == 1
   && ValAndOff(a).Off() + 2 == ValAndOff(c).Off()
+  && same(p0, p1, 1)
   && clobber(x)
-  -> (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p mem)
-(MOVWstoreconst [a] {s} p x:(MOVWstoreconst [c] {s} p mem))
+  -> (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p0 mem)
+(MOVWstoreconst [a] {s} p1 x:(MOVWstoreconst [c] {s} p0 mem))
   && x.Uses == 1
   && ValAndOff(a).Off() + 2 == ValAndOff(c).Off()
+  && same(p0, p1, 1)
   && clobber(x)
-  -> (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p mem)
-
-(MOVBstoreconstidx1 [c] {s} p i x:(MOVBstoreconstidx1 [a] {s} p i mem))
-  && x.Uses == 1
-  && ValAndOff(a).Off() + 1 == ValAndOff(c).Off()
-  && clobber(x)
-  -> (MOVWstoreconstidx1 [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p i mem)
-(MOVWstoreconstidx1 [c] {s} p i x:(MOVWstoreconstidx1 [a] {s} p i mem))
-  && x.Uses == 1
-  && ValAndOff(a).Off() + 2 == ValAndOff(c).Off()
-  && clobber(x)
-  -> (MOVLstoreconstidx1 [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p i mem)
-
-(MOVWstoreconstidx2 [c] {s} p i x:(MOVWstoreconstidx2 [a] {s} p i mem))
-  && x.Uses == 1
-  && ValAndOff(a).Off() + 2 == ValAndOff(c).Off()
-  && clobber(x)
-  -> (MOVLstoreconstidx1 [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p (SHLLconst <i.Type> [1] i) mem)
+  -> (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p0 mem)
 
 // Combine stores into larger (unaligned) stores.
-(MOVBstore [i] {s} p (SHR(W|L)const [8] w) x:(MOVBstore [i-1] {s} p w mem))
+(MOVBstore [i] {s} p1 (SHR(W|L)const [8] w) x:(MOVBstore [i-1] {s} p0 w mem))
   && x.Uses == 1
+  && same(p0, p1, 1)
   && clobber(x)
-  -> (MOVWstore [i-1] {s} p w mem)
-(MOVBstore [i] {s} p w x:(MOVBstore {s} [i+1] p (SHR(W|L)const [8] w) mem))
+  -> (MOVWstore [i-1] {s} p0 w mem)
+(MOVBstore [i] {s} p1 w x:(MOVBstore {s} [i+1] p0 (SHR(W|L)const [8] w) mem))
   && x.Uses == 1
+  && same(p0, p1, 1)
   && clobber(x)
-  -> (MOVWstore [i] {s} p w mem)
-(MOVBstore [i] {s} p (SHRLconst [j] w) x:(MOVBstore [i-1] {s} p w0:(SHRLconst [j-8] w) mem))
+  -> (MOVWstore [i] {s} p0 w mem)
+(MOVBstore [i] {s} p1 (SHRLconst [j] w) x:(MOVBstore [i-1] {s} p0 w0:(SHRLconst [j-8] w) mem))
   && x.Uses == 1
+  && same(p0, p1, 1)
   && clobber(x)
-  -> (MOVWstore [i-1] {s} p w0 mem)
-(MOVWstore [i] {s} p (SHRLconst [16] w) x:(MOVWstore [i-2] {s} p w mem))
+  -> (MOVWstore [i-1] {s} p0 w0 mem)
+(MOVWstore [i] {s} p1 (SHRLconst [16] w) x:(MOVWstore [i-2] {s} p0 w mem))
   && x.Uses == 1
+  && same(p0, p1, 1)
   && clobber(x)
-  -> (MOVLstore [i-2] {s} p w mem)
-(MOVWstore [i] {s} p (SHRLconst [j] w) x:(MOVWstore [i-2] {s} p w0:(SHRLconst [j-16] w) mem))
+  -> (MOVLstore [i-2] {s} p0 w mem)
+(MOVWstore [i] {s} p1 (SHRLconst [j] w) x:(MOVWstore [i-2] {s} p0 w0:(SHRLconst [j-16] w) mem))
   && x.Uses == 1
+  && same(p0, p1, 1)
   && clobber(x)
-  -> (MOVLstore [i-2] {s} p w0 mem)
+  -> (MOVLstore [i-2] {s} p0 w0 mem)
 
-(MOVBstoreidx1 [i] {s} p idx (SHR(L|W)const [8] w) x:(MOVBstoreidx1 [i-1] {s} p idx w mem))
-  && x.Uses == 1
-  && clobber(x)
-  -> (MOVWstoreidx1 [i-1] {s} p idx w mem)
-(MOVBstoreidx1 [i] {s} p idx w x:(MOVBstoreidx1 [i+1] {s} p idx (SHR(L|W)const [8] w) mem))
-  && x.Uses == 1
-  && clobber(x)
-  -> (MOVWstoreidx1 [i] {s} p idx w mem)
-(MOVBstoreidx1 [i] {s} p idx (SHRLconst [j] w) x:(MOVBstoreidx1 [i-1] {s} p idx w0:(SHRLconst [j-8] w) mem))
-  && x.Uses == 1
-  && clobber(x)
-  -> (MOVWstoreidx1 [i-1] {s} p idx w0 mem)
-(MOVWstoreidx1 [i] {s} p idx (SHRLconst [16] w) x:(MOVWstoreidx1 [i-2] {s} p idx w mem))
-  && x.Uses == 1
-  && clobber(x)
-  -> (MOVLstoreidx1 [i-2] {s} p idx w mem)
-(MOVWstoreidx1 [i] {s} p idx (SHRLconst [j] w) x:(MOVWstoreidx1 [i-2] {s} p idx w0:(SHRLconst [j-16] w) mem))
-  && x.Uses == 1
-  && clobber(x)
-  -> (MOVLstoreidx1 [i-2] {s} p idx w0 mem)
+// Move constant offsets from LEALx up into the load or store. This lets the above
+// combining rules discover indexed load- and store-combining instances.
+(MOV(B|W|L)load [i0] {s0} l:(LEAL1 [i1] {s1} x y) mem) && i1 != 0 && is32Bit(i0+i1)
+-> (MOV(B|W|L)load [i0+i1] {s0} (LEAL1 <l.Type> [0] {s1} x y) mem)
+(MOV(B|W|L)load [i0] {s0} l:(LEAL2 [i1] {s1} x y) mem) && i1 != 0 && is32Bit(i0+i1)
+-> (MOV(B|W|L)load [i0+i1] {s0} (LEAL2 <l.Type> [0] {s1} x y) mem)
+(MOV(B|W|L)load [i0] {s0} l:(LEAL4 [i1] {s1} x y) mem) && i1 != 0 && is32Bit(i0+i1)
+-> (MOV(B|W|L)load [i0+i1] {s0} (LEAL4 <l.Type> [0] {s1} x y) mem)
+(MOV(B|W|L)load [i0] {s0} l:(LEAL8 [i1] {s1} x y) mem) && i1 != 0 && is32Bit(i0+i1)
+-> (MOV(B|W|L)load [i0+i1] {s0} (LEAL8 <l.Type> [0] {s1} x y) mem)
 
-(MOVWstoreidx2 [i] {s} p idx (SHRLconst [16] w) x:(MOVWstoreidx2 [i-2] {s} p idx w mem))
-  && x.Uses == 1
-  && clobber(x)
-  -> (MOVLstoreidx1 [i-2] {s} p (SHLLconst <idx.Type> [1] idx) w mem)
-(MOVWstoreidx2 [i] {s} p idx (SHRLconst [j] w) x:(MOVWstoreidx2 [i-2] {s} p idx w0:(SHRLconst [j-16] w) mem))
-  && x.Uses == 1
-  && clobber(x)
-  -> (MOVLstoreidx1 [i-2] {s} p (SHLLconst <idx.Type> [1] idx) w0 mem)
+(MOV(B|W|L)store [i0] {s0} l:(LEAL1 [i1] {s1} x y) val mem) && i1 != 0 && is32Bit(i0+i1)
+-> (MOV(B|W|L)store [i0+i1] {s0} (LEAL1 <l.Type> [0] {s1} x y) val mem)
+(MOV(B|W|L)store [i0] {s0} l:(LEAL2 [i1] {s1} x y) val mem) && i1 != 0 && is32Bit(i0+i1)
+-> (MOV(B|W|L)store [i0+i1] {s0} (LEAL2 <l.Type> [0] {s1} x y) val mem)
+(MOV(B|W|L)store [i0] {s0} l:(LEAL4 [i1] {s1} x y) val mem) && i1 != 0 && is32Bit(i0+i1)
+-> (MOV(B|W|L)store [i0+i1] {s0} (LEAL4 <l.Type> [0] {s1} x y) val mem)
+(MOV(B|W|L)store [i0] {s0} l:(LEAL8 [i1] {s1} x y) val mem) && i1 != 0 && is32Bit(i0+i1)
+-> (MOV(B|W|L)store [i0+i1] {s0} (LEAL8 <l.Type> [0] {s1} x y) val mem)
 
 // For PIC, break floating-point constant loading into two instructions so we have
 // a register to use for holding the address of the constant pool entry.
diff --git a/src/cmd/compile/internal/ssa/rewrite386.go b/src/cmd/compile/internal/ssa/rewrite386.go
index 8b2da94..e9a4b66 100644
--- a/src/cmd/compile/internal/ssa/rewrite386.go
+++ b/src/cmd/compile/internal/ssa/rewrite386.go
@@ -18,16 +18,10 @@
 		return rewriteValue386_Op386ADDLconst(v)
 	case Op386ADDLconstmodify:
 		return rewriteValue386_Op386ADDLconstmodify(v)
-	case Op386ADDLconstmodifyidx4:
-		return rewriteValue386_Op386ADDLconstmodifyidx4(v)
 	case Op386ADDLload:
 		return rewriteValue386_Op386ADDLload(v)
-	case Op386ADDLloadidx4:
-		return rewriteValue386_Op386ADDLloadidx4(v)
 	case Op386ADDLmodify:
 		return rewriteValue386_Op386ADDLmodify(v)
-	case Op386ADDLmodifyidx4:
-		return rewriteValue386_Op386ADDLmodifyidx4(v)
 	case Op386ADDSD:
 		return rewriteValue386_Op386ADDSD(v)
 	case Op386ADDSDload:
@@ -42,16 +36,10 @@
 		return rewriteValue386_Op386ANDLconst(v)
 	case Op386ANDLconstmodify:
 		return rewriteValue386_Op386ANDLconstmodify(v)
-	case Op386ANDLconstmodifyidx4:
-		return rewriteValue386_Op386ANDLconstmodifyidx4(v)
 	case Op386ANDLload:
 		return rewriteValue386_Op386ANDLload(v)
-	case Op386ANDLloadidx4:
-		return rewriteValue386_Op386ANDLloadidx4(v)
 	case Op386ANDLmodify:
 		return rewriteValue386_Op386ANDLmodify(v)
-	case Op386ANDLmodifyidx4:
-		return rewriteValue386_Op386ANDLmodifyidx4(v)
 	case Op386CMPB:
 		return rewriteValue386_Op386CMPB(v)
 	case Op386CMPBconst:
@@ -96,62 +84,28 @@
 		return rewriteValue386_Op386MOVBLZX(v)
 	case Op386MOVBload:
 		return rewriteValue386_Op386MOVBload(v)
-	case Op386MOVBloadidx1:
-		return rewriteValue386_Op386MOVBloadidx1(v)
 	case Op386MOVBstore:
 		return rewriteValue386_Op386MOVBstore(v)
 	case Op386MOVBstoreconst:
 		return rewriteValue386_Op386MOVBstoreconst(v)
-	case Op386MOVBstoreconstidx1:
-		return rewriteValue386_Op386MOVBstoreconstidx1(v)
-	case Op386MOVBstoreidx1:
-		return rewriteValue386_Op386MOVBstoreidx1(v)
 	case Op386MOVLload:
 		return rewriteValue386_Op386MOVLload(v)
-	case Op386MOVLloadidx1:
-		return rewriteValue386_Op386MOVLloadidx1(v)
-	case Op386MOVLloadidx4:
-		return rewriteValue386_Op386MOVLloadidx4(v)
 	case Op386MOVLstore:
 		return rewriteValue386_Op386MOVLstore(v)
 	case Op386MOVLstoreconst:
 		return rewriteValue386_Op386MOVLstoreconst(v)
-	case Op386MOVLstoreconstidx1:
-		return rewriteValue386_Op386MOVLstoreconstidx1(v)
-	case Op386MOVLstoreconstidx4:
-		return rewriteValue386_Op386MOVLstoreconstidx4(v)
-	case Op386MOVLstoreidx1:
-		return rewriteValue386_Op386MOVLstoreidx1(v)
-	case Op386MOVLstoreidx4:
-		return rewriteValue386_Op386MOVLstoreidx4(v)
 	case Op386MOVSDconst:
 		return rewriteValue386_Op386MOVSDconst(v)
 	case Op386MOVSDload:
 		return rewriteValue386_Op386MOVSDload(v)
-	case Op386MOVSDloadidx1:
-		return rewriteValue386_Op386MOVSDloadidx1(v)
-	case Op386MOVSDloadidx8:
-		return rewriteValue386_Op386MOVSDloadidx8(v)
 	case Op386MOVSDstore:
 		return rewriteValue386_Op386MOVSDstore(v)
-	case Op386MOVSDstoreidx1:
-		return rewriteValue386_Op386MOVSDstoreidx1(v)
-	case Op386MOVSDstoreidx8:
-		return rewriteValue386_Op386MOVSDstoreidx8(v)
 	case Op386MOVSSconst:
 		return rewriteValue386_Op386MOVSSconst(v)
 	case Op386MOVSSload:
 		return rewriteValue386_Op386MOVSSload(v)
-	case Op386MOVSSloadidx1:
-		return rewriteValue386_Op386MOVSSloadidx1(v)
-	case Op386MOVSSloadidx4:
-		return rewriteValue386_Op386MOVSSloadidx4(v)
 	case Op386MOVSSstore:
 		return rewriteValue386_Op386MOVSSstore(v)
-	case Op386MOVSSstoreidx1:
-		return rewriteValue386_Op386MOVSSstoreidx1(v)
-	case Op386MOVSSstoreidx4:
-		return rewriteValue386_Op386MOVSSstoreidx4(v)
 	case Op386MOVWLSX:
 		return rewriteValue386_Op386MOVWLSX(v)
 	case Op386MOVWLSXload:
@@ -160,30 +114,16 @@
 		return rewriteValue386_Op386MOVWLZX(v)
 	case Op386MOVWload:
 		return rewriteValue386_Op386MOVWload(v)
-	case Op386MOVWloadidx1:
-		return rewriteValue386_Op386MOVWloadidx1(v)
-	case Op386MOVWloadidx2:
-		return rewriteValue386_Op386MOVWloadidx2(v)
 	case Op386MOVWstore:
 		return rewriteValue386_Op386MOVWstore(v)
 	case Op386MOVWstoreconst:
 		return rewriteValue386_Op386MOVWstoreconst(v)
-	case Op386MOVWstoreconstidx1:
-		return rewriteValue386_Op386MOVWstoreconstidx1(v)
-	case Op386MOVWstoreconstidx2:
-		return rewriteValue386_Op386MOVWstoreconstidx2(v)
-	case Op386MOVWstoreidx1:
-		return rewriteValue386_Op386MOVWstoreidx1(v)
-	case Op386MOVWstoreidx2:
-		return rewriteValue386_Op386MOVWstoreidx2(v)
 	case Op386MULL:
 		return rewriteValue386_Op386MULL(v)
 	case Op386MULLconst:
 		return rewriteValue386_Op386MULLconst(v)
 	case Op386MULLload:
 		return rewriteValue386_Op386MULLload(v)
-	case Op386MULLloadidx4:
-		return rewriteValue386_Op386MULLloadidx4(v)
 	case Op386MULSD:
 		return rewriteValue386_Op386MULSD(v)
 	case Op386MULSDload:
@@ -202,16 +142,10 @@
 		return rewriteValue386_Op386ORLconst(v)
 	case Op386ORLconstmodify:
 		return rewriteValue386_Op386ORLconstmodify(v)
-	case Op386ORLconstmodifyidx4:
-		return rewriteValue386_Op386ORLconstmodifyidx4(v)
 	case Op386ORLload:
 		return rewriteValue386_Op386ORLload(v)
-	case Op386ORLloadidx4:
-		return rewriteValue386_Op386ORLloadidx4(v)
 	case Op386ORLmodify:
 		return rewriteValue386_Op386ORLmodify(v)
-	case Op386ORLmodifyidx4:
-		return rewriteValue386_Op386ORLmodifyidx4(v)
 	case Op386ROLBconst:
 		return rewriteValue386_Op386ROLBconst(v)
 	case Op386ROLLconst:
@@ -278,12 +212,8 @@
 		return rewriteValue386_Op386SUBLconst(v)
 	case Op386SUBLload:
 		return rewriteValue386_Op386SUBLload(v)
-	case Op386SUBLloadidx4:
-		return rewriteValue386_Op386SUBLloadidx4(v)
 	case Op386SUBLmodify:
 		return rewriteValue386_Op386SUBLmodify(v)
-	case Op386SUBLmodifyidx4:
-		return rewriteValue386_Op386SUBLmodifyidx4(v)
 	case Op386SUBSD:
 		return rewriteValue386_Op386SUBSD(v)
 	case Op386SUBSDload:
@@ -298,16 +228,10 @@
 		return rewriteValue386_Op386XORLconst(v)
 	case Op386XORLconstmodify:
 		return rewriteValue386_Op386XORLconstmodify(v)
-	case Op386XORLconstmodifyidx4:
-		return rewriteValue386_Op386XORLconstmodifyidx4(v)
 	case Op386XORLload:
 		return rewriteValue386_Op386XORLload(v)
-	case Op386XORLloadidx4:
-		return rewriteValue386_Op386XORLloadidx4(v)
 	case Op386XORLmodify:
 		return rewriteValue386_Op386XORLmodify(v)
-	case Op386XORLmodifyidx4:
-		return rewriteValue386_Op386XORLmodifyidx4(v)
 	case OpAdd16:
 		v.Op = Op386ADDL
 		return true
@@ -1042,32 +966,6 @@
 		}
 		break
 	}
-	// match: (ADDL x l:(MOVLloadidx4 [off] {sym} ptr idx mem))
-	// cond: canMergeLoadClobber(v, l, x) && clobber(l)
-	// result: (ADDLloadidx4 x [off] {sym} ptr idx mem)
-	for {
-		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-			x := v_0
-			l := v_1
-			if l.Op != Op386MOVLloadidx4 {
-				continue
-			}
-			off := l.AuxInt
-			sym := l.Aux
-			mem := l.Args[2]
-			ptr := l.Args[0]
-			idx := l.Args[1]
-			if !(canMergeLoadClobber(v, l, x) && clobber(l)) {
-				continue
-			}
-			v.reset(Op386ADDLloadidx4)
-			v.AuxInt = off
-			v.Aux = sym
-			v.AddArg4(x, ptr, idx, mem)
-			return true
-		}
-		break
-	}
 	// match: (ADDL x (NEGL y))
 	// result: (SUBL x y)
 	for {
@@ -1316,81 +1214,6 @@
 	}
 	return false
 }
-func rewriteValue386_Op386ADDLconstmodifyidx4(v *Value) bool {
-	v_2 := v.Args[2]
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	b := v.Block
-	config := b.Func.Config
-	// match: (ADDLconstmodifyidx4 [valoff1] {sym} (ADDLconst [off2] base) idx mem)
-	// cond: ValAndOff(valoff1).canAdd(off2)
-	// result: (ADDLconstmodifyidx4 [ValAndOff(valoff1).add(off2)] {sym} base idx mem)
-	for {
-		valoff1 := v.AuxInt
-		sym := v.Aux
-		if v_0.Op != Op386ADDLconst {
-			break
-		}
-		off2 := v_0.AuxInt
-		base := v_0.Args[0]
-		idx := v_1
-		mem := v_2
-		if !(ValAndOff(valoff1).canAdd(off2)) {
-			break
-		}
-		v.reset(Op386ADDLconstmodifyidx4)
-		v.AuxInt = ValAndOff(valoff1).add(off2)
-		v.Aux = sym
-		v.AddArg3(base, idx, mem)
-		return true
-	}
-	// match: (ADDLconstmodifyidx4 [valoff1] {sym} base (ADDLconst [off2] idx) mem)
-	// cond: ValAndOff(valoff1).canAdd(off2*4)
-	// result: (ADDLconstmodifyidx4 [ValAndOff(valoff1).add(off2*4)] {sym} base idx mem)
-	for {
-		valoff1 := v.AuxInt
-		sym := v.Aux
-		base := v_0
-		if v_1.Op != Op386ADDLconst {
-			break
-		}
-		off2 := v_1.AuxInt
-		idx := v_1.Args[0]
-		mem := v_2
-		if !(ValAndOff(valoff1).canAdd(off2 * 4)) {
-			break
-		}
-		v.reset(Op386ADDLconstmodifyidx4)
-		v.AuxInt = ValAndOff(valoff1).add(off2 * 4)
-		v.Aux = sym
-		v.AddArg3(base, idx, mem)
-		return true
-	}
-	// match: (ADDLconstmodifyidx4 [valoff1] {sym1} (LEAL [off2] {sym2} base) idx mem)
-	// cond: ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)
-	// result: (ADDLconstmodifyidx4 [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base idx mem)
-	for {
-		valoff1 := v.AuxInt
-		sym1 := v.Aux
-		if v_0.Op != Op386LEAL {
-			break
-		}
-		off2 := v_0.AuxInt
-		sym2 := v_0.Aux
-		base := v_0.Args[0]
-		idx := v_1
-		mem := v_2
-		if !(ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) {
-			break
-		}
-		v.reset(Op386ADDLconstmodifyidx4)
-		v.AuxInt = ValAndOff(valoff1).add(off2)
-		v.Aux = mergeSym(sym1, sym2)
-		v.AddArg3(base, idx, mem)
-		return true
-	}
-	return false
-}
 func rewriteValue386_Op386ADDLload(v *Value) bool {
 	v_2 := v.Args[2]
 	v_1 := v.Args[1]
@@ -1442,109 +1265,6 @@
 		v.AddArg3(val, base, mem)
 		return true
 	}
-	// match: (ADDLload [off1] {sym1} val (LEAL4 [off2] {sym2} ptr idx) mem)
-	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-	// result: (ADDLloadidx4 [off1+off2] {mergeSym(sym1,sym2)} val ptr idx mem)
-	for {
-		off1 := v.AuxInt
-		sym1 := v.Aux
-		val := v_0
-		if v_1.Op != Op386LEAL4 {
-			break
-		}
-		off2 := v_1.AuxInt
-		sym2 := v_1.Aux
-		idx := v_1.Args[1]
-		ptr := v_1.Args[0]
-		mem := v_2
-		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
-			break
-		}
-		v.reset(Op386ADDLloadidx4)
-		v.AuxInt = off1 + off2
-		v.Aux = mergeSym(sym1, sym2)
-		v.AddArg4(val, ptr, idx, mem)
-		return true
-	}
-	return false
-}
-func rewriteValue386_Op386ADDLloadidx4(v *Value) bool {
-	v_3 := v.Args[3]
-	v_2 := v.Args[2]
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	b := v.Block
-	config := b.Func.Config
-	// match: (ADDLloadidx4 [off1] {sym} val (ADDLconst [off2] base) idx mem)
-	// cond: is32Bit(off1+off2)
-	// result: (ADDLloadidx4 [off1+off2] {sym} val base idx mem)
-	for {
-		off1 := v.AuxInt
-		sym := v.Aux
-		val := v_0
-		if v_1.Op != Op386ADDLconst {
-			break
-		}
-		off2 := v_1.AuxInt
-		base := v_1.Args[0]
-		idx := v_2
-		mem := v_3
-		if !(is32Bit(off1 + off2)) {
-			break
-		}
-		v.reset(Op386ADDLloadidx4)
-		v.AuxInt = off1 + off2
-		v.Aux = sym
-		v.AddArg4(val, base, idx, mem)
-		return true
-	}
-	// match: (ADDLloadidx4 [off1] {sym} val base (ADDLconst [off2] idx) mem)
-	// cond: is32Bit(off1+off2*4)
-	// result: (ADDLloadidx4 [off1+off2*4] {sym} val base idx mem)
-	for {
-		off1 := v.AuxInt
-		sym := v.Aux
-		val := v_0
-		base := v_1
-		if v_2.Op != Op386ADDLconst {
-			break
-		}
-		off2 := v_2.AuxInt
-		idx := v_2.Args[0]
-		mem := v_3
-		if !(is32Bit(off1 + off2*4)) {
-			break
-		}
-		v.reset(Op386ADDLloadidx4)
-		v.AuxInt = off1 + off2*4
-		v.Aux = sym
-		v.AddArg4(val, base, idx, mem)
-		return true
-	}
-	// match: (ADDLloadidx4 [off1] {sym1} val (LEAL [off2] {sym2} base) idx mem)
-	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)
-	// result: (ADDLloadidx4 [off1+off2] {mergeSym(sym1,sym2)} val base idx mem)
-	for {
-		off1 := v.AuxInt
-		sym1 := v.Aux
-		val := v_0
-		if v_1.Op != Op386LEAL {
-			break
-		}
-		off2 := v_1.AuxInt
-		sym2 := v_1.Aux
-		base := v_1.Args[0]
-		idx := v_2
-		mem := v_3
-		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) {
-			break
-		}
-		v.reset(Op386ADDLloadidx4)
-		v.AuxInt = off1 + off2
-		v.Aux = mergeSym(sym1, sym2)
-		v.AddArg4(val, base, idx, mem)
-		return true
-	}
 	return false
 }
 func rewriteValue386_Op386ADDLmodify(v *Value) bool {
@@ -1600,107 +1320,6 @@
 	}
 	return false
 }
-func rewriteValue386_Op386ADDLmodifyidx4(v *Value) bool {
-	v_3 := v.Args[3]
-	v_2 := v.Args[2]
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	b := v.Block
-	config := b.Func.Config
-	// match: (ADDLmodifyidx4 [off1] {sym} (ADDLconst [off2] base) idx val mem)
-	// cond: is32Bit(off1+off2)
-	// result: (ADDLmodifyidx4 [off1+off2] {sym} base idx val mem)
-	for {
-		off1 := v.AuxInt
-		sym := v.Aux
-		if v_0.Op != Op386ADDLconst {
-			break
-		}
-		off2 := v_0.AuxInt
-		base := v_0.Args[0]
-		idx := v_1
-		val := v_2
-		mem := v_3
-		if !(is32Bit(off1 + off2)) {
-			break
-		}
-		v.reset(Op386ADDLmodifyidx4)
-		v.AuxInt = off1 + off2
-		v.Aux = sym
-		v.AddArg4(base, idx, val, mem)
-		return true
-	}
-	// match: (ADDLmodifyidx4 [off1] {sym} base (ADDLconst [off2] idx) val mem)
-	// cond: is32Bit(off1+off2*4)
-	// result: (ADDLmodifyidx4 [off1+off2*4] {sym} base idx val mem)
-	for {
-		off1 := v.AuxInt
-		sym := v.Aux
-		base := v_0
-		if v_1.Op != Op386ADDLconst {
-			break
-		}
-		off2 := v_1.AuxInt
-		idx := v_1.Args[0]
-		val := v_2
-		mem := v_3
-		if !(is32Bit(off1 + off2*4)) {
-			break
-		}
-		v.reset(Op386ADDLmodifyidx4)
-		v.AuxInt = off1 + off2*4
-		v.Aux = sym
-		v.AddArg4(base, idx, val, mem)
-		return true
-	}
-	// match: (ADDLmodifyidx4 [off1] {sym1} (LEAL [off2] {sym2} base) idx val mem)
-	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)
-	// result: (ADDLmodifyidx4 [off1+off2] {mergeSym(sym1,sym2)} base idx val mem)
-	for {
-		off1 := v.AuxInt
-		sym1 := v.Aux
-		if v_0.Op != Op386LEAL {
-			break
-		}
-		off2 := v_0.AuxInt
-		sym2 := v_0.Aux
-		base := v_0.Args[0]
-		idx := v_1
-		val := v_2
-		mem := v_3
-		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) {
-			break
-		}
-		v.reset(Op386ADDLmodifyidx4)
-		v.AuxInt = off1 + off2
-		v.Aux = mergeSym(sym1, sym2)
-		v.AddArg4(base, idx, val, mem)
-		return true
-	}
-	// match: (ADDLmodifyidx4 [off] {sym} ptr idx (MOVLconst [c]) mem)
-	// cond: validValAndOff(c,off)
-	// result: (ADDLconstmodifyidx4 [makeValAndOff(c,off)] {sym} ptr idx mem)
-	for {
-		off := v.AuxInt
-		sym := v.Aux
-		ptr := v_0
-		idx := v_1
-		if v_2.Op != Op386MOVLconst {
-			break
-		}
-		c := v_2.AuxInt
-		mem := v_3
-		if !(validValAndOff(c, off)) {
-			break
-		}
-		v.reset(Op386ADDLconstmodifyidx4)
-		v.AuxInt = makeValAndOff(c, off)
-		v.Aux = sym
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
-	return false
-}
 func rewriteValue386_Op386ADDSD(v *Value) bool {
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
@@ -1915,32 +1534,6 @@
 		}
 		break
 	}
-	// match: (ANDL x l:(MOVLloadidx4 [off] {sym} ptr idx mem))
-	// cond: canMergeLoadClobber(v, l, x) && clobber(l)
-	// result: (ANDLloadidx4 x [off] {sym} ptr idx mem)
-	for {
-		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-			x := v_0
-			l := v_1
-			if l.Op != Op386MOVLloadidx4 {
-				continue
-			}
-			off := l.AuxInt
-			sym := l.Aux
-			mem := l.Args[2]
-			ptr := l.Args[0]
-			idx := l.Args[1]
-			if !(canMergeLoadClobber(v, l, x) && clobber(l)) {
-				continue
-			}
-			v.reset(Op386ANDLloadidx4)
-			v.AuxInt = off
-			v.Aux = sym
-			v.AddArg4(x, ptr, idx, mem)
-			return true
-		}
-		break
-	}
 	// match: (ANDL x x)
 	// result: x
 	for {
@@ -2057,81 +1650,6 @@
 	}
 	return false
 }
-func rewriteValue386_Op386ANDLconstmodifyidx4(v *Value) bool {
-	v_2 := v.Args[2]
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	b := v.Block
-	config := b.Func.Config
-	// match: (ANDLconstmodifyidx4 [valoff1] {sym} (ADDLconst [off2] base) idx mem)
-	// cond: ValAndOff(valoff1).canAdd(off2)
-	// result: (ANDLconstmodifyidx4 [ValAndOff(valoff1).add(off2)] {sym} base idx mem)
-	for {
-		valoff1 := v.AuxInt
-		sym := v.Aux
-		if v_0.Op != Op386ADDLconst {
-			break
-		}
-		off2 := v_0.AuxInt
-		base := v_0.Args[0]
-		idx := v_1
-		mem := v_2
-		if !(ValAndOff(valoff1).canAdd(off2)) {
-			break
-		}
-		v.reset(Op386ANDLconstmodifyidx4)
-		v.AuxInt = ValAndOff(valoff1).add(off2)
-		v.Aux = sym
-		v.AddArg3(base, idx, mem)
-		return true
-	}
-	// match: (ANDLconstmodifyidx4 [valoff1] {sym} base (ADDLconst [off2] idx) mem)
-	// cond: ValAndOff(valoff1).canAdd(off2*4)
-	// result: (ANDLconstmodifyidx4 [ValAndOff(valoff1).add(off2*4)] {sym} base idx mem)
-	for {
-		valoff1 := v.AuxInt
-		sym := v.Aux
-		base := v_0
-		if v_1.Op != Op386ADDLconst {
-			break
-		}
-		off2 := v_1.AuxInt
-		idx := v_1.Args[0]
-		mem := v_2
-		if !(ValAndOff(valoff1).canAdd(off2 * 4)) {
-			break
-		}
-		v.reset(Op386ANDLconstmodifyidx4)
-		v.AuxInt = ValAndOff(valoff1).add(off2 * 4)
-		v.Aux = sym
-		v.AddArg3(base, idx, mem)
-		return true
-	}
-	// match: (ANDLconstmodifyidx4 [valoff1] {sym1} (LEAL [off2] {sym2} base) idx mem)
-	// cond: ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)
-	// result: (ANDLconstmodifyidx4 [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base idx mem)
-	for {
-		valoff1 := v.AuxInt
-		sym1 := v.Aux
-		if v_0.Op != Op386LEAL {
-			break
-		}
-		off2 := v_0.AuxInt
-		sym2 := v_0.Aux
-		base := v_0.Args[0]
-		idx := v_1
-		mem := v_2
-		if !(ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) {
-			break
-		}
-		v.reset(Op386ANDLconstmodifyidx4)
-		v.AuxInt = ValAndOff(valoff1).add(off2)
-		v.Aux = mergeSym(sym1, sym2)
-		v.AddArg3(base, idx, mem)
-		return true
-	}
-	return false
-}
 func rewriteValue386_Op386ANDLload(v *Value) bool {
 	v_2 := v.Args[2]
 	v_1 := v.Args[1]
@@ -2183,109 +1701,6 @@
 		v.AddArg3(val, base, mem)
 		return true
 	}
-	// match: (ANDLload [off1] {sym1} val (LEAL4 [off2] {sym2} ptr idx) mem)
-	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-	// result: (ANDLloadidx4 [off1+off2] {mergeSym(sym1,sym2)} val ptr idx mem)
-	for {
-		off1 := v.AuxInt
-		sym1 := v.Aux
-		val := v_0
-		if v_1.Op != Op386LEAL4 {
-			break
-		}
-		off2 := v_1.AuxInt
-		sym2 := v_1.Aux
-		idx := v_1.Args[1]
-		ptr := v_1.Args[0]
-		mem := v_2
-		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
-			break
-		}
-		v.reset(Op386ANDLloadidx4)
-		v.AuxInt = off1 + off2
-		v.Aux = mergeSym(sym1, sym2)
-		v.AddArg4(val, ptr, idx, mem)
-		return true
-	}
-	return false
-}
-func rewriteValue386_Op386ANDLloadidx4(v *Value) bool {
-	v_3 := v.Args[3]
-	v_2 := v.Args[2]
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	b := v.Block
-	config := b.Func.Config
-	// match: (ANDLloadidx4 [off1] {sym} val (ADDLconst [off2] base) idx mem)
-	// cond: is32Bit(off1+off2)
-	// result: (ANDLloadidx4 [off1+off2] {sym} val base idx mem)
-	for {
-		off1 := v.AuxInt
-		sym := v.Aux
-		val := v_0
-		if v_1.Op != Op386ADDLconst {
-			break
-		}
-		off2 := v_1.AuxInt
-		base := v_1.Args[0]
-		idx := v_2
-		mem := v_3
-		if !(is32Bit(off1 + off2)) {
-			break
-		}
-		v.reset(Op386ANDLloadidx4)
-		v.AuxInt = off1 + off2
-		v.Aux = sym
-		v.AddArg4(val, base, idx, mem)
-		return true
-	}
-	// match: (ANDLloadidx4 [off1] {sym} val base (ADDLconst [off2] idx) mem)
-	// cond: is32Bit(off1+off2*4)
-	// result: (ANDLloadidx4 [off1+off2*4] {sym} val base idx mem)
-	for {
-		off1 := v.AuxInt
-		sym := v.Aux
-		val := v_0
-		base := v_1
-		if v_2.Op != Op386ADDLconst {
-			break
-		}
-		off2 := v_2.AuxInt
-		idx := v_2.Args[0]
-		mem := v_3
-		if !(is32Bit(off1 + off2*4)) {
-			break
-		}
-		v.reset(Op386ANDLloadidx4)
-		v.AuxInt = off1 + off2*4
-		v.Aux = sym
-		v.AddArg4(val, base, idx, mem)
-		return true
-	}
-	// match: (ANDLloadidx4 [off1] {sym1} val (LEAL [off2] {sym2} base) idx mem)
-	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)
-	// result: (ANDLloadidx4 [off1+off2] {mergeSym(sym1,sym2)} val base idx mem)
-	for {
-		off1 := v.AuxInt
-		sym1 := v.Aux
-		val := v_0
-		if v_1.Op != Op386LEAL {
-			break
-		}
-		off2 := v_1.AuxInt
-		sym2 := v_1.Aux
-		base := v_1.Args[0]
-		idx := v_2
-		mem := v_3
-		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) {
-			break
-		}
-		v.reset(Op386ANDLloadidx4)
-		v.AuxInt = off1 + off2
-		v.Aux = mergeSym(sym1, sym2)
-		v.AddArg4(val, base, idx, mem)
-		return true
-	}
 	return false
 }
 func rewriteValue386_Op386ANDLmodify(v *Value) bool {
@@ -2341,107 +1756,6 @@
 	}
 	return false
 }
-func rewriteValue386_Op386ANDLmodifyidx4(v *Value) bool {
-	v_3 := v.Args[3]
-	v_2 := v.Args[2]
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	b := v.Block
-	config := b.Func.Config
-	// match: (ANDLmodifyidx4 [off1] {sym} (ADDLconst [off2] base) idx val mem)
-	// cond: is32Bit(off1+off2)
-	// result: (ANDLmodifyidx4 [off1+off2] {sym} base idx val mem)
-	for {
-		off1 := v.AuxInt
-		sym := v.Aux
-		if v_0.Op != Op386ADDLconst {
-			break
-		}
-		off2 := v_0.AuxInt
-		base := v_0.Args[0]
-		idx := v_1
-		val := v_2
-		mem := v_3
-		if !(is32Bit(off1 + off2)) {
-			break
-		}
-		v.reset(Op386ANDLmodifyidx4)
-		v.AuxInt = off1 + off2
-		v.Aux = sym
-		v.AddArg4(base, idx, val, mem)
-		return true
-	}
-	// match: (ANDLmodifyidx4 [off1] {sym} base (ADDLconst [off2] idx) val mem)
-	// cond: is32Bit(off1+off2*4)
-	// result: (ANDLmodifyidx4 [off1+off2*4] {sym} base idx val mem)
-	for {
-		off1 := v.AuxInt
-		sym := v.Aux
-		base := v_0
-		if v_1.Op != Op386ADDLconst {
-			break
-		}
-		off2 := v_1.AuxInt
-		idx := v_1.Args[0]
-		val := v_2
-		mem := v_3
-		if !(is32Bit(off1 + off2*4)) {
-			break
-		}
-		v.reset(Op386ANDLmodifyidx4)
-		v.AuxInt = off1 + off2*4
-		v.Aux = sym
-		v.AddArg4(base, idx, val, mem)
-		return true
-	}
-	// match: (ANDLmodifyidx4 [off1] {sym1} (LEAL [off2] {sym2} base) idx val mem)
-	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)
-	// result: (ANDLmodifyidx4 [off1+off2] {mergeSym(sym1,sym2)} base idx val mem)
-	for {
-		off1 := v.AuxInt
-		sym1 := v.Aux
-		if v_0.Op != Op386LEAL {
-			break
-		}
-		off2 := v_0.AuxInt
-		sym2 := v_0.Aux
-		base := v_0.Args[0]
-		idx := v_1
-		val := v_2
-		mem := v_3
-		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) {
-			break
-		}
-		v.reset(Op386ANDLmodifyidx4)
-		v.AuxInt = off1 + off2
-		v.Aux = mergeSym(sym1, sym2)
-		v.AddArg4(base, idx, val, mem)
-		return true
-	}
-	// match: (ANDLmodifyidx4 [off] {sym} ptr idx (MOVLconst [c]) mem)
-	// cond: validValAndOff(c,off)
-	// result: (ANDLconstmodifyidx4 [makeValAndOff(c,off)] {sym} ptr idx mem)
-	for {
-		off := v.AuxInt
-		sym := v.Aux
-		ptr := v_0
-		idx := v_1
-		if v_2.Op != Op386MOVLconst {
-			break
-		}
-		c := v_2.AuxInt
-		mem := v_3
-		if !(validValAndOff(c, off)) {
-			break
-		}
-		v.reset(Op386ANDLconstmodifyidx4)
-		v.AuxInt = makeValAndOff(c, off)
-		v.Aux = sym
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
-	return false
-}
 func rewriteValue386_Op386CMPB(v *Value) bool {
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
@@ -3768,6 +3082,80 @@
 		}
 		break
 	}
+	// match: (LEAL1 [off1] {sym1} x (LEAL1 [off2] {sym2} y y))
+	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+	// result: (LEAL2 [off1+off2] {mergeSym(sym1, sym2)} x y)
+	for {
+		off1 := v.AuxInt
+		sym1 := v.Aux
+		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+			x := v_0
+			if v_1.Op != Op386LEAL1 {
+				continue
+			}
+			off2 := v_1.AuxInt
+			sym2 := v_1.Aux
+			y := v_1.Args[1]
+			if y != v_1.Args[0] || !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+				continue
+			}
+			v.reset(Op386LEAL2)
+			v.AuxInt = off1 + off2
+			v.Aux = mergeSym(sym1, sym2)
+			v.AddArg2(x, y)
+			return true
+		}
+		break
+	}
+	// match: (LEAL1 [off1] {sym1} x (LEAL1 [off2] {sym2} x y))
+	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+	// result: (LEAL2 [off1+off2] {mergeSym(sym1, sym2)} y x)
+	for {
+		off1 := v.AuxInt
+		sym1 := v.Aux
+		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+			x := v_0
+			if v_1.Op != Op386LEAL1 {
+				continue
+			}
+			off2 := v_1.AuxInt
+			sym2 := v_1.Aux
+			_ = v_1.Args[1]
+			v_1_0 := v_1.Args[0]
+			v_1_1 := v_1.Args[1]
+			for _i1 := 0; _i1 <= 1; _i1, v_1_0, v_1_1 = _i1+1, v_1_1, v_1_0 {
+				if x != v_1_0 {
+					continue
+				}
+				y := v_1_1
+				if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+					continue
+				}
+				v.reset(Op386LEAL2)
+				v.AuxInt = off1 + off2
+				v.Aux = mergeSym(sym1, sym2)
+				v.AddArg2(y, x)
+				return true
+			}
+		}
+		break
+	}
+	// match: (LEAL1 [0] x y)
+	// cond: v.Aux == nil
+	// result: (ADDL x y)
+	for {
+		if v.AuxInt != 0 {
+			break
+		}
+		x := v_0
+		y := v_1
+		if !(v.Aux == nil) {
+			break
+		}
+		v.reset(Op386ADDL)
+		v.AddArg2(x, y)
+		return true
+	}
 	return false
 }
 func rewriteValue386_Op386LEAL2(v *Value) bool {
@@ -3869,6 +3257,28 @@
 		v.AddArg2(x, y)
 		return true
 	}
+	// match: (LEAL2 [off1] {sym1} x (LEAL1 [off2] {sym2} y y))
+	// cond: is32Bit(off1+2*off2) && sym2 == nil
+	// result: (LEAL4 [off1+2*off2] {sym1} x y)
+	for {
+		off1 := v.AuxInt
+		sym1 := v.Aux
+		x := v_0
+		if v_1.Op != Op386LEAL1 {
+			break
+		}
+		off2 := v_1.AuxInt
+		sym2 := v_1.Aux
+		y := v_1.Args[1]
+		if y != v_1.Args[0] || !(is32Bit(off1+2*off2) && sym2 == nil) {
+			break
+		}
+		v.reset(Op386LEAL4)
+		v.AuxInt = off1 + 2*off2
+		v.Aux = sym1
+		v.AddArg2(x, y)
+		return true
+	}
 	return false
 }
 func rewriteValue386_Op386LEAL4(v *Value) bool {
@@ -3954,6 +3364,28 @@
 		v.AddArg2(x, y)
 		return true
 	}
+	// match: (LEAL4 [off1] {sym1} x (LEAL1 [off2] {sym2} y y))
+	// cond: is32Bit(off1+4*off2) && sym2 == nil
+	// result: (LEAL8 [off1+4*off2] {sym1} x y)
+	for {
+		off1 := v.AuxInt
+		sym1 := v.Aux
+		x := v_0
+		if v_1.Op != Op386LEAL1 {
+			break
+		}
+		off2 := v_1.AuxInt
+		sym2 := v_1.Aux
+		y := v_1.Args[1]
+		if y != v_1.Args[0] || !(is32Bit(off1+4*off2) && sym2 == nil) {
+			break
+		}
+		v.reset(Op386LEAL8)
+		v.AuxInt = off1 + 4*off2
+		v.Aux = sym1
+		v.AddArg2(x, y)
+		return true
+	}
 	return false
 }
 func rewriteValue386_Op386LEAL8(v *Value) bool {
@@ -4146,30 +3578,6 @@
 		v0.AddArg2(ptr, mem)
 		return true
 	}
-	// match: (MOVBLZX x:(MOVBloadidx1 [off] {sym} ptr idx mem))
-	// cond: x.Uses == 1 && clobber(x)
-	// result: @x.Block (MOVBloadidx1 <v.Type> [off] {sym} ptr idx mem)
-	for {
-		x := v_0
-		if x.Op != Op386MOVBloadidx1 {
-			break
-		}
-		off := x.AuxInt
-		sym := x.Aux
-		mem := x.Args[2]
-		ptr := x.Args[0]
-		idx := x.Args[1]
-		if !(x.Uses == 1 && clobber(x)) {
-			break
-		}
-		b = x.Block
-		v0 := b.NewValue0(v.Pos, Op386MOVBloadidx1, v.Type)
-		v.copyOf(v0)
-		v0.AuxInt = off
-		v0.Aux = sym
-		v0.AddArg3(ptr, idx, mem)
-		return true
-	}
 	// match: (MOVBLZX (ANDLconst [c] x))
 	// result: (ANDLconst [c & 0xff] x)
 	for {
@@ -4254,55 +3662,117 @@
 		v.AddArg2(base, mem)
 		return true
 	}
-	// match: (MOVBload [off1] {sym1} (LEAL1 [off2] {sym2} ptr idx) mem)
-	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-	// result: (MOVBloadidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
+	// match: (MOVBload [i0] {s0} l:(LEAL1 [i1] {s1} x y) mem)
+	// cond: i1 != 0 && is32Bit(i0+i1)
+	// result: (MOVBload [i0+i1] {s0} (LEAL1 <l.Type> [0] {s1} x y) mem)
 	for {
-		off1 := v.AuxInt
-		sym1 := v.Aux
-		if v_0.Op != Op386LEAL1 {
+		i0 := v.AuxInt
+		s0 := v.Aux
+		l := v_0
+		if l.Op != Op386LEAL1 {
 			break
 		}
-		off2 := v_0.AuxInt
-		sym2 := v_0.Aux
-		idx := v_0.Args[1]
-		ptr := v_0.Args[0]
+		i1 := l.AuxInt
+		s1 := l.Aux
+		y := l.Args[1]
+		x := l.Args[0]
 		mem := v_1
-		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+		if !(i1 != 0 && is32Bit(i0+i1)) {
 			break
 		}
-		v.reset(Op386MOVBloadidx1)
-		v.AuxInt = off1 + off2
-		v.Aux = mergeSym(sym1, sym2)
-		v.AddArg3(ptr, idx, mem)
+		v.reset(Op386MOVBload)
+		v.AuxInt = i0 + i1
+		v.Aux = s0
+		v0 := b.NewValue0(v.Pos, Op386LEAL1, l.Type)
+		v0.AuxInt = 0
+		v0.Aux = s1
+		v0.AddArg2(x, y)
+		v.AddArg2(v0, mem)
 		return true
 	}
-	// match: (MOVBload [off] {sym} (ADDL ptr idx) mem)
-	// cond: ptr.Op != OpSB
-	// result: (MOVBloadidx1 [off] {sym} ptr idx mem)
+	// match: (MOVBload [i0] {s0} l:(LEAL2 [i1] {s1} x y) mem)
+	// cond: i1 != 0 && is32Bit(i0+i1)
+	// result: (MOVBload [i0+i1] {s0} (LEAL2 <l.Type> [0] {s1} x y) mem)
 	for {
-		off := v.AuxInt
-		sym := v.Aux
-		if v_0.Op != Op386ADDL {
+		i0 := v.AuxInt
+		s0 := v.Aux
+		l := v_0
+		if l.Op != Op386LEAL2 {
 			break
 		}
-		_ = v_0.Args[1]
-		v_0_0 := v_0.Args[0]
-		v_0_1 := v_0.Args[1]
-		for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
-			ptr := v_0_0
-			idx := v_0_1
-			mem := v_1
-			if !(ptr.Op != OpSB) {
-				continue
-			}
-			v.reset(Op386MOVBloadidx1)
-			v.AuxInt = off
-			v.Aux = sym
-			v.AddArg3(ptr, idx, mem)
-			return true
+		i1 := l.AuxInt
+		s1 := l.Aux
+		y := l.Args[1]
+		x := l.Args[0]
+		mem := v_1
+		if !(i1 != 0 && is32Bit(i0+i1)) {
+			break
 		}
-		break
+		v.reset(Op386MOVBload)
+		v.AuxInt = i0 + i1
+		v.Aux = s0
+		v0 := b.NewValue0(v.Pos, Op386LEAL2, l.Type)
+		v0.AuxInt = 0
+		v0.Aux = s1
+		v0.AddArg2(x, y)
+		v.AddArg2(v0, mem)
+		return true
+	}
+	// match: (MOVBload [i0] {s0} l:(LEAL4 [i1] {s1} x y) mem)
+	// cond: i1 != 0 && is32Bit(i0+i1)
+	// result: (MOVBload [i0+i1] {s0} (LEAL4 <l.Type> [0] {s1} x y) mem)
+	for {
+		i0 := v.AuxInt
+		s0 := v.Aux
+		l := v_0
+		if l.Op != Op386LEAL4 {
+			break
+		}
+		i1 := l.AuxInt
+		s1 := l.Aux
+		y := l.Args[1]
+		x := l.Args[0]
+		mem := v_1
+		if !(i1 != 0 && is32Bit(i0+i1)) {
+			break
+		}
+		v.reset(Op386MOVBload)
+		v.AuxInt = i0 + i1
+		v.Aux = s0
+		v0 := b.NewValue0(v.Pos, Op386LEAL4, l.Type)
+		v0.AuxInt = 0
+		v0.Aux = s1
+		v0.AddArg2(x, y)
+		v.AddArg2(v0, mem)
+		return true
+	}
+	// match: (MOVBload [i0] {s0} l:(LEAL8 [i1] {s1} x y) mem)
+	// cond: i1 != 0 && is32Bit(i0+i1)
+	// result: (MOVBload [i0+i1] {s0} (LEAL8 <l.Type> [0] {s1} x y) mem)
+	for {
+		i0 := v.AuxInt
+		s0 := v.Aux
+		l := v_0
+		if l.Op != Op386LEAL8 {
+			break
+		}
+		i1 := l.AuxInt
+		s1 := l.Aux
+		y := l.Args[1]
+		x := l.Args[0]
+		mem := v_1
+		if !(i1 != 0 && is32Bit(i0+i1)) {
+			break
+		}
+		v.reset(Op386MOVBload)
+		v.AuxInt = i0 + i1
+		v.Aux = s0
+		v0 := b.NewValue0(v.Pos, Op386LEAL8, l.Type)
+		v0.AuxInt = 0
+		v0.Aux = s1
+		v0.AddArg2(x, y)
+		v.AddArg2(v0, mem)
+		return true
 	}
 	// match: (MOVBload [off] {sym} (SB) _)
 	// cond: symIsRO(sym)
@@ -4319,54 +3789,6 @@
 	}
 	return false
 }
-func rewriteValue386_Op386MOVBloadidx1(v *Value) bool {
-	v_2 := v.Args[2]
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	// match: (MOVBloadidx1 [c] {sym} (ADDLconst [d] ptr) idx mem)
-	// result: (MOVBloadidx1 [int64(int32(c+d))] {sym} ptr idx mem)
-	for {
-		c := v.AuxInt
-		sym := v.Aux
-		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-			if v_0.Op != Op386ADDLconst {
-				continue
-			}
-			d := v_0.AuxInt
-			ptr := v_0.Args[0]
-			idx := v_1
-			mem := v_2
-			v.reset(Op386MOVBloadidx1)
-			v.AuxInt = int64(int32(c + d))
-			v.Aux = sym
-			v.AddArg3(ptr, idx, mem)
-			return true
-		}
-		break
-	}
-	// match: (MOVBloadidx1 [c] {sym} ptr (ADDLconst [d] idx) mem)
-	// result: (MOVBloadidx1 [int64(int32(c+d))] {sym} ptr idx mem)
-	for {
-		c := v.AuxInt
-		sym := v.Aux
-		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-			ptr := v_0
-			if v_1.Op != Op386ADDLconst {
-				continue
-			}
-			d := v_1.AuxInt
-			idx := v_1.Args[0]
-			mem := v_2
-			v.reset(Op386MOVBloadidx1)
-			v.AuxInt = int64(int32(c + d))
-			v.Aux = sym
-			v.AddArg3(ptr, idx, mem)
-			return true
-		}
-		break
-	}
-	return false
-}
 func rewriteValue386_Op386MOVBstore(v *Value) bool {
 	v_2 := v.Args[2]
 	v_1 := v.Args[1]
@@ -4473,65 +3895,13 @@
 		v.AddArg3(base, val, mem)
 		return true
 	}
-	// match: (MOVBstore [off1] {sym1} (LEAL1 [off2] {sym2} ptr idx) val mem)
-	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-	// result: (MOVBstoreidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
-	for {
-		off1 := v.AuxInt
-		sym1 := v.Aux
-		if v_0.Op != Op386LEAL1 {
-			break
-		}
-		off2 := v_0.AuxInt
-		sym2 := v_0.Aux
-		idx := v_0.Args[1]
-		ptr := v_0.Args[0]
-		val := v_1
-		mem := v_2
-		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
-			break
-		}
-		v.reset(Op386MOVBstoreidx1)
-		v.AuxInt = off1 + off2
-		v.Aux = mergeSym(sym1, sym2)
-		v.AddArg4(ptr, idx, val, mem)
-		return true
-	}
-	// match: (MOVBstore [off] {sym} (ADDL ptr idx) val mem)
-	// cond: ptr.Op != OpSB
-	// result: (MOVBstoreidx1 [off] {sym} ptr idx val mem)
-	for {
-		off := v.AuxInt
-		sym := v.Aux
-		if v_0.Op != Op386ADDL {
-			break
-		}
-		_ = v_0.Args[1]
-		v_0_0 := v_0.Args[0]
-		v_0_1 := v_0.Args[1]
-		for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
-			ptr := v_0_0
-			idx := v_0_1
-			val := v_1
-			mem := v_2
-			if !(ptr.Op != OpSB) {
-				continue
-			}
-			v.reset(Op386MOVBstoreidx1)
-			v.AuxInt = off
-			v.Aux = sym
-			v.AddArg4(ptr, idx, val, mem)
-			return true
-		}
-		break
-	}
-	// match: (MOVBstore [i] {s} p (SHRWconst [8] w) x:(MOVBstore [i-1] {s} p w mem))
-	// cond: x.Uses == 1 && clobber(x)
-	// result: (MOVWstore [i-1] {s} p w mem)
+	// match: (MOVBstore [i] {s} p1 (SHRWconst [8] w) x:(MOVBstore [i-1] {s} p0 w mem))
+	// cond: x.Uses == 1 && same(p0, p1, 1) && clobber(x)
+	// result: (MOVWstore [i-1] {s} p0 w mem)
 	for {
 		i := v.AuxInt
 		s := v.Aux
-		p := v_0
+		p1 := v_0
 		if v_1.Op != Op386SHRWconst || v_1.AuxInt != 8 {
 			break
 		}
@@ -4541,22 +3911,23 @@
 			break
 		}
 		mem := x.Args[2]
-		if p != x.Args[0] || w != x.Args[1] || !(x.Uses == 1 && clobber(x)) {
+		p0 := x.Args[0]
+		if w != x.Args[1] || !(x.Uses == 1 && same(p0, p1, 1) && clobber(x)) {
 			break
 		}
 		v.reset(Op386MOVWstore)
 		v.AuxInt = i - 1
 		v.Aux = s
-		v.AddArg3(p, w, mem)
+		v.AddArg3(p0, w, mem)
 		return true
 	}
-	// match: (MOVBstore [i] {s} p (SHRLconst [8] w) x:(MOVBstore [i-1] {s} p w mem))
-	// cond: x.Uses == 1 && clobber(x)
-	// result: (MOVWstore [i-1] {s} p w mem)
+	// match: (MOVBstore [i] {s} p1 (SHRLconst [8] w) x:(MOVBstore [i-1] {s} p0 w mem))
+	// cond: x.Uses == 1 && same(p0, p1, 1) && clobber(x)
+	// result: (MOVWstore [i-1] {s} p0 w mem)
 	for {
 		i := v.AuxInt
 		s := v.Aux
-		p := v_0
+		p1 := v_0
 		if v_1.Op != Op386SHRLconst || v_1.AuxInt != 8 {
 			break
 		}
@@ -4566,74 +3937,71 @@
 			break
 		}
 		mem := x.Args[2]
-		if p != x.Args[0] || w != x.Args[1] || !(x.Uses == 1 && clobber(x)) {
+		p0 := x.Args[0]
+		if w != x.Args[1] || !(x.Uses == 1 && same(p0, p1, 1) && clobber(x)) {
 			break
 		}
 		v.reset(Op386MOVWstore)
 		v.AuxInt = i - 1
 		v.Aux = s
-		v.AddArg3(p, w, mem)
+		v.AddArg3(p0, w, mem)
 		return true
 	}
-	// match: (MOVBstore [i] {s} p w x:(MOVBstore {s} [i+1] p (SHRWconst [8] w) mem))
-	// cond: x.Uses == 1 && clobber(x)
-	// result: (MOVWstore [i] {s} p w mem)
+	// match: (MOVBstore [i] {s} p1 w x:(MOVBstore {s} [i+1] p0 (SHRWconst [8] w) mem))
+	// cond: x.Uses == 1 && same(p0, p1, 1) && clobber(x)
+	// result: (MOVWstore [i] {s} p0 w mem)
 	for {
 		i := v.AuxInt
 		s := v.Aux
-		p := v_0
+		p1 := v_0
 		w := v_1
 		x := v_2
 		if x.Op != Op386MOVBstore || x.AuxInt != i+1 || x.Aux != s {
 			break
 		}
 		mem := x.Args[2]
-		if p != x.Args[0] {
-			break
-		}
+		p0 := x.Args[0]
 		x_1 := x.Args[1]
-		if x_1.Op != Op386SHRWconst || x_1.AuxInt != 8 || w != x_1.Args[0] || !(x.Uses == 1 && clobber(x)) {
+		if x_1.Op != Op386SHRWconst || x_1.AuxInt != 8 || w != x_1.Args[0] || !(x.Uses == 1 && same(p0, p1, 1) && clobber(x)) {
 			break
 		}
 		v.reset(Op386MOVWstore)
 		v.AuxInt = i
 		v.Aux = s
-		v.AddArg3(p, w, mem)
+		v.AddArg3(p0, w, mem)
 		return true
 	}
-	// match: (MOVBstore [i] {s} p w x:(MOVBstore {s} [i+1] p (SHRLconst [8] w) mem))
-	// cond: x.Uses == 1 && clobber(x)
-	// result: (MOVWstore [i] {s} p w mem)
+	// match: (MOVBstore [i] {s} p1 w x:(MOVBstore {s} [i+1] p0 (SHRLconst [8] w) mem))
+	// cond: x.Uses == 1 && same(p0, p1, 1) && clobber(x)
+	// result: (MOVWstore [i] {s} p0 w mem)
 	for {
 		i := v.AuxInt
 		s := v.Aux
-		p := v_0
+		p1 := v_0
 		w := v_1
 		x := v_2
 		if x.Op != Op386MOVBstore || x.AuxInt != i+1 || x.Aux != s {
 			break
 		}
 		mem := x.Args[2]
-		if p != x.Args[0] {
-			break
-		}
+		p0 := x.Args[0]
 		x_1 := x.Args[1]
-		if x_1.Op != Op386SHRLconst || x_1.AuxInt != 8 || w != x_1.Args[0] || !(x.Uses == 1 && clobber(x)) {
+		if x_1.Op != Op386SHRLconst || x_1.AuxInt != 8 || w != x_1.Args[0] || !(x.Uses == 1 && same(p0, p1, 1) && clobber(x)) {
 			break
 		}
 		v.reset(Op386MOVWstore)
 		v.AuxInt = i
 		v.Aux = s
-		v.AddArg3(p, w, mem)
+		v.AddArg3(p0, w, mem)
 		return true
 	}
-	// match: (MOVBstore [i] {s} p (SHRLconst [j] w) x:(MOVBstore [i-1] {s} p w0:(SHRLconst [j-8] w) mem))
-	// cond: x.Uses == 1 && clobber(x)
-	// result: (MOVWstore [i-1] {s} p w0 mem)
+	// match: (MOVBstore [i] {s} p1 (SHRLconst [j] w) x:(MOVBstore [i-1] {s} p0 w0:(SHRLconst [j-8] w) mem))
+	// cond: x.Uses == 1 && same(p0, p1, 1) && clobber(x)
+	// result: (MOVWstore [i-1] {s} p0 w0 mem)
 	for {
 		i := v.AuxInt
 		s := v.Aux
-		p := v_0
+		p1 := v_0
 		if v_1.Op != Op386SHRLconst {
 			break
 		}
@@ -4644,17 +4012,131 @@
 			break
 		}
 		mem := x.Args[2]
-		if p != x.Args[0] {
-			break
-		}
+		p0 := x.Args[0]
 		w0 := x.Args[1]
-		if w0.Op != Op386SHRLconst || w0.AuxInt != j-8 || w != w0.Args[0] || !(x.Uses == 1 && clobber(x)) {
+		if w0.Op != Op386SHRLconst || w0.AuxInt != j-8 || w != w0.Args[0] || !(x.Uses == 1 && same(p0, p1, 1) && clobber(x)) {
 			break
 		}
 		v.reset(Op386MOVWstore)
 		v.AuxInt = i - 1
 		v.Aux = s
-		v.AddArg3(p, w0, mem)
+		v.AddArg3(p0, w0, mem)
+		return true
+	}
+	// match: (MOVBstore [i0] {s0} l:(LEAL1 [i1] {s1} x y) val mem)
+	// cond: i1 != 0 && is32Bit(i0+i1)
+	// result: (MOVBstore [i0+i1] {s0} (LEAL1 <l.Type> [0] {s1} x y) val mem)
+	for {
+		i0 := v.AuxInt
+		s0 := v.Aux
+		l := v_0
+		if l.Op != Op386LEAL1 {
+			break
+		}
+		i1 := l.AuxInt
+		s1 := l.Aux
+		y := l.Args[1]
+		x := l.Args[0]
+		val := v_1
+		mem := v_2
+		if !(i1 != 0 && is32Bit(i0+i1)) {
+			break
+		}
+		v.reset(Op386MOVBstore)
+		v.AuxInt = i0 + i1
+		v.Aux = s0
+		v0 := b.NewValue0(v.Pos, Op386LEAL1, l.Type)
+		v0.AuxInt = 0
+		v0.Aux = s1
+		v0.AddArg2(x, y)
+		v.AddArg3(v0, val, mem)
+		return true
+	}
+	// match: (MOVBstore [i0] {s0} l:(LEAL2 [i1] {s1} x y) val mem)
+	// cond: i1 != 0 && is32Bit(i0+i1)
+	// result: (MOVBstore [i0+i1] {s0} (LEAL2 <l.Type> [0] {s1} x y) val mem)
+	for {
+		i0 := v.AuxInt
+		s0 := v.Aux
+		l := v_0
+		if l.Op != Op386LEAL2 {
+			break
+		}
+		i1 := l.AuxInt
+		s1 := l.Aux
+		y := l.Args[1]
+		x := l.Args[0]
+		val := v_1
+		mem := v_2
+		if !(i1 != 0 && is32Bit(i0+i1)) {
+			break
+		}
+		v.reset(Op386MOVBstore)
+		v.AuxInt = i0 + i1
+		v.Aux = s0
+		v0 := b.NewValue0(v.Pos, Op386LEAL2, l.Type)
+		v0.AuxInt = 0
+		v0.Aux = s1
+		v0.AddArg2(x, y)
+		v.AddArg3(v0, val, mem)
+		return true
+	}
+	// match: (MOVBstore [i0] {s0} l:(LEAL4 [i1] {s1} x y) val mem)
+	// cond: i1 != 0 && is32Bit(i0+i1)
+	// result: (MOVBstore [i0+i1] {s0} (LEAL4 <l.Type> [0] {s1} x y) val mem)
+	for {
+		i0 := v.AuxInt
+		s0 := v.Aux
+		l := v_0
+		if l.Op != Op386LEAL4 {
+			break
+		}
+		i1 := l.AuxInt
+		s1 := l.Aux
+		y := l.Args[1]
+		x := l.Args[0]
+		val := v_1
+		mem := v_2
+		if !(i1 != 0 && is32Bit(i0+i1)) {
+			break
+		}
+		v.reset(Op386MOVBstore)
+		v.AuxInt = i0 + i1
+		v.Aux = s0
+		v0 := b.NewValue0(v.Pos, Op386LEAL4, l.Type)
+		v0.AuxInt = 0
+		v0.Aux = s1
+		v0.AddArg2(x, y)
+		v.AddArg3(v0, val, mem)
+		return true
+	}
+	// match: (MOVBstore [i0] {s0} l:(LEAL8 [i1] {s1} x y) val mem)
+	// cond: i1 != 0 && is32Bit(i0+i1)
+	// result: (MOVBstore [i0+i1] {s0} (LEAL8 <l.Type> [0] {s1} x y) val mem)
+	for {
+		i0 := v.AuxInt
+		s0 := v.Aux
+		l := v_0
+		if l.Op != Op386LEAL8 {
+			break
+		}
+		i1 := l.AuxInt
+		s1 := l.Aux
+		y := l.Args[1]
+		x := l.Args[0]
+		val := v_1
+		mem := v_2
+		if !(i1 != 0 && is32Bit(i0+i1)) {
+			break
+		}
+		v.reset(Op386MOVBstore)
+		v.AuxInt = i0 + i1
+		v.Aux = s0
+		v0 := b.NewValue0(v.Pos, Op386LEAL8, l.Type)
+		v0.AuxInt = 0
+		v0.Aux = s1
+		v0.AddArg2(x, y)
+		v.AddArg3(v0, val, mem)
 		return true
 	}
 	return false
@@ -4707,53 +4189,13 @@
 		v.AddArg2(ptr, mem)
 		return true
 	}
-	// match: (MOVBstoreconst [x] {sym1} (LEAL1 [off] {sym2} ptr idx) mem)
-	// cond: canMergeSym(sym1, sym2)
-	// result: (MOVBstoreconstidx1 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
-	for {
-		x := v.AuxInt
-		sym1 := v.Aux
-		if v_0.Op != Op386LEAL1 {
-			break
-		}
-		off := v_0.AuxInt
-		sym2 := v_0.Aux
-		idx := v_0.Args[1]
-		ptr := v_0.Args[0]
-		mem := v_1
-		if !(canMergeSym(sym1, sym2)) {
-			break
-		}
-		v.reset(Op386MOVBstoreconstidx1)
-		v.AuxInt = ValAndOff(x).add(off)
-		v.Aux = mergeSym(sym1, sym2)
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
-	// match: (MOVBstoreconst [x] {sym} (ADDL ptr idx) mem)
-	// result: (MOVBstoreconstidx1 [x] {sym} ptr idx mem)
-	for {
-		x := v.AuxInt
-		sym := v.Aux
-		if v_0.Op != Op386ADDL {
-			break
-		}
-		idx := v_0.Args[1]
-		ptr := v_0.Args[0]
-		mem := v_1
-		v.reset(Op386MOVBstoreconstidx1)
-		v.AuxInt = x
-		v.Aux = sym
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
-	// match: (MOVBstoreconst [c] {s} p x:(MOVBstoreconst [a] {s} p mem))
-	// cond: x.Uses == 1 && ValAndOff(a).Off() + 1 == ValAndOff(c).Off() && clobber(x)
-	// result: (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p mem)
+	// match: (MOVBstoreconst [c] {s} p1 x:(MOVBstoreconst [a] {s} p0 mem))
+	// cond: x.Uses == 1 && ValAndOff(a).Off() + 1 == ValAndOff(c).Off() && same(p0, p1, 1) && clobber(x)
+	// result: (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p0 mem)
 	for {
 		c := v.AuxInt
 		s := v.Aux
-		p := v_0
+		p1 := v_0
 		x := v_1
 		if x.Op != Op386MOVBstoreconst {
 			break
@@ -4763,22 +4205,23 @@
 			break
 		}
 		mem := x.Args[1]
-		if p != x.Args[0] || !(x.Uses == 1 && ValAndOff(a).Off()+1 == ValAndOff(c).Off() && clobber(x)) {
+		p0 := x.Args[0]
+		if !(x.Uses == 1 && ValAndOff(a).Off()+1 == ValAndOff(c).Off() && same(p0, p1, 1) && clobber(x)) {
 			break
 		}
 		v.reset(Op386MOVWstoreconst)
 		v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xff|ValAndOff(c).Val()<<8, ValAndOff(a).Off())
 		v.Aux = s
-		v.AddArg2(p, mem)
+		v.AddArg2(p0, mem)
 		return true
 	}
-	// match: (MOVBstoreconst [a] {s} p x:(MOVBstoreconst [c] {s} p mem))
-	// cond: x.Uses == 1 && ValAndOff(a).Off() + 1 == ValAndOff(c).Off() && clobber(x)
-	// result: (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p mem)
+	// match: (MOVBstoreconst [a] {s} p1 x:(MOVBstoreconst [c] {s} p0 mem))
+	// cond: x.Uses == 1 && ValAndOff(a).Off() + 1 == ValAndOff(c).Off() && same(p0, p1, 1) && clobber(x)
+	// result: (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p0 mem)
 	for {
 		a := v.AuxInt
 		s := v.Aux
-		p := v_0
+		p1 := v_0
 		x := v_1
 		if x.Op != Op386MOVBstoreconst {
 			break
@@ -4788,308 +4231,18 @@
 			break
 		}
 		mem := x.Args[1]
-		if p != x.Args[0] || !(x.Uses == 1 && ValAndOff(a).Off()+1 == ValAndOff(c).Off() && clobber(x)) {
+		p0 := x.Args[0]
+		if !(x.Uses == 1 && ValAndOff(a).Off()+1 == ValAndOff(c).Off() && same(p0, p1, 1) && clobber(x)) {
 			break
 		}
 		v.reset(Op386MOVWstoreconst)
 		v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xff|ValAndOff(c).Val()<<8, ValAndOff(a).Off())
 		v.Aux = s
-		v.AddArg2(p, mem)
+		v.AddArg2(p0, mem)
 		return true
 	}
 	return false
 }
-func rewriteValue386_Op386MOVBstoreconstidx1(v *Value) bool {
-	v_2 := v.Args[2]
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	// match: (MOVBstoreconstidx1 [x] {sym} (ADDLconst [c] ptr) idx mem)
-	// result: (MOVBstoreconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
-	for {
-		x := v.AuxInt
-		sym := v.Aux
-		if v_0.Op != Op386ADDLconst {
-			break
-		}
-		c := v_0.AuxInt
-		ptr := v_0.Args[0]
-		idx := v_1
-		mem := v_2
-		v.reset(Op386MOVBstoreconstidx1)
-		v.AuxInt = ValAndOff(x).add(c)
-		v.Aux = sym
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
-	// match: (MOVBstoreconstidx1 [x] {sym} ptr (ADDLconst [c] idx) mem)
-	// result: (MOVBstoreconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
-	for {
-		x := v.AuxInt
-		sym := v.Aux
-		ptr := v_0
-		if v_1.Op != Op386ADDLconst {
-			break
-		}
-		c := v_1.AuxInt
-		idx := v_1.Args[0]
-		mem := v_2
-		v.reset(Op386MOVBstoreconstidx1)
-		v.AuxInt = ValAndOff(x).add(c)
-		v.Aux = sym
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
-	// match: (MOVBstoreconstidx1 [c] {s} p i x:(MOVBstoreconstidx1 [a] {s} p i mem))
-	// cond: x.Uses == 1 && ValAndOff(a).Off() + 1 == ValAndOff(c).Off() && clobber(x)
-	// result: (MOVWstoreconstidx1 [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p i mem)
-	for {
-		c := v.AuxInt
-		s := v.Aux
-		p := v_0
-		i := v_1
-		x := v_2
-		if x.Op != Op386MOVBstoreconstidx1 {
-			break
-		}
-		a := x.AuxInt
-		if x.Aux != s {
-			break
-		}
-		mem := x.Args[2]
-		if p != x.Args[0] || i != x.Args[1] || !(x.Uses == 1 && ValAndOff(a).Off()+1 == ValAndOff(c).Off() && clobber(x)) {
-			break
-		}
-		v.reset(Op386MOVWstoreconstidx1)
-		v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xff|ValAndOff(c).Val()<<8, ValAndOff(a).Off())
-		v.Aux = s
-		v.AddArg3(p, i, mem)
-		return true
-	}
-	return false
-}
-func rewriteValue386_Op386MOVBstoreidx1(v *Value) bool {
-	v_3 := v.Args[3]
-	v_2 := v.Args[2]
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	// match: (MOVBstoreidx1 [c] {sym} (ADDLconst [d] ptr) idx val mem)
-	// result: (MOVBstoreidx1 [int64(int32(c+d))] {sym} ptr idx val mem)
-	for {
-		c := v.AuxInt
-		sym := v.Aux
-		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-			if v_0.Op != Op386ADDLconst {
-				continue
-			}
-			d := v_0.AuxInt
-			ptr := v_0.Args[0]
-			idx := v_1
-			val := v_2
-			mem := v_3
-			v.reset(Op386MOVBstoreidx1)
-			v.AuxInt = int64(int32(c + d))
-			v.Aux = sym
-			v.AddArg4(ptr, idx, val, mem)
-			return true
-		}
-		break
-	}
-	// match: (MOVBstoreidx1 [c] {sym} ptr (ADDLconst [d] idx) val mem)
-	// result: (MOVBstoreidx1 [int64(int32(c+d))] {sym} ptr idx val mem)
-	for {
-		c := v.AuxInt
-		sym := v.Aux
-		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-			ptr := v_0
-			if v_1.Op != Op386ADDLconst {
-				continue
-			}
-			d := v_1.AuxInt
-			idx := v_1.Args[0]
-			val := v_2
-			mem := v_3
-			v.reset(Op386MOVBstoreidx1)
-			v.AuxInt = int64(int32(c + d))
-			v.Aux = sym
-			v.AddArg4(ptr, idx, val, mem)
-			return true
-		}
-		break
-	}
-	// match: (MOVBstoreidx1 [i] {s} p idx (SHRLconst [8] w) x:(MOVBstoreidx1 [i-1] {s} p idx w mem))
-	// cond: x.Uses == 1 && clobber(x)
-	// result: (MOVWstoreidx1 [i-1] {s} p idx w mem)
-	for {
-		i := v.AuxInt
-		s := v.Aux
-		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-			p := v_0
-			idx := v_1
-			if v_2.Op != Op386SHRLconst || v_2.AuxInt != 8 {
-				continue
-			}
-			w := v_2.Args[0]
-			x := v_3
-			if x.Op != Op386MOVBstoreidx1 || x.AuxInt != i-1 || x.Aux != s {
-				continue
-			}
-			mem := x.Args[3]
-			x_0 := x.Args[0]
-			x_1 := x.Args[1]
-			for _i1 := 0; _i1 <= 1; _i1, x_0, x_1 = _i1+1, x_1, x_0 {
-				if p != x_0 || idx != x_1 || w != x.Args[2] || !(x.Uses == 1 && clobber(x)) {
-					continue
-				}
-				v.reset(Op386MOVWstoreidx1)
-				v.AuxInt = i - 1
-				v.Aux = s
-				v.AddArg4(p, idx, w, mem)
-				return true
-			}
-		}
-		break
-	}
-	// match: (MOVBstoreidx1 [i] {s} p idx (SHRWconst [8] w) x:(MOVBstoreidx1 [i-1] {s} p idx w mem))
-	// cond: x.Uses == 1 && clobber(x)
-	// result: (MOVWstoreidx1 [i-1] {s} p idx w mem)
-	for {
-		i := v.AuxInt
-		s := v.Aux
-		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-			p := v_0
-			idx := v_1
-			if v_2.Op != Op386SHRWconst || v_2.AuxInt != 8 {
-				continue
-			}
-			w := v_2.Args[0]
-			x := v_3
-			if x.Op != Op386MOVBstoreidx1 || x.AuxInt != i-1 || x.Aux != s {
-				continue
-			}
-			mem := x.Args[3]
-			x_0 := x.Args[0]
-			x_1 := x.Args[1]
-			for _i1 := 0; _i1 <= 1; _i1, x_0, x_1 = _i1+1, x_1, x_0 {
-				if p != x_0 || idx != x_1 || w != x.Args[2] || !(x.Uses == 1 && clobber(x)) {
-					continue
-				}
-				v.reset(Op386MOVWstoreidx1)
-				v.AuxInt = i - 1
-				v.Aux = s
-				v.AddArg4(p, idx, w, mem)
-				return true
-			}
-		}
-		break
-	}
-	// match: (MOVBstoreidx1 [i] {s} p idx w x:(MOVBstoreidx1 [i+1] {s} p idx (SHRLconst [8] w) mem))
-	// cond: x.Uses == 1 && clobber(x)
-	// result: (MOVWstoreidx1 [i] {s} p idx w mem)
-	for {
-		i := v.AuxInt
-		s := v.Aux
-		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-			p := v_0
-			idx := v_1
-			w := v_2
-			x := v_3
-			if x.Op != Op386MOVBstoreidx1 || x.AuxInt != i+1 || x.Aux != s {
-				continue
-			}
-			mem := x.Args[3]
-			x_0 := x.Args[0]
-			x_1 := x.Args[1]
-			for _i1 := 0; _i1 <= 1; _i1, x_0, x_1 = _i1+1, x_1, x_0 {
-				if p != x_0 || idx != x_1 {
-					continue
-				}
-				x_2 := x.Args[2]
-				if x_2.Op != Op386SHRLconst || x_2.AuxInt != 8 || w != x_2.Args[0] || !(x.Uses == 1 && clobber(x)) {
-					continue
-				}
-				v.reset(Op386MOVWstoreidx1)
-				v.AuxInt = i
-				v.Aux = s
-				v.AddArg4(p, idx, w, mem)
-				return true
-			}
-		}
-		break
-	}
-	// match: (MOVBstoreidx1 [i] {s} p idx w x:(MOVBstoreidx1 [i+1] {s} p idx (SHRWconst [8] w) mem))
-	// cond: x.Uses == 1 && clobber(x)
-	// result: (MOVWstoreidx1 [i] {s} p idx w mem)
-	for {
-		i := v.AuxInt
-		s := v.Aux
-		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-			p := v_0
-			idx := v_1
-			w := v_2
-			x := v_3
-			if x.Op != Op386MOVBstoreidx1 || x.AuxInt != i+1 || x.Aux != s {
-				continue
-			}
-			mem := x.Args[3]
-			x_0 := x.Args[0]
-			x_1 := x.Args[1]
-			for _i1 := 0; _i1 <= 1; _i1, x_0, x_1 = _i1+1, x_1, x_0 {
-				if p != x_0 || idx != x_1 {
-					continue
-				}
-				x_2 := x.Args[2]
-				if x_2.Op != Op386SHRWconst || x_2.AuxInt != 8 || w != x_2.Args[0] || !(x.Uses == 1 && clobber(x)) {
-					continue
-				}
-				v.reset(Op386MOVWstoreidx1)
-				v.AuxInt = i
-				v.Aux = s
-				v.AddArg4(p, idx, w, mem)
-				return true
-			}
-		}
-		break
-	}
-	// match: (MOVBstoreidx1 [i] {s} p idx (SHRLconst [j] w) x:(MOVBstoreidx1 [i-1] {s} p idx w0:(SHRLconst [j-8] w) mem))
-	// cond: x.Uses == 1 && clobber(x)
-	// result: (MOVWstoreidx1 [i-1] {s} p idx w0 mem)
-	for {
-		i := v.AuxInt
-		s := v.Aux
-		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-			p := v_0
-			idx := v_1
-			if v_2.Op != Op386SHRLconst {
-				continue
-			}
-			j := v_2.AuxInt
-			w := v_2.Args[0]
-			x := v_3
-			if x.Op != Op386MOVBstoreidx1 || x.AuxInt != i-1 || x.Aux != s {
-				continue
-			}
-			mem := x.Args[3]
-			x_0 := x.Args[0]
-			x_1 := x.Args[1]
-			for _i1 := 0; _i1 <= 1; _i1, x_0, x_1 = _i1+1, x_1, x_0 {
-				if p != x_0 || idx != x_1 {
-					continue
-				}
-				w0 := x.Args[2]
-				if w0.Op != Op386SHRLconst || w0.AuxInt != j-8 || w != w0.Args[0] || !(x.Uses == 1 && clobber(x)) {
-					continue
-				}
-				v.reset(Op386MOVWstoreidx1)
-				v.AuxInt = i - 1
-				v.Aux = s
-				v.AddArg4(p, idx, w0, mem)
-				return true
-			}
-		}
-		break
-	}
-	return false
-}
 func rewriteValue386_Op386MOVLload(v *Value) bool {
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
@@ -5158,78 +4311,117 @@
 		v.AddArg2(base, mem)
 		return true
 	}
-	// match: (MOVLload [off1] {sym1} (LEAL1 [off2] {sym2} ptr idx) mem)
-	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-	// result: (MOVLloadidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
+	// match: (MOVLload [i0] {s0} l:(LEAL1 [i1] {s1} x y) mem)
+	// cond: i1 != 0 && is32Bit(i0+i1)
+	// result: (MOVLload [i0+i1] {s0} (LEAL1 <l.Type> [0] {s1} x y) mem)
 	for {
-		off1 := v.AuxInt
-		sym1 := v.Aux
-		if v_0.Op != Op386LEAL1 {
+		i0 := v.AuxInt
+		s0 := v.Aux
+		l := v_0
+		if l.Op != Op386LEAL1 {
 			break
 		}
-		off2 := v_0.AuxInt
-		sym2 := v_0.Aux
-		idx := v_0.Args[1]
-		ptr := v_0.Args[0]
+		i1 := l.AuxInt
+		s1 := l.Aux
+		y := l.Args[1]
+		x := l.Args[0]
 		mem := v_1
-		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+		if !(i1 != 0 && is32Bit(i0+i1)) {
 			break
 		}
-		v.reset(Op386MOVLloadidx1)
-		v.AuxInt = off1 + off2
-		v.Aux = mergeSym(sym1, sym2)
-		v.AddArg3(ptr, idx, mem)
+		v.reset(Op386MOVLload)
+		v.AuxInt = i0 + i1
+		v.Aux = s0
+		v0 := b.NewValue0(v.Pos, Op386LEAL1, l.Type)
+		v0.AuxInt = 0
+		v0.Aux = s1
+		v0.AddArg2(x, y)
+		v.AddArg2(v0, mem)
 		return true
 	}
-	// match: (MOVLload [off1] {sym1} (LEAL4 [off2] {sym2} ptr idx) mem)
-	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-	// result: (MOVLloadidx4 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
+	// match: (MOVLload [i0] {s0} l:(LEAL2 [i1] {s1} x y) mem)
+	// cond: i1 != 0 && is32Bit(i0+i1)
+	// result: (MOVLload [i0+i1] {s0} (LEAL2 <l.Type> [0] {s1} x y) mem)
 	for {
-		off1 := v.AuxInt
-		sym1 := v.Aux
-		if v_0.Op != Op386LEAL4 {
+		i0 := v.AuxInt
+		s0 := v.Aux
+		l := v_0
+		if l.Op != Op386LEAL2 {
 			break
 		}
-		off2 := v_0.AuxInt
-		sym2 := v_0.Aux
-		idx := v_0.Args[1]
-		ptr := v_0.Args[0]
+		i1 := l.AuxInt
+		s1 := l.Aux
+		y := l.Args[1]
+		x := l.Args[0]
 		mem := v_1
-		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+		if !(i1 != 0 && is32Bit(i0+i1)) {
 			break
 		}
-		v.reset(Op386MOVLloadidx4)
-		v.AuxInt = off1 + off2
-		v.Aux = mergeSym(sym1, sym2)
-		v.AddArg3(ptr, idx, mem)
+		v.reset(Op386MOVLload)
+		v.AuxInt = i0 + i1
+		v.Aux = s0
+		v0 := b.NewValue0(v.Pos, Op386LEAL2, l.Type)
+		v0.AuxInt = 0
+		v0.Aux = s1
+		v0.AddArg2(x, y)
+		v.AddArg2(v0, mem)
 		return true
 	}
-	// match: (MOVLload [off] {sym} (ADDL ptr idx) mem)
-	// cond: ptr.Op != OpSB
-	// result: (MOVLloadidx1 [off] {sym} ptr idx mem)
+	// match: (MOVLload [i0] {s0} l:(LEAL4 [i1] {s1} x y) mem)
+	// cond: i1 != 0 && is32Bit(i0+i1)
+	// result: (MOVLload [i0+i1] {s0} (LEAL4 <l.Type> [0] {s1} x y) mem)
 	for {
-		off := v.AuxInt
-		sym := v.Aux
-		if v_0.Op != Op386ADDL {
+		i0 := v.AuxInt
+		s0 := v.Aux
+		l := v_0
+		if l.Op != Op386LEAL4 {
 			break
 		}
-		_ = v_0.Args[1]
-		v_0_0 := v_0.Args[0]
-		v_0_1 := v_0.Args[1]
-		for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
-			ptr := v_0_0
-			idx := v_0_1
-			mem := v_1
-			if !(ptr.Op != OpSB) {
-				continue
-			}
-			v.reset(Op386MOVLloadidx1)
-			v.AuxInt = off
-			v.Aux = sym
-			v.AddArg3(ptr, idx, mem)
-			return true
+		i1 := l.AuxInt
+		s1 := l.Aux
+		y := l.Args[1]
+		x := l.Args[0]
+		mem := v_1
+		if !(i1 != 0 && is32Bit(i0+i1)) {
+			break
 		}
-		break
+		v.reset(Op386MOVLload)
+		v.AuxInt = i0 + i1
+		v.Aux = s0
+		v0 := b.NewValue0(v.Pos, Op386LEAL4, l.Type)
+		v0.AuxInt = 0
+		v0.Aux = s1
+		v0.AddArg2(x, y)
+		v.AddArg2(v0, mem)
+		return true
+	}
+	// match: (MOVLload [i0] {s0} l:(LEAL8 [i1] {s1} x y) mem)
+	// cond: i1 != 0 && is32Bit(i0+i1)
+	// result: (MOVLload [i0+i1] {s0} (LEAL8 <l.Type> [0] {s1} x y) mem)
+	for {
+		i0 := v.AuxInt
+		s0 := v.Aux
+		l := v_0
+		if l.Op != Op386LEAL8 {
+			break
+		}
+		i1 := l.AuxInt
+		s1 := l.Aux
+		y := l.Args[1]
+		x := l.Args[0]
+		mem := v_1
+		if !(i1 != 0 && is32Bit(i0+i1)) {
+			break
+		}
+		v.reset(Op386MOVLload)
+		v.AuxInt = i0 + i1
+		v.Aux = s0
+		v0 := b.NewValue0(v.Pos, Op386LEAL8, l.Type)
+		v0.AuxInt = 0
+		v0.Aux = s1
+		v0.AddArg2(x, y)
+		v.AddArg2(v0, mem)
+		return true
 	}
 	// match: (MOVLload [off] {sym} (SB) _)
 	// cond: symIsRO(sym)
@@ -5246,116 +4438,6 @@
 	}
 	return false
 }
-func rewriteValue386_Op386MOVLloadidx1(v *Value) bool {
-	v_2 := v.Args[2]
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	// match: (MOVLloadidx1 [c] {sym} ptr (SHLLconst [2] idx) mem)
-	// result: (MOVLloadidx4 [c] {sym} ptr idx mem)
-	for {
-		c := v.AuxInt
-		sym := v.Aux
-		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-			ptr := v_0
-			if v_1.Op != Op386SHLLconst || v_1.AuxInt != 2 {
-				continue
-			}
-			idx := v_1.Args[0]
-			mem := v_2
-			v.reset(Op386MOVLloadidx4)
-			v.AuxInt = c
-			v.Aux = sym
-			v.AddArg3(ptr, idx, mem)
-			return true
-		}
-		break
-	}
-	// match: (MOVLloadidx1 [c] {sym} (ADDLconst [d] ptr) idx mem)
-	// result: (MOVLloadidx1 [int64(int32(c+d))] {sym} ptr idx mem)
-	for {
-		c := v.AuxInt
-		sym := v.Aux
-		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-			if v_0.Op != Op386ADDLconst {
-				continue
-			}
-			d := v_0.AuxInt
-			ptr := v_0.Args[0]
-			idx := v_1
-			mem := v_2
-			v.reset(Op386MOVLloadidx1)
-			v.AuxInt = int64(int32(c + d))
-			v.Aux = sym
-			v.AddArg3(ptr, idx, mem)
-			return true
-		}
-		break
-	}
-	// match: (MOVLloadidx1 [c] {sym} ptr (ADDLconst [d] idx) mem)
-	// result: (MOVLloadidx1 [int64(int32(c+d))] {sym} ptr idx mem)
-	for {
-		c := v.AuxInt
-		sym := v.Aux
-		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-			ptr := v_0
-			if v_1.Op != Op386ADDLconst {
-				continue
-			}
-			d := v_1.AuxInt
-			idx := v_1.Args[0]
-			mem := v_2
-			v.reset(Op386MOVLloadidx1)
-			v.AuxInt = int64(int32(c + d))
-			v.Aux = sym
-			v.AddArg3(ptr, idx, mem)
-			return true
-		}
-		break
-	}
-	return false
-}
-func rewriteValue386_Op386MOVLloadidx4(v *Value) bool {
-	v_2 := v.Args[2]
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	// match: (MOVLloadidx4 [c] {sym} (ADDLconst [d] ptr) idx mem)
-	// result: (MOVLloadidx4 [int64(int32(c+d))] {sym} ptr idx mem)
-	for {
-		c := v.AuxInt
-		sym := v.Aux
-		if v_0.Op != Op386ADDLconst {
-			break
-		}
-		d := v_0.AuxInt
-		ptr := v_0.Args[0]
-		idx := v_1
-		mem := v_2
-		v.reset(Op386MOVLloadidx4)
-		v.AuxInt = int64(int32(c + d))
-		v.Aux = sym
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
-	// match: (MOVLloadidx4 [c] {sym} ptr (ADDLconst [d] idx) mem)
-	// result: (MOVLloadidx4 [int64(int32(c+4*d))] {sym} ptr idx mem)
-	for {
-		c := v.AuxInt
-		sym := v.Aux
-		ptr := v_0
-		if v_1.Op != Op386ADDLconst {
-			break
-		}
-		d := v_1.AuxInt
-		idx := v_1.Args[0]
-		mem := v_2
-		v.reset(Op386MOVLloadidx4)
-		v.AuxInt = int64(int32(c + 4*d))
-		v.Aux = sym
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
-	return false
-}
 func rewriteValue386_Op386MOVLstore(v *Value) bool {
 	v_2 := v.Args[2]
 	v_1 := v.Args[1]
@@ -5428,82 +4510,6 @@
 		v.AddArg3(base, val, mem)
 		return true
 	}
-	// match: (MOVLstore [off1] {sym1} (LEAL1 [off2] {sym2} ptr idx) val mem)
-	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-	// result: (MOVLstoreidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
-	for {
-		off1 := v.AuxInt
-		sym1 := v.Aux
-		if v_0.Op != Op386LEAL1 {
-			break
-		}
-		off2 := v_0.AuxInt
-		sym2 := v_0.Aux
-		idx := v_0.Args[1]
-		ptr := v_0.Args[0]
-		val := v_1
-		mem := v_2
-		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
-			break
-		}
-		v.reset(Op386MOVLstoreidx1)
-		v.AuxInt = off1 + off2
-		v.Aux = mergeSym(sym1, sym2)
-		v.AddArg4(ptr, idx, val, mem)
-		return true
-	}
-	// match: (MOVLstore [off1] {sym1} (LEAL4 [off2] {sym2} ptr idx) val mem)
-	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-	// result: (MOVLstoreidx4 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
-	for {
-		off1 := v.AuxInt
-		sym1 := v.Aux
-		if v_0.Op != Op386LEAL4 {
-			break
-		}
-		off2 := v_0.AuxInt
-		sym2 := v_0.Aux
-		idx := v_0.Args[1]
-		ptr := v_0.Args[0]
-		val := v_1
-		mem := v_2
-		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
-			break
-		}
-		v.reset(Op386MOVLstoreidx4)
-		v.AuxInt = off1 + off2
-		v.Aux = mergeSym(sym1, sym2)
-		v.AddArg4(ptr, idx, val, mem)
-		return true
-	}
-	// match: (MOVLstore [off] {sym} (ADDL ptr idx) val mem)
-	// cond: ptr.Op != OpSB
-	// result: (MOVLstoreidx1 [off] {sym} ptr idx val mem)
-	for {
-		off := v.AuxInt
-		sym := v.Aux
-		if v_0.Op != Op386ADDL {
-			break
-		}
-		_ = v_0.Args[1]
-		v_0_0 := v_0.Args[0]
-		v_0_1 := v_0.Args[1]
-		for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
-			ptr := v_0_0
-			idx := v_0_1
-			val := v_1
-			mem := v_2
-			if !(ptr.Op != OpSB) {
-				continue
-			}
-			v.reset(Op386MOVLstoreidx1)
-			v.AuxInt = off
-			v.Aux = sym
-			v.AddArg4(ptr, idx, val, mem)
-			return true
-		}
-		break
-	}
 	// match: (MOVLstore {sym} [off] ptr y:(ADDLload x [off] {sym} ptr mem) mem)
 	// cond: y.Uses==1 && clobber(y)
 	// result: (ADDLmodify [off] {sym} ptr x mem)
@@ -5862,6 +4868,122 @@
 		v.AddArg2(ptr, mem)
 		return true
 	}
+	// match: (MOVLstore [i0] {s0} l:(LEAL1 [i1] {s1} x y) val mem)
+	// cond: i1 != 0 && is32Bit(i0+i1)
+	// result: (MOVLstore [i0+i1] {s0} (LEAL1 <l.Type> [0] {s1} x y) val mem)
+	for {
+		i0 := v.AuxInt
+		s0 := v.Aux
+		l := v_0
+		if l.Op != Op386LEAL1 {
+			break
+		}
+		i1 := l.AuxInt
+		s1 := l.Aux
+		y := l.Args[1]
+		x := l.Args[0]
+		val := v_1
+		mem := v_2
+		if !(i1 != 0 && is32Bit(i0+i1)) {
+			break
+		}
+		v.reset(Op386MOVLstore)
+		v.AuxInt = i0 + i1
+		v.Aux = s0
+		v0 := b.NewValue0(v.Pos, Op386LEAL1, l.Type)
+		v0.AuxInt = 0
+		v0.Aux = s1
+		v0.AddArg2(x, y)
+		v.AddArg3(v0, val, mem)
+		return true
+	}
+	// match: (MOVLstore [i0] {s0} l:(LEAL2 [i1] {s1} x y) val mem)
+	// cond: i1 != 0 && is32Bit(i0+i1)
+	// result: (MOVLstore [i0+i1] {s0} (LEAL2 <l.Type> [0] {s1} x y) val mem)
+	for {
+		i0 := v.AuxInt
+		s0 := v.Aux
+		l := v_0
+		if l.Op != Op386LEAL2 {
+			break
+		}
+		i1 := l.AuxInt
+		s1 := l.Aux
+		y := l.Args[1]
+		x := l.Args[0]
+		val := v_1
+		mem := v_2
+		if !(i1 != 0 && is32Bit(i0+i1)) {
+			break
+		}
+		v.reset(Op386MOVLstore)
+		v.AuxInt = i0 + i1
+		v.Aux = s0
+		v0 := b.NewValue0(v.Pos, Op386LEAL2, l.Type)
+		v0.AuxInt = 0
+		v0.Aux = s1
+		v0.AddArg2(x, y)
+		v.AddArg3(v0, val, mem)
+		return true
+	}
+	// match: (MOVLstore [i0] {s0} l:(LEAL4 [i1] {s1} x y) val mem)
+	// cond: i1 != 0 && is32Bit(i0+i1)
+	// result: (MOVLstore [i0+i1] {s0} (LEAL4 <l.Type> [0] {s1} x y) val mem)
+	for {
+		i0 := v.AuxInt
+		s0 := v.Aux
+		l := v_0
+		if l.Op != Op386LEAL4 {
+			break
+		}
+		i1 := l.AuxInt
+		s1 := l.Aux
+		y := l.Args[1]
+		x := l.Args[0]
+		val := v_1
+		mem := v_2
+		if !(i1 != 0 && is32Bit(i0+i1)) {
+			break
+		}
+		v.reset(Op386MOVLstore)
+		v.AuxInt = i0 + i1
+		v.Aux = s0
+		v0 := b.NewValue0(v.Pos, Op386LEAL4, l.Type)
+		v0.AuxInt = 0
+		v0.Aux = s1
+		v0.AddArg2(x, y)
+		v.AddArg3(v0, val, mem)
+		return true
+	}
+	// match: (MOVLstore [i0] {s0} l:(LEAL8 [i1] {s1} x y) val mem)
+	// cond: i1 != 0 && is32Bit(i0+i1)
+	// result: (MOVLstore [i0+i1] {s0} (LEAL8 <l.Type> [0] {s1} x y) val mem)
+	for {
+		i0 := v.AuxInt
+		s0 := v.Aux
+		l := v_0
+		if l.Op != Op386LEAL8 {
+			break
+		}
+		i1 := l.AuxInt
+		s1 := l.Aux
+		y := l.Args[1]
+		x := l.Args[0]
+		val := v_1
+		mem := v_2
+		if !(i1 != 0 && is32Bit(i0+i1)) {
+			break
+		}
+		v.reset(Op386MOVLstore)
+		v.AuxInt = i0 + i1
+		v.Aux = s0
+		v0 := b.NewValue0(v.Pos, Op386LEAL8, l.Type)
+		v0.AuxInt = 0
+		v0.Aux = s1
+		v0.AddArg2(x, y)
+		v.AddArg3(v0, val, mem)
+		return true
+	}
 	return false
 }
 func rewriteValue386_Op386MOVLstoreconst(v *Value) bool {
@@ -5912,658 +5034,6 @@
 		v.AddArg2(ptr, mem)
 		return true
 	}
-	// match: (MOVLstoreconst [x] {sym1} (LEAL1 [off] {sym2} ptr idx) mem)
-	// cond: canMergeSym(sym1, sym2)
-	// result: (MOVLstoreconstidx1 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
-	for {
-		x := v.AuxInt
-		sym1 := v.Aux
-		if v_0.Op != Op386LEAL1 {
-			break
-		}
-		off := v_0.AuxInt
-		sym2 := v_0.Aux
-		idx := v_0.Args[1]
-		ptr := v_0.Args[0]
-		mem := v_1
-		if !(canMergeSym(sym1, sym2)) {
-			break
-		}
-		v.reset(Op386MOVLstoreconstidx1)
-		v.AuxInt = ValAndOff(x).add(off)
-		v.Aux = mergeSym(sym1, sym2)
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
-	// match: (MOVLstoreconst [x] {sym1} (LEAL4 [off] {sym2} ptr idx) mem)
-	// cond: canMergeSym(sym1, sym2)
-	// result: (MOVLstoreconstidx4 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
-	for {
-		x := v.AuxInt
-		sym1 := v.Aux
-		if v_0.Op != Op386LEAL4 {
-			break
-		}
-		off := v_0.AuxInt
-		sym2 := v_0.Aux
-		idx := v_0.Args[1]
-		ptr := v_0.Args[0]
-		mem := v_1
-		if !(canMergeSym(sym1, sym2)) {
-			break
-		}
-		v.reset(Op386MOVLstoreconstidx4)
-		v.AuxInt = ValAndOff(x).add(off)
-		v.Aux = mergeSym(sym1, sym2)
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
-	// match: (MOVLstoreconst [x] {sym} (ADDL ptr idx) mem)
-	// result: (MOVLstoreconstidx1 [x] {sym} ptr idx mem)
-	for {
-		x := v.AuxInt
-		sym := v.Aux
-		if v_0.Op != Op386ADDL {
-			break
-		}
-		idx := v_0.Args[1]
-		ptr := v_0.Args[0]
-		mem := v_1
-		v.reset(Op386MOVLstoreconstidx1)
-		v.AuxInt = x
-		v.Aux = sym
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
-	return false
-}
-func rewriteValue386_Op386MOVLstoreconstidx1(v *Value) bool {
-	v_2 := v.Args[2]
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	// match: (MOVLstoreconstidx1 [c] {sym} ptr (SHLLconst [2] idx) mem)
-	// result: (MOVLstoreconstidx4 [c] {sym} ptr idx mem)
-	for {
-		c := v.AuxInt
-		sym := v.Aux
-		ptr := v_0
-		if v_1.Op != Op386SHLLconst || v_1.AuxInt != 2 {
-			break
-		}
-		idx := v_1.Args[0]
-		mem := v_2
-		v.reset(Op386MOVLstoreconstidx4)
-		v.AuxInt = c
-		v.Aux = sym
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
-	// match: (MOVLstoreconstidx1 [x] {sym} (ADDLconst [c] ptr) idx mem)
-	// result: (MOVLstoreconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
-	for {
-		x := v.AuxInt
-		sym := v.Aux
-		if v_0.Op != Op386ADDLconst {
-			break
-		}
-		c := v_0.AuxInt
-		ptr := v_0.Args[0]
-		idx := v_1
-		mem := v_2
-		v.reset(Op386MOVLstoreconstidx1)
-		v.AuxInt = ValAndOff(x).add(c)
-		v.Aux = sym
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
-	// match: (MOVLstoreconstidx1 [x] {sym} ptr (ADDLconst [c] idx) mem)
-	// result: (MOVLstoreconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
-	for {
-		x := v.AuxInt
-		sym := v.Aux
-		ptr := v_0
-		if v_1.Op != Op386ADDLconst {
-			break
-		}
-		c := v_1.AuxInt
-		idx := v_1.Args[0]
-		mem := v_2
-		v.reset(Op386MOVLstoreconstidx1)
-		v.AuxInt = ValAndOff(x).add(c)
-		v.Aux = sym
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
-	return false
-}
-func rewriteValue386_Op386MOVLstoreconstidx4(v *Value) bool {
-	v_2 := v.Args[2]
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	// match: (MOVLstoreconstidx4 [x] {sym} (ADDLconst [c] ptr) idx mem)
-	// result: (MOVLstoreconstidx4 [ValAndOff(x).add(c)] {sym} ptr idx mem)
-	for {
-		x := v.AuxInt
-		sym := v.Aux
-		if v_0.Op != Op386ADDLconst {
-			break
-		}
-		c := v_0.AuxInt
-		ptr := v_0.Args[0]
-		idx := v_1
-		mem := v_2
-		v.reset(Op386MOVLstoreconstidx4)
-		v.AuxInt = ValAndOff(x).add(c)
-		v.Aux = sym
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
-	// match: (MOVLstoreconstidx4 [x] {sym} ptr (ADDLconst [c] idx) mem)
-	// result: (MOVLstoreconstidx4 [ValAndOff(x).add(4*c)] {sym} ptr idx mem)
-	for {
-		x := v.AuxInt
-		sym := v.Aux
-		ptr := v_0
-		if v_1.Op != Op386ADDLconst {
-			break
-		}
-		c := v_1.AuxInt
-		idx := v_1.Args[0]
-		mem := v_2
-		v.reset(Op386MOVLstoreconstidx4)
-		v.AuxInt = ValAndOff(x).add(4 * c)
-		v.Aux = sym
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
-	return false
-}
-func rewriteValue386_Op386MOVLstoreidx1(v *Value) bool {
-	v_3 := v.Args[3]
-	v_2 := v.Args[2]
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	// match: (MOVLstoreidx1 [c] {sym} ptr (SHLLconst [2] idx) val mem)
-	// result: (MOVLstoreidx4 [c] {sym} ptr idx val mem)
-	for {
-		c := v.AuxInt
-		sym := v.Aux
-		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-			ptr := v_0
-			if v_1.Op != Op386SHLLconst || v_1.AuxInt != 2 {
-				continue
-			}
-			idx := v_1.Args[0]
-			val := v_2
-			mem := v_3
-			v.reset(Op386MOVLstoreidx4)
-			v.AuxInt = c
-			v.Aux = sym
-			v.AddArg4(ptr, idx, val, mem)
-			return true
-		}
-		break
-	}
-	// match: (MOVLstoreidx1 [c] {sym} (ADDLconst [d] ptr) idx val mem)
-	// result: (MOVLstoreidx1 [int64(int32(c+d))] {sym} ptr idx val mem)
-	for {
-		c := v.AuxInt
-		sym := v.Aux
-		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-			if v_0.Op != Op386ADDLconst {
-				continue
-			}
-			d := v_0.AuxInt
-			ptr := v_0.Args[0]
-			idx := v_1
-			val := v_2
-			mem := v_3
-			v.reset(Op386MOVLstoreidx1)
-			v.AuxInt = int64(int32(c + d))
-			v.Aux = sym
-			v.AddArg4(ptr, idx, val, mem)
-			return true
-		}
-		break
-	}
-	// match: (MOVLstoreidx1 [c] {sym} ptr (ADDLconst [d] idx) val mem)
-	// result: (MOVLstoreidx1 [int64(int32(c+d))] {sym} ptr idx val mem)
-	for {
-		c := v.AuxInt
-		sym := v.Aux
-		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-			ptr := v_0
-			if v_1.Op != Op386ADDLconst {
-				continue
-			}
-			d := v_1.AuxInt
-			idx := v_1.Args[0]
-			val := v_2
-			mem := v_3
-			v.reset(Op386MOVLstoreidx1)
-			v.AuxInt = int64(int32(c + d))
-			v.Aux = sym
-			v.AddArg4(ptr, idx, val, mem)
-			return true
-		}
-		break
-	}
-	return false
-}
-func rewriteValue386_Op386MOVLstoreidx4(v *Value) bool {
-	v_3 := v.Args[3]
-	v_2 := v.Args[2]
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	// match: (MOVLstoreidx4 [c] {sym} (ADDLconst [d] ptr) idx val mem)
-	// result: (MOVLstoreidx4 [int64(int32(c+d))] {sym} ptr idx val mem)
-	for {
-		c := v.AuxInt
-		sym := v.Aux
-		if v_0.Op != Op386ADDLconst {
-			break
-		}
-		d := v_0.AuxInt
-		ptr := v_0.Args[0]
-		idx := v_1
-		val := v_2
-		mem := v_3
-		v.reset(Op386MOVLstoreidx4)
-		v.AuxInt = int64(int32(c + d))
-		v.Aux = sym
-		v.AddArg4(ptr, idx, val, mem)
-		return true
-	}
-	// match: (MOVLstoreidx4 [c] {sym} ptr (ADDLconst [d] idx) val mem)
-	// result: (MOVLstoreidx4 [int64(int32(c+4*d))] {sym} ptr idx val mem)
-	for {
-		c := v.AuxInt
-		sym := v.Aux
-		ptr := v_0
-		if v_1.Op != Op386ADDLconst {
-			break
-		}
-		d := v_1.AuxInt
-		idx := v_1.Args[0]
-		val := v_2
-		mem := v_3
-		v.reset(Op386MOVLstoreidx4)
-		v.AuxInt = int64(int32(c + 4*d))
-		v.Aux = sym
-		v.AddArg4(ptr, idx, val, mem)
-		return true
-	}
-	// match: (MOVLstoreidx4 {sym} [off] ptr idx y:(ADDLloadidx4 x [off] {sym} ptr idx mem) mem)
-	// cond: y.Uses==1 && clobber(y)
-	// result: (ADDLmodifyidx4 [off] {sym} ptr idx x mem)
-	for {
-		off := v.AuxInt
-		sym := v.Aux
-		ptr := v_0
-		idx := v_1
-		y := v_2
-		if y.Op != Op386ADDLloadidx4 || y.AuxInt != off || y.Aux != sym {
-			break
-		}
-		mem := y.Args[3]
-		x := y.Args[0]
-		if ptr != y.Args[1] || idx != y.Args[2] || mem != v_3 || !(y.Uses == 1 && clobber(y)) {
-			break
-		}
-		v.reset(Op386ADDLmodifyidx4)
-		v.AuxInt = off
-		v.Aux = sym
-		v.AddArg4(ptr, idx, x, mem)
-		return true
-	}
-	// match: (MOVLstoreidx4 {sym} [off] ptr idx y:(ANDLloadidx4 x [off] {sym} ptr idx mem) mem)
-	// cond: y.Uses==1 && clobber(y)
-	// result: (ANDLmodifyidx4 [off] {sym} ptr idx x mem)
-	for {
-		off := v.AuxInt
-		sym := v.Aux
-		ptr := v_0
-		idx := v_1
-		y := v_2
-		if y.Op != Op386ANDLloadidx4 || y.AuxInt != off || y.Aux != sym {
-			break
-		}
-		mem := y.Args[3]
-		x := y.Args[0]
-		if ptr != y.Args[1] || idx != y.Args[2] || mem != v_3 || !(y.Uses == 1 && clobber(y)) {
-			break
-		}
-		v.reset(Op386ANDLmodifyidx4)
-		v.AuxInt = off
-		v.Aux = sym
-		v.AddArg4(ptr, idx, x, mem)
-		return true
-	}
-	// match: (MOVLstoreidx4 {sym} [off] ptr idx y:(ORLloadidx4 x [off] {sym} ptr idx mem) mem)
-	// cond: y.Uses==1 && clobber(y)
-	// result: (ORLmodifyidx4 [off] {sym} ptr idx x mem)
-	for {
-		off := v.AuxInt
-		sym := v.Aux
-		ptr := v_0
-		idx := v_1
-		y := v_2
-		if y.Op != Op386ORLloadidx4 || y.AuxInt != off || y.Aux != sym {
-			break
-		}
-		mem := y.Args[3]
-		x := y.Args[0]
-		if ptr != y.Args[1] || idx != y.Args[2] || mem != v_3 || !(y.Uses == 1 && clobber(y)) {
-			break
-		}
-		v.reset(Op386ORLmodifyidx4)
-		v.AuxInt = off
-		v.Aux = sym
-		v.AddArg4(ptr, idx, x, mem)
-		return true
-	}
-	// match: (MOVLstoreidx4 {sym} [off] ptr idx y:(XORLloadidx4 x [off] {sym} ptr idx mem) mem)
-	// cond: y.Uses==1 && clobber(y)
-	// result: (XORLmodifyidx4 [off] {sym} ptr idx x mem)
-	for {
-		off := v.AuxInt
-		sym := v.Aux
-		ptr := v_0
-		idx := v_1
-		y := v_2
-		if y.Op != Op386XORLloadidx4 || y.AuxInt != off || y.Aux != sym {
-			break
-		}
-		mem := y.Args[3]
-		x := y.Args[0]
-		if ptr != y.Args[1] || idx != y.Args[2] || mem != v_3 || !(y.Uses == 1 && clobber(y)) {
-			break
-		}
-		v.reset(Op386XORLmodifyidx4)
-		v.AuxInt = off
-		v.Aux = sym
-		v.AddArg4(ptr, idx, x, mem)
-		return true
-	}
-	// match: (MOVLstoreidx4 {sym} [off] ptr idx y:(ADDL l:(MOVLloadidx4 [off] {sym} ptr idx mem) x) mem)
-	// cond: y.Uses==1 && l.Uses==1 && clobber(y, l)
-	// result: (ADDLmodifyidx4 [off] {sym} ptr idx x mem)
-	for {
-		off := v.AuxInt
-		sym := v.Aux
-		ptr := v_0
-		idx := v_1
-		y := v_2
-		if y.Op != Op386ADDL {
-			break
-		}
-		_ = y.Args[1]
-		y_0 := y.Args[0]
-		y_1 := y.Args[1]
-		for _i0 := 0; _i0 <= 1; _i0, y_0, y_1 = _i0+1, y_1, y_0 {
-			l := y_0
-			if l.Op != Op386MOVLloadidx4 || l.AuxInt != off || l.Aux != sym {
-				continue
-			}
-			mem := l.Args[2]
-			if ptr != l.Args[0] || idx != l.Args[1] {
-				continue
-			}
-			x := y_1
-			if mem != v_3 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l)) {
-				continue
-			}
-			v.reset(Op386ADDLmodifyidx4)
-			v.AuxInt = off
-			v.Aux = sym
-			v.AddArg4(ptr, idx, x, mem)
-			return true
-		}
-		break
-	}
-	// match: (MOVLstoreidx4 {sym} [off] ptr idx y:(SUBL l:(MOVLloadidx4 [off] {sym} ptr idx mem) x) mem)
-	// cond: y.Uses==1 && l.Uses==1 && clobber(y, l)
-	// result: (SUBLmodifyidx4 [off] {sym} ptr idx x mem)
-	for {
-		off := v.AuxInt
-		sym := v.Aux
-		ptr := v_0
-		idx := v_1
-		y := v_2
-		if y.Op != Op386SUBL {
-			break
-		}
-		x := y.Args[1]
-		l := y.Args[0]
-		if l.Op != Op386MOVLloadidx4 || l.AuxInt != off || l.Aux != sym {
-			break
-		}
-		mem := l.Args[2]
-		if ptr != l.Args[0] || idx != l.Args[1] || mem != v_3 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l)) {
-			break
-		}
-		v.reset(Op386SUBLmodifyidx4)
-		v.AuxInt = off
-		v.Aux = sym
-		v.AddArg4(ptr, idx, x, mem)
-		return true
-	}
-	// match: (MOVLstoreidx4 {sym} [off] ptr idx y:(ANDL l:(MOVLloadidx4 [off] {sym} ptr idx mem) x) mem)
-	// cond: y.Uses==1 && l.Uses==1 && clobber(y, l)
-	// result: (ANDLmodifyidx4 [off] {sym} ptr idx x mem)
-	for {
-		off := v.AuxInt
-		sym := v.Aux
-		ptr := v_0
-		idx := v_1
-		y := v_2
-		if y.Op != Op386ANDL {
-			break
-		}
-		_ = y.Args[1]
-		y_0 := y.Args[0]
-		y_1 := y.Args[1]
-		for _i0 := 0; _i0 <= 1; _i0, y_0, y_1 = _i0+1, y_1, y_0 {
-			l := y_0
-			if l.Op != Op386MOVLloadidx4 || l.AuxInt != off || l.Aux != sym {
-				continue
-			}
-			mem := l.Args[2]
-			if ptr != l.Args[0] || idx != l.Args[1] {
-				continue
-			}
-			x := y_1
-			if mem != v_3 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l)) {
-				continue
-			}
-			v.reset(Op386ANDLmodifyidx4)
-			v.AuxInt = off
-			v.Aux = sym
-			v.AddArg4(ptr, idx, x, mem)
-			return true
-		}
-		break
-	}
-	// match: (MOVLstoreidx4 {sym} [off] ptr idx y:(ORL l:(MOVLloadidx4 [off] {sym} ptr idx mem) x) mem)
-	// cond: y.Uses==1 && l.Uses==1 && clobber(y, l)
-	// result: (ORLmodifyidx4 [off] {sym} ptr idx x mem)
-	for {
-		off := v.AuxInt
-		sym := v.Aux
-		ptr := v_0
-		idx := v_1
-		y := v_2
-		if y.Op != Op386ORL {
-			break
-		}
-		_ = y.Args[1]
-		y_0 := y.Args[0]
-		y_1 := y.Args[1]
-		for _i0 := 0; _i0 <= 1; _i0, y_0, y_1 = _i0+1, y_1, y_0 {
-			l := y_0
-			if l.Op != Op386MOVLloadidx4 || l.AuxInt != off || l.Aux != sym {
-				continue
-			}
-			mem := l.Args[2]
-			if ptr != l.Args[0] || idx != l.Args[1] {
-				continue
-			}
-			x := y_1
-			if mem != v_3 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l)) {
-				continue
-			}
-			v.reset(Op386ORLmodifyidx4)
-			v.AuxInt = off
-			v.Aux = sym
-			v.AddArg4(ptr, idx, x, mem)
-			return true
-		}
-		break
-	}
-	// match: (MOVLstoreidx4 {sym} [off] ptr idx y:(XORL l:(MOVLloadidx4 [off] {sym} ptr idx mem) x) mem)
-	// cond: y.Uses==1 && l.Uses==1 && clobber(y, l)
-	// result: (XORLmodifyidx4 [off] {sym} ptr idx x mem)
-	for {
-		off := v.AuxInt
-		sym := v.Aux
-		ptr := v_0
-		idx := v_1
-		y := v_2
-		if y.Op != Op386XORL {
-			break
-		}
-		_ = y.Args[1]
-		y_0 := y.Args[0]
-		y_1 := y.Args[1]
-		for _i0 := 0; _i0 <= 1; _i0, y_0, y_1 = _i0+1, y_1, y_0 {
-			l := y_0
-			if l.Op != Op386MOVLloadidx4 || l.AuxInt != off || l.Aux != sym {
-				continue
-			}
-			mem := l.Args[2]
-			if ptr != l.Args[0] || idx != l.Args[1] {
-				continue
-			}
-			x := y_1
-			if mem != v_3 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l)) {
-				continue
-			}
-			v.reset(Op386XORLmodifyidx4)
-			v.AuxInt = off
-			v.Aux = sym
-			v.AddArg4(ptr, idx, x, mem)
-			return true
-		}
-		break
-	}
-	// match: (MOVLstoreidx4 {sym} [off] ptr idx y:(ADDLconst [c] l:(MOVLloadidx4 [off] {sym} ptr idx mem)) mem)
-	// cond: y.Uses==1 && l.Uses==1 && clobber(y, l) && validValAndOff(c,off)
-	// result: (ADDLconstmodifyidx4 [makeValAndOff(c,off)] {sym} ptr idx mem)
-	for {
-		off := v.AuxInt
-		sym := v.Aux
-		ptr := v_0
-		idx := v_1
-		y := v_2
-		if y.Op != Op386ADDLconst {
-			break
-		}
-		c := y.AuxInt
-		l := y.Args[0]
-		if l.Op != Op386MOVLloadidx4 || l.AuxInt != off || l.Aux != sym {
-			break
-		}
-		mem := l.Args[2]
-		if ptr != l.Args[0] || idx != l.Args[1] || mem != v_3 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l) && validValAndOff(c, off)) {
-			break
-		}
-		v.reset(Op386ADDLconstmodifyidx4)
-		v.AuxInt = makeValAndOff(c, off)
-		v.Aux = sym
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
-	// match: (MOVLstoreidx4 {sym} [off] ptr idx y:(ANDLconst [c] l:(MOVLloadidx4 [off] {sym} ptr idx mem)) mem)
-	// cond: y.Uses==1 && l.Uses==1 && clobber(y, l) && validValAndOff(c,off)
-	// result: (ANDLconstmodifyidx4 [makeValAndOff(c,off)] {sym} ptr idx mem)
-	for {
-		off := v.AuxInt
-		sym := v.Aux
-		ptr := v_0
-		idx := v_1
-		y := v_2
-		if y.Op != Op386ANDLconst {
-			break
-		}
-		c := y.AuxInt
-		l := y.Args[0]
-		if l.Op != Op386MOVLloadidx4 || l.AuxInt != off || l.Aux != sym {
-			break
-		}
-		mem := l.Args[2]
-		if ptr != l.Args[0] || idx != l.Args[1] || mem != v_3 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l) && validValAndOff(c, off)) {
-			break
-		}
-		v.reset(Op386ANDLconstmodifyidx4)
-		v.AuxInt = makeValAndOff(c, off)
-		v.Aux = sym
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
-	// match: (MOVLstoreidx4 {sym} [off] ptr idx y:(ORLconst [c] l:(MOVLloadidx4 [off] {sym} ptr idx mem)) mem)
-	// cond: y.Uses==1 && l.Uses==1 && clobber(y, l) && validValAndOff(c,off)
-	// result: (ORLconstmodifyidx4 [makeValAndOff(c,off)] {sym} ptr idx mem)
-	for {
-		off := v.AuxInt
-		sym := v.Aux
-		ptr := v_0
-		idx := v_1
-		y := v_2
-		if y.Op != Op386ORLconst {
-			break
-		}
-		c := y.AuxInt
-		l := y.Args[0]
-		if l.Op != Op386MOVLloadidx4 || l.AuxInt != off || l.Aux != sym {
-			break
-		}
-		mem := l.Args[2]
-		if ptr != l.Args[0] || idx != l.Args[1] || mem != v_3 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l) && validValAndOff(c, off)) {
-			break
-		}
-		v.reset(Op386ORLconstmodifyidx4)
-		v.AuxInt = makeValAndOff(c, off)
-		v.Aux = sym
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
-	// match: (MOVLstoreidx4 {sym} [off] ptr idx y:(XORLconst [c] l:(MOVLloadidx4 [off] {sym} ptr idx mem)) mem)
-	// cond: y.Uses==1 && l.Uses==1 && clobber(y, l) && validValAndOff(c,off)
-	// result: (XORLconstmodifyidx4 [makeValAndOff(c,off)] {sym} ptr idx mem)
-	for {
-		off := v.AuxInt
-		sym := v.Aux
-		ptr := v_0
-		idx := v_1
-		y := v_2
-		if y.Op != Op386XORLconst {
-			break
-		}
-		c := y.AuxInt
-		l := y.Args[0]
-		if l.Op != Op386MOVLloadidx4 || l.AuxInt != off || l.Aux != sym {
-			break
-		}
-		mem := l.Args[2]
-		if ptr != l.Args[0] || idx != l.Args[1] || mem != v_3 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l) && validValAndOff(c, off)) {
-			break
-		}
-		v.reset(Op386XORLconstmodifyidx4)
-		v.AuxInt = makeValAndOff(c, off)
-		v.Aux = sym
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
 	return false
 }
 func rewriteValue386_Op386MOVSDconst(v *Value) bool {
@@ -6634,163 +5104,6 @@
 		v.AddArg2(base, mem)
 		return true
 	}
-	// match: (MOVSDload [off1] {sym1} (LEAL1 [off2] {sym2} ptr idx) mem)
-	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-	// result: (MOVSDloadidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
-	for {
-		off1 := v.AuxInt
-		sym1 := v.Aux
-		if v_0.Op != Op386LEAL1 {
-			break
-		}
-		off2 := v_0.AuxInt
-		sym2 := v_0.Aux
-		idx := v_0.Args[1]
-		ptr := v_0.Args[0]
-		mem := v_1
-		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
-			break
-		}
-		v.reset(Op386MOVSDloadidx1)
-		v.AuxInt = off1 + off2
-		v.Aux = mergeSym(sym1, sym2)
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
-	// match: (MOVSDload [off1] {sym1} (LEAL8 [off2] {sym2} ptr idx) mem)
-	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-	// result: (MOVSDloadidx8 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
-	for {
-		off1 := v.AuxInt
-		sym1 := v.Aux
-		if v_0.Op != Op386LEAL8 {
-			break
-		}
-		off2 := v_0.AuxInt
-		sym2 := v_0.Aux
-		idx := v_0.Args[1]
-		ptr := v_0.Args[0]
-		mem := v_1
-		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
-			break
-		}
-		v.reset(Op386MOVSDloadidx8)
-		v.AuxInt = off1 + off2
-		v.Aux = mergeSym(sym1, sym2)
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
-	// match: (MOVSDload [off] {sym} (ADDL ptr idx) mem)
-	// cond: ptr.Op != OpSB
-	// result: (MOVSDloadidx1 [off] {sym} ptr idx mem)
-	for {
-		off := v.AuxInt
-		sym := v.Aux
-		if v_0.Op != Op386ADDL {
-			break
-		}
-		_ = v_0.Args[1]
-		v_0_0 := v_0.Args[0]
-		v_0_1 := v_0.Args[1]
-		for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
-			ptr := v_0_0
-			idx := v_0_1
-			mem := v_1
-			if !(ptr.Op != OpSB) {
-				continue
-			}
-			v.reset(Op386MOVSDloadidx1)
-			v.AuxInt = off
-			v.Aux = sym
-			v.AddArg3(ptr, idx, mem)
-			return true
-		}
-		break
-	}
-	return false
-}
-func rewriteValue386_Op386MOVSDloadidx1(v *Value) bool {
-	v_2 := v.Args[2]
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	// match: (MOVSDloadidx1 [c] {sym} (ADDLconst [d] ptr) idx mem)
-	// result: (MOVSDloadidx1 [int64(int32(c+d))] {sym} ptr idx mem)
-	for {
-		c := v.AuxInt
-		sym := v.Aux
-		if v_0.Op != Op386ADDLconst {
-			break
-		}
-		d := v_0.AuxInt
-		ptr := v_0.Args[0]
-		idx := v_1
-		mem := v_2
-		v.reset(Op386MOVSDloadidx1)
-		v.AuxInt = int64(int32(c + d))
-		v.Aux = sym
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
-	// match: (MOVSDloadidx1 [c] {sym} ptr (ADDLconst [d] idx) mem)
-	// result: (MOVSDloadidx1 [int64(int32(c+d))] {sym} ptr idx mem)
-	for {
-		c := v.AuxInt
-		sym := v.Aux
-		ptr := v_0
-		if v_1.Op != Op386ADDLconst {
-			break
-		}
-		d := v_1.AuxInt
-		idx := v_1.Args[0]
-		mem := v_2
-		v.reset(Op386MOVSDloadidx1)
-		v.AuxInt = int64(int32(c + d))
-		v.Aux = sym
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
-	return false
-}
-func rewriteValue386_Op386MOVSDloadidx8(v *Value) bool {
-	v_2 := v.Args[2]
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	// match: (MOVSDloadidx8 [c] {sym} (ADDLconst [d] ptr) idx mem)
-	// result: (MOVSDloadidx8 [int64(int32(c+d))] {sym} ptr idx mem)
-	for {
-		c := v.AuxInt
-		sym := v.Aux
-		if v_0.Op != Op386ADDLconst {
-			break
-		}
-		d := v_0.AuxInt
-		ptr := v_0.Args[0]
-		idx := v_1
-		mem := v_2
-		v.reset(Op386MOVSDloadidx8)
-		v.AuxInt = int64(int32(c + d))
-		v.Aux = sym
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
-	// match: (MOVSDloadidx8 [c] {sym} ptr (ADDLconst [d] idx) mem)
-	// result: (MOVSDloadidx8 [int64(int32(c+8*d))] {sym} ptr idx mem)
-	for {
-		c := v.AuxInt
-		sym := v.Aux
-		ptr := v_0
-		if v_1.Op != Op386ADDLconst {
-			break
-		}
-		d := v_1.AuxInt
-		idx := v_1.Args[0]
-		mem := v_2
-		v.reset(Op386MOVSDloadidx8)
-		v.AuxInt = int64(int32(c + 8*d))
-		v.Aux = sym
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
 	return false
 }
 func rewriteValue386_Op386MOVSDstore(v *Value) bool {
@@ -6844,172 +5157,6 @@
 		v.AddArg3(base, val, mem)
 		return true
 	}
-	// match: (MOVSDstore [off1] {sym1} (LEAL1 [off2] {sym2} ptr idx) val mem)
-	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-	// result: (MOVSDstoreidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
-	for {
-		off1 := v.AuxInt
-		sym1 := v.Aux
-		if v_0.Op != Op386LEAL1 {
-			break
-		}
-		off2 := v_0.AuxInt
-		sym2 := v_0.Aux
-		idx := v_0.Args[1]
-		ptr := v_0.Args[0]
-		val := v_1
-		mem := v_2
-		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
-			break
-		}
-		v.reset(Op386MOVSDstoreidx1)
-		v.AuxInt = off1 + off2
-		v.Aux = mergeSym(sym1, sym2)
-		v.AddArg4(ptr, idx, val, mem)
-		return true
-	}
-	// match: (MOVSDstore [off1] {sym1} (LEAL8 [off2] {sym2} ptr idx) val mem)
-	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-	// result: (MOVSDstoreidx8 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
-	for {
-		off1 := v.AuxInt
-		sym1 := v.Aux
-		if v_0.Op != Op386LEAL8 {
-			break
-		}
-		off2 := v_0.AuxInt
-		sym2 := v_0.Aux
-		idx := v_0.Args[1]
-		ptr := v_0.Args[0]
-		val := v_1
-		mem := v_2
-		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
-			break
-		}
-		v.reset(Op386MOVSDstoreidx8)
-		v.AuxInt = off1 + off2
-		v.Aux = mergeSym(sym1, sym2)
-		v.AddArg4(ptr, idx, val, mem)
-		return true
-	}
-	// match: (MOVSDstore [off] {sym} (ADDL ptr idx) val mem)
-	// cond: ptr.Op != OpSB
-	// result: (MOVSDstoreidx1 [off] {sym} ptr idx val mem)
-	for {
-		off := v.AuxInt
-		sym := v.Aux
-		if v_0.Op != Op386ADDL {
-			break
-		}
-		_ = v_0.Args[1]
-		v_0_0 := v_0.Args[0]
-		v_0_1 := v_0.Args[1]
-		for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
-			ptr := v_0_0
-			idx := v_0_1
-			val := v_1
-			mem := v_2
-			if !(ptr.Op != OpSB) {
-				continue
-			}
-			v.reset(Op386MOVSDstoreidx1)
-			v.AuxInt = off
-			v.Aux = sym
-			v.AddArg4(ptr, idx, val, mem)
-			return true
-		}
-		break
-	}
-	return false
-}
-func rewriteValue386_Op386MOVSDstoreidx1(v *Value) bool {
-	v_3 := v.Args[3]
-	v_2 := v.Args[2]
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	// match: (MOVSDstoreidx1 [c] {sym} (ADDLconst [d] ptr) idx val mem)
-	// result: (MOVSDstoreidx1 [int64(int32(c+d))] {sym} ptr idx val mem)
-	for {
-		c := v.AuxInt
-		sym := v.Aux
-		if v_0.Op != Op386ADDLconst {
-			break
-		}
-		d := v_0.AuxInt
-		ptr := v_0.Args[0]
-		idx := v_1
-		val := v_2
-		mem := v_3
-		v.reset(Op386MOVSDstoreidx1)
-		v.AuxInt = int64(int32(c + d))
-		v.Aux = sym
-		v.AddArg4(ptr, idx, val, mem)
-		return true
-	}
-	// match: (MOVSDstoreidx1 [c] {sym} ptr (ADDLconst [d] idx) val mem)
-	// result: (MOVSDstoreidx1 [int64(int32(c+d))] {sym} ptr idx val mem)
-	for {
-		c := v.AuxInt
-		sym := v.Aux
-		ptr := v_0
-		if v_1.Op != Op386ADDLconst {
-			break
-		}
-		d := v_1.AuxInt
-		idx := v_1.Args[0]
-		val := v_2
-		mem := v_3
-		v.reset(Op386MOVSDstoreidx1)
-		v.AuxInt = int64(int32(c + d))
-		v.Aux = sym
-		v.AddArg4(ptr, idx, val, mem)
-		return true
-	}
-	return false
-}
-func rewriteValue386_Op386MOVSDstoreidx8(v *Value) bool {
-	v_3 := v.Args[3]
-	v_2 := v.Args[2]
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	// match: (MOVSDstoreidx8 [c] {sym} (ADDLconst [d] ptr) idx val mem)
-	// result: (MOVSDstoreidx8 [int64(int32(c+d))] {sym} ptr idx val mem)
-	for {
-		c := v.AuxInt
-		sym := v.Aux
-		if v_0.Op != Op386ADDLconst {
-			break
-		}
-		d := v_0.AuxInt
-		ptr := v_0.Args[0]
-		idx := v_1
-		val := v_2
-		mem := v_3
-		v.reset(Op386MOVSDstoreidx8)
-		v.AuxInt = int64(int32(c + d))
-		v.Aux = sym
-		v.AddArg4(ptr, idx, val, mem)
-		return true
-	}
-	// match: (MOVSDstoreidx8 [c] {sym} ptr (ADDLconst [d] idx) val mem)
-	// result: (MOVSDstoreidx8 [int64(int32(c+8*d))] {sym} ptr idx val mem)
-	for {
-		c := v.AuxInt
-		sym := v.Aux
-		ptr := v_0
-		if v_1.Op != Op386ADDLconst {
-			break
-		}
-		d := v_1.AuxInt
-		idx := v_1.Args[0]
-		val := v_2
-		mem := v_3
-		v.reset(Op386MOVSDstoreidx8)
-		v.AuxInt = int64(int32(c + 8*d))
-		v.Aux = sym
-		v.AddArg4(ptr, idx, val, mem)
-		return true
-	}
 	return false
 }
 func rewriteValue386_Op386MOVSSconst(v *Value) bool {
@@ -7080,163 +5227,6 @@
 		v.AddArg2(base, mem)
 		return true
 	}
-	// match: (MOVSSload [off1] {sym1} (LEAL1 [off2] {sym2} ptr idx) mem)
-	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-	// result: (MOVSSloadidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
-	for {
-		off1 := v.AuxInt
-		sym1 := v.Aux
-		if v_0.Op != Op386LEAL1 {
-			break
-		}
-		off2 := v_0.AuxInt
-		sym2 := v_0.Aux
-		idx := v_0.Args[1]
-		ptr := v_0.Args[0]
-		mem := v_1
-		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
-			break
-		}
-		v.reset(Op386MOVSSloadidx1)
-		v.AuxInt = off1 + off2
-		v.Aux = mergeSym(sym1, sym2)
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
-	// match: (MOVSSload [off1] {sym1} (LEAL4 [off2] {sym2} ptr idx) mem)
-	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-	// result: (MOVSSloadidx4 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
-	for {
-		off1 := v.AuxInt
-		sym1 := v.Aux
-		if v_0.Op != Op386LEAL4 {
-			break
-		}
-		off2 := v_0.AuxInt
-		sym2 := v_0.Aux
-		idx := v_0.Args[1]
-		ptr := v_0.Args[0]
-		mem := v_1
-		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
-			break
-		}
-		v.reset(Op386MOVSSloadidx4)
-		v.AuxInt = off1 + off2
-		v.Aux = mergeSym(sym1, sym2)
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
-	// match: (MOVSSload [off] {sym} (ADDL ptr idx) mem)
-	// cond: ptr.Op != OpSB
-	// result: (MOVSSloadidx1 [off] {sym} ptr idx mem)
-	for {
-		off := v.AuxInt
-		sym := v.Aux
-		if v_0.Op != Op386ADDL {
-			break
-		}
-		_ = v_0.Args[1]
-		v_0_0 := v_0.Args[0]
-		v_0_1 := v_0.Args[1]
-		for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
-			ptr := v_0_0
-			idx := v_0_1
-			mem := v_1
-			if !(ptr.Op != OpSB) {
-				continue
-			}
-			v.reset(Op386MOVSSloadidx1)
-			v.AuxInt = off
-			v.Aux = sym
-			v.AddArg3(ptr, idx, mem)
-			return true
-		}
-		break
-	}
-	return false
-}
-func rewriteValue386_Op386MOVSSloadidx1(v *Value) bool {
-	v_2 := v.Args[2]
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	// match: (MOVSSloadidx1 [c] {sym} (ADDLconst [d] ptr) idx mem)
-	// result: (MOVSSloadidx1 [int64(int32(c+d))] {sym} ptr idx mem)
-	for {
-		c := v.AuxInt
-		sym := v.Aux
-		if v_0.Op != Op386ADDLconst {
-			break
-		}
-		d := v_0.AuxInt
-		ptr := v_0.Args[0]
-		idx := v_1
-		mem := v_2
-		v.reset(Op386MOVSSloadidx1)
-		v.AuxInt = int64(int32(c + d))
-		v.Aux = sym
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
-	// match: (MOVSSloadidx1 [c] {sym} ptr (ADDLconst [d] idx) mem)
-	// result: (MOVSSloadidx1 [int64(int32(c+d))] {sym} ptr idx mem)
-	for {
-		c := v.AuxInt
-		sym := v.Aux
-		ptr := v_0
-		if v_1.Op != Op386ADDLconst {
-			break
-		}
-		d := v_1.AuxInt
-		idx := v_1.Args[0]
-		mem := v_2
-		v.reset(Op386MOVSSloadidx1)
-		v.AuxInt = int64(int32(c + d))
-		v.Aux = sym
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
-	return false
-}
-func rewriteValue386_Op386MOVSSloadidx4(v *Value) bool {
-	v_2 := v.Args[2]
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	// match: (MOVSSloadidx4 [c] {sym} (ADDLconst [d] ptr) idx mem)
-	// result: (MOVSSloadidx4 [int64(int32(c+d))] {sym} ptr idx mem)
-	for {
-		c := v.AuxInt
-		sym := v.Aux
-		if v_0.Op != Op386ADDLconst {
-			break
-		}
-		d := v_0.AuxInt
-		ptr := v_0.Args[0]
-		idx := v_1
-		mem := v_2
-		v.reset(Op386MOVSSloadidx4)
-		v.AuxInt = int64(int32(c + d))
-		v.Aux = sym
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
-	// match: (MOVSSloadidx4 [c] {sym} ptr (ADDLconst [d] idx) mem)
-	// result: (MOVSSloadidx4 [int64(int32(c+4*d))] {sym} ptr idx mem)
-	for {
-		c := v.AuxInt
-		sym := v.Aux
-		ptr := v_0
-		if v_1.Op != Op386ADDLconst {
-			break
-		}
-		d := v_1.AuxInt
-		idx := v_1.Args[0]
-		mem := v_2
-		v.reset(Op386MOVSSloadidx4)
-		v.AuxInt = int64(int32(c + 4*d))
-		v.Aux = sym
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
 	return false
 }
 func rewriteValue386_Op386MOVSSstore(v *Value) bool {
@@ -7290,172 +5280,6 @@
 		v.AddArg3(base, val, mem)
 		return true
 	}
-	// match: (MOVSSstore [off1] {sym1} (LEAL1 [off2] {sym2} ptr idx) val mem)
-	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-	// result: (MOVSSstoreidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
-	for {
-		off1 := v.AuxInt
-		sym1 := v.Aux
-		if v_0.Op != Op386LEAL1 {
-			break
-		}
-		off2 := v_0.AuxInt
-		sym2 := v_0.Aux
-		idx := v_0.Args[1]
-		ptr := v_0.Args[0]
-		val := v_1
-		mem := v_2
-		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
-			break
-		}
-		v.reset(Op386MOVSSstoreidx1)
-		v.AuxInt = off1 + off2
-		v.Aux = mergeSym(sym1, sym2)
-		v.AddArg4(ptr, idx, val, mem)
-		return true
-	}
-	// match: (MOVSSstore [off1] {sym1} (LEAL4 [off2] {sym2} ptr idx) val mem)
-	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-	// result: (MOVSSstoreidx4 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
-	for {
-		off1 := v.AuxInt
-		sym1 := v.Aux
-		if v_0.Op != Op386LEAL4 {
-			break
-		}
-		off2 := v_0.AuxInt
-		sym2 := v_0.Aux
-		idx := v_0.Args[1]
-		ptr := v_0.Args[0]
-		val := v_1
-		mem := v_2
-		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
-			break
-		}
-		v.reset(Op386MOVSSstoreidx4)
-		v.AuxInt = off1 + off2
-		v.Aux = mergeSym(sym1, sym2)
-		v.AddArg4(ptr, idx, val, mem)
-		return true
-	}
-	// match: (MOVSSstore [off] {sym} (ADDL ptr idx) val mem)
-	// cond: ptr.Op != OpSB
-	// result: (MOVSSstoreidx1 [off] {sym} ptr idx val mem)
-	for {
-		off := v.AuxInt
-		sym := v.Aux
-		if v_0.Op != Op386ADDL {
-			break
-		}
-		_ = v_0.Args[1]
-		v_0_0 := v_0.Args[0]
-		v_0_1 := v_0.Args[1]
-		for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
-			ptr := v_0_0
-			idx := v_0_1
-			val := v_1
-			mem := v_2
-			if !(ptr.Op != OpSB) {
-				continue
-			}
-			v.reset(Op386MOVSSstoreidx1)
-			v.AuxInt = off
-			v.Aux = sym
-			v.AddArg4(ptr, idx, val, mem)
-			return true
-		}
-		break
-	}
-	return false
-}
-func rewriteValue386_Op386MOVSSstoreidx1(v *Value) bool {
-	v_3 := v.Args[3]
-	v_2 := v.Args[2]
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	// match: (MOVSSstoreidx1 [c] {sym} (ADDLconst [d] ptr) idx val mem)
-	// result: (MOVSSstoreidx1 [int64(int32(c+d))] {sym} ptr idx val mem)
-	for {
-		c := v.AuxInt
-		sym := v.Aux
-		if v_0.Op != Op386ADDLconst {
-			break
-		}
-		d := v_0.AuxInt
-		ptr := v_0.Args[0]
-		idx := v_1
-		val := v_2
-		mem := v_3
-		v.reset(Op386MOVSSstoreidx1)
-		v.AuxInt = int64(int32(c + d))
-		v.Aux = sym
-		v.AddArg4(ptr, idx, val, mem)
-		return true
-	}
-	// match: (MOVSSstoreidx1 [c] {sym} ptr (ADDLconst [d] idx) val mem)
-	// result: (MOVSSstoreidx1 [int64(int32(c+d))] {sym} ptr idx val mem)
-	for {
-		c := v.AuxInt
-		sym := v.Aux
-		ptr := v_0
-		if v_1.Op != Op386ADDLconst {
-			break
-		}
-		d := v_1.AuxInt
-		idx := v_1.Args[0]
-		val := v_2
-		mem := v_3
-		v.reset(Op386MOVSSstoreidx1)
-		v.AuxInt = int64(int32(c + d))
-		v.Aux = sym
-		v.AddArg4(ptr, idx, val, mem)
-		return true
-	}
-	return false
-}
-func rewriteValue386_Op386MOVSSstoreidx4(v *Value) bool {
-	v_3 := v.Args[3]
-	v_2 := v.Args[2]
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	// match: (MOVSSstoreidx4 [c] {sym} (ADDLconst [d] ptr) idx val mem)
-	// result: (MOVSSstoreidx4 [int64(int32(c+d))] {sym} ptr idx val mem)
-	for {
-		c := v.AuxInt
-		sym := v.Aux
-		if v_0.Op != Op386ADDLconst {
-			break
-		}
-		d := v_0.AuxInt
-		ptr := v_0.Args[0]
-		idx := v_1
-		val := v_2
-		mem := v_3
-		v.reset(Op386MOVSSstoreidx4)
-		v.AuxInt = int64(int32(c + d))
-		v.Aux = sym
-		v.AddArg4(ptr, idx, val, mem)
-		return true
-	}
-	// match: (MOVSSstoreidx4 [c] {sym} ptr (ADDLconst [d] idx) val mem)
-	// result: (MOVSSstoreidx4 [int64(int32(c+4*d))] {sym} ptr idx val mem)
-	for {
-		c := v.AuxInt
-		sym := v.Aux
-		ptr := v_0
-		if v_1.Op != Op386ADDLconst {
-			break
-		}
-		d := v_1.AuxInt
-		idx := v_1.Args[0]
-		val := v_2
-		mem := v_3
-		v.reset(Op386MOVSSstoreidx4)
-		v.AuxInt = int64(int32(c + 4*d))
-		v.Aux = sym
-		v.AddArg4(ptr, idx, val, mem)
-		return true
-	}
 	return false
 }
 func rewriteValue386_Op386MOVWLSX(v *Value) bool {
@@ -7579,54 +5403,6 @@
 		v0.AddArg2(ptr, mem)
 		return true
 	}
-	// match: (MOVWLZX x:(MOVWloadidx1 [off] {sym} ptr idx mem))
-	// cond: x.Uses == 1 && clobber(x)
-	// result: @x.Block (MOVWloadidx1 <v.Type> [off] {sym} ptr idx mem)
-	for {
-		x := v_0
-		if x.Op != Op386MOVWloadidx1 {
-			break
-		}
-		off := x.AuxInt
-		sym := x.Aux
-		mem := x.Args[2]
-		ptr := x.Args[0]
-		idx := x.Args[1]
-		if !(x.Uses == 1 && clobber(x)) {
-			break
-		}
-		b = x.Block
-		v0 := b.NewValue0(v.Pos, Op386MOVWloadidx1, v.Type)
-		v.copyOf(v0)
-		v0.AuxInt = off
-		v0.Aux = sym
-		v0.AddArg3(ptr, idx, mem)
-		return true
-	}
-	// match: (MOVWLZX x:(MOVWloadidx2 [off] {sym} ptr idx mem))
-	// cond: x.Uses == 1 && clobber(x)
-	// result: @x.Block (MOVWloadidx2 <v.Type> [off] {sym} ptr idx mem)
-	for {
-		x := v_0
-		if x.Op != Op386MOVWloadidx2 {
-			break
-		}
-		off := x.AuxInt
-		sym := x.Aux
-		mem := x.Args[2]
-		ptr := x.Args[0]
-		idx := x.Args[1]
-		if !(x.Uses == 1 && clobber(x)) {
-			break
-		}
-		b = x.Block
-		v0 := b.NewValue0(v.Pos, Op386MOVWloadidx2, v.Type)
-		v.copyOf(v0)
-		v0.AuxInt = off
-		v0.Aux = sym
-		v0.AddArg3(ptr, idx, mem)
-		return true
-	}
 	// match: (MOVWLZX (ANDLconst [c] x))
 	// result: (ANDLconst [c & 0xffff] x)
 	for {
@@ -7711,78 +5487,117 @@
 		v.AddArg2(base, mem)
 		return true
 	}
-	// match: (MOVWload [off1] {sym1} (LEAL1 [off2] {sym2} ptr idx) mem)
-	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-	// result: (MOVWloadidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
+	// match: (MOVWload [i0] {s0} l:(LEAL1 [i1] {s1} x y) mem)
+	// cond: i1 != 0 && is32Bit(i0+i1)
+	// result: (MOVWload [i0+i1] {s0} (LEAL1 <l.Type> [0] {s1} x y) mem)
 	for {
-		off1 := v.AuxInt
-		sym1 := v.Aux
-		if v_0.Op != Op386LEAL1 {
+		i0 := v.AuxInt
+		s0 := v.Aux
+		l := v_0
+		if l.Op != Op386LEAL1 {
 			break
 		}
-		off2 := v_0.AuxInt
-		sym2 := v_0.Aux
-		idx := v_0.Args[1]
-		ptr := v_0.Args[0]
+		i1 := l.AuxInt
+		s1 := l.Aux
+		y := l.Args[1]
+		x := l.Args[0]
 		mem := v_1
-		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+		if !(i1 != 0 && is32Bit(i0+i1)) {
 			break
 		}
-		v.reset(Op386MOVWloadidx1)
-		v.AuxInt = off1 + off2
-		v.Aux = mergeSym(sym1, sym2)
-		v.AddArg3(ptr, idx, mem)
+		v.reset(Op386MOVWload)
+		v.AuxInt = i0 + i1
+		v.Aux = s0
+		v0 := b.NewValue0(v.Pos, Op386LEAL1, l.Type)
+		v0.AuxInt = 0
+		v0.Aux = s1
+		v0.AddArg2(x, y)
+		v.AddArg2(v0, mem)
 		return true
 	}
-	// match: (MOVWload [off1] {sym1} (LEAL2 [off2] {sym2} ptr idx) mem)
-	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-	// result: (MOVWloadidx2 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
+	// match: (MOVWload [i0] {s0} l:(LEAL2 [i1] {s1} x y) mem)
+	// cond: i1 != 0 && is32Bit(i0+i1)
+	// result: (MOVWload [i0+i1] {s0} (LEAL2 <l.Type> [0] {s1} x y) mem)
 	for {
-		off1 := v.AuxInt
-		sym1 := v.Aux
-		if v_0.Op != Op386LEAL2 {
+		i0 := v.AuxInt
+		s0 := v.Aux
+		l := v_0
+		if l.Op != Op386LEAL2 {
 			break
 		}
-		off2 := v_0.AuxInt
-		sym2 := v_0.Aux
-		idx := v_0.Args[1]
-		ptr := v_0.Args[0]
+		i1 := l.AuxInt
+		s1 := l.Aux
+		y := l.Args[1]
+		x := l.Args[0]
 		mem := v_1
-		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+		if !(i1 != 0 && is32Bit(i0+i1)) {
 			break
 		}
-		v.reset(Op386MOVWloadidx2)
-		v.AuxInt = off1 + off2
-		v.Aux = mergeSym(sym1, sym2)
-		v.AddArg3(ptr, idx, mem)
+		v.reset(Op386MOVWload)
+		v.AuxInt = i0 + i1
+		v.Aux = s0
+		v0 := b.NewValue0(v.Pos, Op386LEAL2, l.Type)
+		v0.AuxInt = 0
+		v0.Aux = s1
+		v0.AddArg2(x, y)
+		v.AddArg2(v0, mem)
 		return true
 	}
-	// match: (MOVWload [off] {sym} (ADDL ptr idx) mem)
-	// cond: ptr.Op != OpSB
-	// result: (MOVWloadidx1 [off] {sym} ptr idx mem)
+	// match: (MOVWload [i0] {s0} l:(LEAL4 [i1] {s1} x y) mem)
+	// cond: i1 != 0 && is32Bit(i0+i1)
+	// result: (MOVWload [i0+i1] {s0} (LEAL4 <l.Type> [0] {s1} x y) mem)
 	for {
-		off := v.AuxInt
-		sym := v.Aux
-		if v_0.Op != Op386ADDL {
+		i0 := v.AuxInt
+		s0 := v.Aux
+		l := v_0
+		if l.Op != Op386LEAL4 {
 			break
 		}
-		_ = v_0.Args[1]
-		v_0_0 := v_0.Args[0]
-		v_0_1 := v_0.Args[1]
-		for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
-			ptr := v_0_0
-			idx := v_0_1
-			mem := v_1
-			if !(ptr.Op != OpSB) {
-				continue
-			}
-			v.reset(Op386MOVWloadidx1)
-			v.AuxInt = off
-			v.Aux = sym
-			v.AddArg3(ptr, idx, mem)
-			return true
+		i1 := l.AuxInt
+		s1 := l.Aux
+		y := l.Args[1]
+		x := l.Args[0]
+		mem := v_1
+		if !(i1 != 0 && is32Bit(i0+i1)) {
+			break
 		}
-		break
+		v.reset(Op386MOVWload)
+		v.AuxInt = i0 + i1
+		v.Aux = s0
+		v0 := b.NewValue0(v.Pos, Op386LEAL4, l.Type)
+		v0.AuxInt = 0
+		v0.Aux = s1
+		v0.AddArg2(x, y)
+		v.AddArg2(v0, mem)
+		return true
+	}
+	// match: (MOVWload [i0] {s0} l:(LEAL8 [i1] {s1} x y) mem)
+	// cond: i1 != 0 && is32Bit(i0+i1)
+	// result: (MOVWload [i0+i1] {s0} (LEAL8 <l.Type> [0] {s1} x y) mem)
+	for {
+		i0 := v.AuxInt
+		s0 := v.Aux
+		l := v_0
+		if l.Op != Op386LEAL8 {
+			break
+		}
+		i1 := l.AuxInt
+		s1 := l.Aux
+		y := l.Args[1]
+		x := l.Args[0]
+		mem := v_1
+		if !(i1 != 0 && is32Bit(i0+i1)) {
+			break
+		}
+		v.reset(Op386MOVWload)
+		v.AuxInt = i0 + i1
+		v.Aux = s0
+		v0 := b.NewValue0(v.Pos, Op386LEAL8, l.Type)
+		v0.AuxInt = 0
+		v0.Aux = s1
+		v0.AddArg2(x, y)
+		v.AddArg2(v0, mem)
+		return true
 	}
 	// match: (MOVWload [off] {sym} (SB) _)
 	// cond: symIsRO(sym)
@@ -7799,116 +5614,6 @@
 	}
 	return false
 }
-func rewriteValue386_Op386MOVWloadidx1(v *Value) bool {
-	v_2 := v.Args[2]
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	// match: (MOVWloadidx1 [c] {sym} ptr (SHLLconst [1] idx) mem)
-	// result: (MOVWloadidx2 [c] {sym} ptr idx mem)
-	for {
-		c := v.AuxInt
-		sym := v.Aux
-		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-			ptr := v_0
-			if v_1.Op != Op386SHLLconst || v_1.AuxInt != 1 {
-				continue
-			}
-			idx := v_1.Args[0]
-			mem := v_2
-			v.reset(Op386MOVWloadidx2)
-			v.AuxInt = c
-			v.Aux = sym
-			v.AddArg3(ptr, idx, mem)
-			return true
-		}
-		break
-	}
-	// match: (MOVWloadidx1 [c] {sym} (ADDLconst [d] ptr) idx mem)
-	// result: (MOVWloadidx1 [int64(int32(c+d))] {sym} ptr idx mem)
-	for {
-		c := v.AuxInt
-		sym := v.Aux
-		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-			if v_0.Op != Op386ADDLconst {
-				continue
-			}
-			d := v_0.AuxInt
-			ptr := v_0.Args[0]
-			idx := v_1
-			mem := v_2
-			v.reset(Op386MOVWloadidx1)
-			v.AuxInt = int64(int32(c + d))
-			v.Aux = sym
-			v.AddArg3(ptr, idx, mem)
-			return true
-		}
-		break
-	}
-	// match: (MOVWloadidx1 [c] {sym} ptr (ADDLconst [d] idx) mem)
-	// result: (MOVWloadidx1 [int64(int32(c+d))] {sym} ptr idx mem)
-	for {
-		c := v.AuxInt
-		sym := v.Aux
-		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-			ptr := v_0
-			if v_1.Op != Op386ADDLconst {
-				continue
-			}
-			d := v_1.AuxInt
-			idx := v_1.Args[0]
-			mem := v_2
-			v.reset(Op386MOVWloadidx1)
-			v.AuxInt = int64(int32(c + d))
-			v.Aux = sym
-			v.AddArg3(ptr, idx, mem)
-			return true
-		}
-		break
-	}
-	return false
-}
-func rewriteValue386_Op386MOVWloadidx2(v *Value) bool {
-	v_2 := v.Args[2]
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	// match: (MOVWloadidx2 [c] {sym} (ADDLconst [d] ptr) idx mem)
-	// result: (MOVWloadidx2 [int64(int32(c+d))] {sym} ptr idx mem)
-	for {
-		c := v.AuxInt
-		sym := v.Aux
-		if v_0.Op != Op386ADDLconst {
-			break
-		}
-		d := v_0.AuxInt
-		ptr := v_0.Args[0]
-		idx := v_1
-		mem := v_2
-		v.reset(Op386MOVWloadidx2)
-		v.AuxInt = int64(int32(c + d))
-		v.Aux = sym
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
-	// match: (MOVWloadidx2 [c] {sym} ptr (ADDLconst [d] idx) mem)
-	// result: (MOVWloadidx2 [int64(int32(c+2*d))] {sym} ptr idx mem)
-	for {
-		c := v.AuxInt
-		sym := v.Aux
-		ptr := v_0
-		if v_1.Op != Op386ADDLconst {
-			break
-		}
-		d := v_1.AuxInt
-		idx := v_1.Args[0]
-		mem := v_2
-		v.reset(Op386MOVWloadidx2)
-		v.AuxInt = int64(int32(c + 2*d))
-		v.Aux = sym
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
-	return false
-}
 func rewriteValue386_Op386MOVWstore(v *Value) bool {
 	v_2 := v.Args[2]
 	v_1 := v.Args[1]
@@ -8015,89 +5720,13 @@
 		v.AddArg3(base, val, mem)
 		return true
 	}
-	// match: (MOVWstore [off1] {sym1} (LEAL1 [off2] {sym2} ptr idx) val mem)
-	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-	// result: (MOVWstoreidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
-	for {
-		off1 := v.AuxInt
-		sym1 := v.Aux
-		if v_0.Op != Op386LEAL1 {
-			break
-		}
-		off2 := v_0.AuxInt
-		sym2 := v_0.Aux
-		idx := v_0.Args[1]
-		ptr := v_0.Args[0]
-		val := v_1
-		mem := v_2
-		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
-			break
-		}
-		v.reset(Op386MOVWstoreidx1)
-		v.AuxInt = off1 + off2
-		v.Aux = mergeSym(sym1, sym2)
-		v.AddArg4(ptr, idx, val, mem)
-		return true
-	}
-	// match: (MOVWstore [off1] {sym1} (LEAL2 [off2] {sym2} ptr idx) val mem)
-	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-	// result: (MOVWstoreidx2 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
-	for {
-		off1 := v.AuxInt
-		sym1 := v.Aux
-		if v_0.Op != Op386LEAL2 {
-			break
-		}
-		off2 := v_0.AuxInt
-		sym2 := v_0.Aux
-		idx := v_0.Args[1]
-		ptr := v_0.Args[0]
-		val := v_1
-		mem := v_2
-		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
-			break
-		}
-		v.reset(Op386MOVWstoreidx2)
-		v.AuxInt = off1 + off2
-		v.Aux = mergeSym(sym1, sym2)
-		v.AddArg4(ptr, idx, val, mem)
-		return true
-	}
-	// match: (MOVWstore [off] {sym} (ADDL ptr idx) val mem)
-	// cond: ptr.Op != OpSB
-	// result: (MOVWstoreidx1 [off] {sym} ptr idx val mem)
-	for {
-		off := v.AuxInt
-		sym := v.Aux
-		if v_0.Op != Op386ADDL {
-			break
-		}
-		_ = v_0.Args[1]
-		v_0_0 := v_0.Args[0]
-		v_0_1 := v_0.Args[1]
-		for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
-			ptr := v_0_0
-			idx := v_0_1
-			val := v_1
-			mem := v_2
-			if !(ptr.Op != OpSB) {
-				continue
-			}
-			v.reset(Op386MOVWstoreidx1)
-			v.AuxInt = off
-			v.Aux = sym
-			v.AddArg4(ptr, idx, val, mem)
-			return true
-		}
-		break
-	}
-	// match: (MOVWstore [i] {s} p (SHRLconst [16] w) x:(MOVWstore [i-2] {s} p w mem))
-	// cond: x.Uses == 1 && clobber(x)
-	// result: (MOVLstore [i-2] {s} p w mem)
+	// match: (MOVWstore [i] {s} p1 (SHRLconst [16] w) x:(MOVWstore [i-2] {s} p0 w mem))
+	// cond: x.Uses == 1 && same(p0, p1, 1) && clobber(x)
+	// result: (MOVLstore [i-2] {s} p0 w mem)
 	for {
 		i := v.AuxInt
 		s := v.Aux
-		p := v_0
+		p1 := v_0
 		if v_1.Op != Op386SHRLconst || v_1.AuxInt != 16 {
 			break
 		}
@@ -8107,22 +5736,23 @@
 			break
 		}
 		mem := x.Args[2]
-		if p != x.Args[0] || w != x.Args[1] || !(x.Uses == 1 && clobber(x)) {
+		p0 := x.Args[0]
+		if w != x.Args[1] || !(x.Uses == 1 && same(p0, p1, 1) && clobber(x)) {
 			break
 		}
 		v.reset(Op386MOVLstore)
 		v.AuxInt = i - 2
 		v.Aux = s
-		v.AddArg3(p, w, mem)
+		v.AddArg3(p0, w, mem)
 		return true
 	}
-	// match: (MOVWstore [i] {s} p (SHRLconst [j] w) x:(MOVWstore [i-2] {s} p w0:(SHRLconst [j-16] w) mem))
-	// cond: x.Uses == 1 && clobber(x)
-	// result: (MOVLstore [i-2] {s} p w0 mem)
+	// match: (MOVWstore [i] {s} p1 (SHRLconst [j] w) x:(MOVWstore [i-2] {s} p0 w0:(SHRLconst [j-16] w) mem))
+	// cond: x.Uses == 1 && same(p0, p1, 1) && clobber(x)
+	// result: (MOVLstore [i-2] {s} p0 w0 mem)
 	for {
 		i := v.AuxInt
 		s := v.Aux
-		p := v_0
+		p1 := v_0
 		if v_1.Op != Op386SHRLconst {
 			break
 		}
@@ -8133,17 +5763,131 @@
 			break
 		}
 		mem := x.Args[2]
-		if p != x.Args[0] {
-			break
-		}
+		p0 := x.Args[0]
 		w0 := x.Args[1]
-		if w0.Op != Op386SHRLconst || w0.AuxInt != j-16 || w != w0.Args[0] || !(x.Uses == 1 && clobber(x)) {
+		if w0.Op != Op386SHRLconst || w0.AuxInt != j-16 || w != w0.Args[0] || !(x.Uses == 1 && same(p0, p1, 1) && clobber(x)) {
 			break
 		}
 		v.reset(Op386MOVLstore)
 		v.AuxInt = i - 2
 		v.Aux = s
-		v.AddArg3(p, w0, mem)
+		v.AddArg3(p0, w0, mem)
+		return true
+	}
+	// match: (MOVWstore [i0] {s0} l:(LEAL1 [i1] {s1} x y) val mem)
+	// cond: i1 != 0 && is32Bit(i0+i1)
+	// result: (MOVWstore [i0+i1] {s0} (LEAL1 <l.Type> [0] {s1} x y) val mem)
+	for {
+		i0 := v.AuxInt
+		s0 := v.Aux
+		l := v_0
+		if l.Op != Op386LEAL1 {
+			break
+		}
+		i1 := l.AuxInt
+		s1 := l.Aux
+		y := l.Args[1]
+		x := l.Args[0]
+		val := v_1
+		mem := v_2
+		if !(i1 != 0 && is32Bit(i0+i1)) {
+			break
+		}
+		v.reset(Op386MOVWstore)
+		v.AuxInt = i0 + i1
+		v.Aux = s0
+		v0 := b.NewValue0(v.Pos, Op386LEAL1, l.Type)
+		v0.AuxInt = 0
+		v0.Aux = s1
+		v0.AddArg2(x, y)
+		v.AddArg3(v0, val, mem)
+		return true
+	}
+	// match: (MOVWstore [i0] {s0} l:(LEAL2 [i1] {s1} x y) val mem)
+	// cond: i1 != 0 && is32Bit(i0+i1)
+	// result: (MOVWstore [i0+i1] {s0} (LEAL2 <l.Type> [0] {s1} x y) val mem)
+	for {
+		i0 := v.AuxInt
+		s0 := v.Aux
+		l := v_0
+		if l.Op != Op386LEAL2 {
+			break
+		}
+		i1 := l.AuxInt
+		s1 := l.Aux
+		y := l.Args[1]
+		x := l.Args[0]
+		val := v_1
+		mem := v_2
+		if !(i1 != 0 && is32Bit(i0+i1)) {
+			break
+		}
+		v.reset(Op386MOVWstore)
+		v.AuxInt = i0 + i1
+		v.Aux = s0
+		v0 := b.NewValue0(v.Pos, Op386LEAL2, l.Type)
+		v0.AuxInt = 0
+		v0.Aux = s1
+		v0.AddArg2(x, y)
+		v.AddArg3(v0, val, mem)
+		return true
+	}
+	// match: (MOVWstore [i0] {s0} l:(LEAL4 [i1] {s1} x y) val mem)
+	// cond: i1 != 0 && is32Bit(i0+i1)
+	// result: (MOVWstore [i0+i1] {s0} (LEAL4 <l.Type> [0] {s1} x y) val mem)
+	for {
+		i0 := v.AuxInt
+		s0 := v.Aux
+		l := v_0
+		if l.Op != Op386LEAL4 {
+			break
+		}
+		i1 := l.AuxInt
+		s1 := l.Aux
+		y := l.Args[1]
+		x := l.Args[0]
+		val := v_1
+		mem := v_2
+		if !(i1 != 0 && is32Bit(i0+i1)) {
+			break
+		}
+		v.reset(Op386MOVWstore)
+		v.AuxInt = i0 + i1
+		v.Aux = s0
+		v0 := b.NewValue0(v.Pos, Op386LEAL4, l.Type)
+		v0.AuxInt = 0
+		v0.Aux = s1
+		v0.AddArg2(x, y)
+		v.AddArg3(v0, val, mem)
+		return true
+	}
+	// match: (MOVWstore [i0] {s0} l:(LEAL8 [i1] {s1} x y) val mem)
+	// cond: i1 != 0 && is32Bit(i0+i1)
+	// result: (MOVWstore [i0+i1] {s0} (LEAL8 <l.Type> [0] {s1} x y) val mem)
+	for {
+		i0 := v.AuxInt
+		s0 := v.Aux
+		l := v_0
+		if l.Op != Op386LEAL8 {
+			break
+		}
+		i1 := l.AuxInt
+		s1 := l.Aux
+		y := l.Args[1]
+		x := l.Args[0]
+		val := v_1
+		mem := v_2
+		if !(i1 != 0 && is32Bit(i0+i1)) {
+			break
+		}
+		v.reset(Op386MOVWstore)
+		v.AuxInt = i0 + i1
+		v.Aux = s0
+		v0 := b.NewValue0(v.Pos, Op386LEAL8, l.Type)
+		v0.AuxInt = 0
+		v0.Aux = s1
+		v0.AddArg2(x, y)
+		v.AddArg3(v0, val, mem)
 		return true
 	}
 	return false
@@ -8196,76 +5940,13 @@
 		v.AddArg2(ptr, mem)
 		return true
 	}
-	// match: (MOVWstoreconst [x] {sym1} (LEAL1 [off] {sym2} ptr idx) mem)
-	// cond: canMergeSym(sym1, sym2)
-	// result: (MOVWstoreconstidx1 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
-	for {
-		x := v.AuxInt
-		sym1 := v.Aux
-		if v_0.Op != Op386LEAL1 {
-			break
-		}
-		off := v_0.AuxInt
-		sym2 := v_0.Aux
-		idx := v_0.Args[1]
-		ptr := v_0.Args[0]
-		mem := v_1
-		if !(canMergeSym(sym1, sym2)) {
-			break
-		}
-		v.reset(Op386MOVWstoreconstidx1)
-		v.AuxInt = ValAndOff(x).add(off)
-		v.Aux = mergeSym(sym1, sym2)
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
-	// match: (MOVWstoreconst [x] {sym1} (LEAL2 [off] {sym2} ptr idx) mem)
-	// cond: canMergeSym(sym1, sym2)
-	// result: (MOVWstoreconstidx2 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
-	for {
-		x := v.AuxInt
-		sym1 := v.Aux
-		if v_0.Op != Op386LEAL2 {
-			break
-		}
-		off := v_0.AuxInt
-		sym2 := v_0.Aux
-		idx := v_0.Args[1]
-		ptr := v_0.Args[0]
-		mem := v_1
-		if !(canMergeSym(sym1, sym2)) {
-			break
-		}
-		v.reset(Op386MOVWstoreconstidx2)
-		v.AuxInt = ValAndOff(x).add(off)
-		v.Aux = mergeSym(sym1, sym2)
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
-	// match: (MOVWstoreconst [x] {sym} (ADDL ptr idx) mem)
-	// result: (MOVWstoreconstidx1 [x] {sym} ptr idx mem)
-	for {
-		x := v.AuxInt
-		sym := v.Aux
-		if v_0.Op != Op386ADDL {
-			break
-		}
-		idx := v_0.Args[1]
-		ptr := v_0.Args[0]
-		mem := v_1
-		v.reset(Op386MOVWstoreconstidx1)
-		v.AuxInt = x
-		v.Aux = sym
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
-	// match: (MOVWstoreconst [c] {s} p x:(MOVWstoreconst [a] {s} p mem))
-	// cond: x.Uses == 1 && ValAndOff(a).Off() + 2 == ValAndOff(c).Off() && clobber(x)
-	// result: (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p mem)
+	// match: (MOVWstoreconst [c] {s} p1 x:(MOVWstoreconst [a] {s} p0 mem))
+	// cond: x.Uses == 1 && ValAndOff(a).Off() + 2 == ValAndOff(c).Off() && same(p0, p1, 1) && clobber(x)
+	// result: (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p0 mem)
 	for {
 		c := v.AuxInt
 		s := v.Aux
-		p := v_0
+		p1 := v_0
 		x := v_1
 		if x.Op != Op386MOVWstoreconst {
 			break
@@ -8275,22 +5956,23 @@
 			break
 		}
 		mem := x.Args[1]
-		if p != x.Args[0] || !(x.Uses == 1 && ValAndOff(a).Off()+2 == ValAndOff(c).Off() && clobber(x)) {
+		p0 := x.Args[0]
+		if !(x.Uses == 1 && ValAndOff(a).Off()+2 == ValAndOff(c).Off() && same(p0, p1, 1) && clobber(x)) {
 			break
 		}
 		v.reset(Op386MOVLstoreconst)
 		v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xffff|ValAndOff(c).Val()<<16, ValAndOff(a).Off())
 		v.Aux = s
-		v.AddArg2(p, mem)
+		v.AddArg2(p0, mem)
 		return true
 	}
-	// match: (MOVWstoreconst [a] {s} p x:(MOVWstoreconst [c] {s} p mem))
-	// cond: x.Uses == 1 && ValAndOff(a).Off() + 2 == ValAndOff(c).Off() && clobber(x)
-	// result: (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p mem)
+	// match: (MOVWstoreconst [a] {s} p1 x:(MOVWstoreconst [c] {s} p0 mem))
+	// cond: x.Uses == 1 && ValAndOff(a).Off() + 2 == ValAndOff(c).Off() && same(p0, p1, 1) && clobber(x)
+	// result: (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p0 mem)
 	for {
 		a := v.AuxInt
 		s := v.Aux
-		p := v_0
+		p1 := v_0
 		x := v_1
 		if x.Op != Op386MOVWstoreconst {
 			break
@@ -8300,422 +5982,14 @@
 			break
 		}
 		mem := x.Args[1]
-		if p != x.Args[0] || !(x.Uses == 1 && ValAndOff(a).Off()+2 == ValAndOff(c).Off() && clobber(x)) {
+		p0 := x.Args[0]
+		if !(x.Uses == 1 && ValAndOff(a).Off()+2 == ValAndOff(c).Off() && same(p0, p1, 1) && clobber(x)) {
 			break
 		}
 		v.reset(Op386MOVLstoreconst)
 		v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xffff|ValAndOff(c).Val()<<16, ValAndOff(a).Off())
 		v.Aux = s
-		v.AddArg2(p, mem)
-		return true
-	}
-	return false
-}
-func rewriteValue386_Op386MOVWstoreconstidx1(v *Value) bool {
-	v_2 := v.Args[2]
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	// match: (MOVWstoreconstidx1 [c] {sym} ptr (SHLLconst [1] idx) mem)
-	// result: (MOVWstoreconstidx2 [c] {sym} ptr idx mem)
-	for {
-		c := v.AuxInt
-		sym := v.Aux
-		ptr := v_0
-		if v_1.Op != Op386SHLLconst || v_1.AuxInt != 1 {
-			break
-		}
-		idx := v_1.Args[0]
-		mem := v_2
-		v.reset(Op386MOVWstoreconstidx2)
-		v.AuxInt = c
-		v.Aux = sym
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
-	// match: (MOVWstoreconstidx1 [x] {sym} (ADDLconst [c] ptr) idx mem)
-	// result: (MOVWstoreconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
-	for {
-		x := v.AuxInt
-		sym := v.Aux
-		if v_0.Op != Op386ADDLconst {
-			break
-		}
-		c := v_0.AuxInt
-		ptr := v_0.Args[0]
-		idx := v_1
-		mem := v_2
-		v.reset(Op386MOVWstoreconstidx1)
-		v.AuxInt = ValAndOff(x).add(c)
-		v.Aux = sym
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
-	// match: (MOVWstoreconstidx1 [x] {sym} ptr (ADDLconst [c] idx) mem)
-	// result: (MOVWstoreconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
-	for {
-		x := v.AuxInt
-		sym := v.Aux
-		ptr := v_0
-		if v_1.Op != Op386ADDLconst {
-			break
-		}
-		c := v_1.AuxInt
-		idx := v_1.Args[0]
-		mem := v_2
-		v.reset(Op386MOVWstoreconstidx1)
-		v.AuxInt = ValAndOff(x).add(c)
-		v.Aux = sym
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
-	// match: (MOVWstoreconstidx1 [c] {s} p i x:(MOVWstoreconstidx1 [a] {s} p i mem))
-	// cond: x.Uses == 1 && ValAndOff(a).Off() + 2 == ValAndOff(c).Off() && clobber(x)
-	// result: (MOVLstoreconstidx1 [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p i mem)
-	for {
-		c := v.AuxInt
-		s := v.Aux
-		p := v_0
-		i := v_1
-		x := v_2
-		if x.Op != Op386MOVWstoreconstidx1 {
-			break
-		}
-		a := x.AuxInt
-		if x.Aux != s {
-			break
-		}
-		mem := x.Args[2]
-		if p != x.Args[0] || i != x.Args[1] || !(x.Uses == 1 && ValAndOff(a).Off()+2 == ValAndOff(c).Off() && clobber(x)) {
-			break
-		}
-		v.reset(Op386MOVLstoreconstidx1)
-		v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xffff|ValAndOff(c).Val()<<16, ValAndOff(a).Off())
-		v.Aux = s
-		v.AddArg3(p, i, mem)
-		return true
-	}
-	return false
-}
-func rewriteValue386_Op386MOVWstoreconstidx2(v *Value) bool {
-	v_2 := v.Args[2]
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	b := v.Block
-	// match: (MOVWstoreconstidx2 [x] {sym} (ADDLconst [c] ptr) idx mem)
-	// result: (MOVWstoreconstidx2 [ValAndOff(x).add(c)] {sym} ptr idx mem)
-	for {
-		x := v.AuxInt
-		sym := v.Aux
-		if v_0.Op != Op386ADDLconst {
-			break
-		}
-		c := v_0.AuxInt
-		ptr := v_0.Args[0]
-		idx := v_1
-		mem := v_2
-		v.reset(Op386MOVWstoreconstidx2)
-		v.AuxInt = ValAndOff(x).add(c)
-		v.Aux = sym
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
-	// match: (MOVWstoreconstidx2 [x] {sym} ptr (ADDLconst [c] idx) mem)
-	// result: (MOVWstoreconstidx2 [ValAndOff(x).add(2*c)] {sym} ptr idx mem)
-	for {
-		x := v.AuxInt
-		sym := v.Aux
-		ptr := v_0
-		if v_1.Op != Op386ADDLconst {
-			break
-		}
-		c := v_1.AuxInt
-		idx := v_1.Args[0]
-		mem := v_2
-		v.reset(Op386MOVWstoreconstidx2)
-		v.AuxInt = ValAndOff(x).add(2 * c)
-		v.Aux = sym
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
-	// match: (MOVWstoreconstidx2 [c] {s} p i x:(MOVWstoreconstidx2 [a] {s} p i mem))
-	// cond: x.Uses == 1 && ValAndOff(a).Off() + 2 == ValAndOff(c).Off() && clobber(x)
-	// result: (MOVLstoreconstidx1 [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p (SHLLconst <i.Type> [1] i) mem)
-	for {
-		c := v.AuxInt
-		s := v.Aux
-		p := v_0
-		i := v_1
-		x := v_2
-		if x.Op != Op386MOVWstoreconstidx2 {
-			break
-		}
-		a := x.AuxInt
-		if x.Aux != s {
-			break
-		}
-		mem := x.Args[2]
-		if p != x.Args[0] || i != x.Args[1] || !(x.Uses == 1 && ValAndOff(a).Off()+2 == ValAndOff(c).Off() && clobber(x)) {
-			break
-		}
-		v.reset(Op386MOVLstoreconstidx1)
-		v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xffff|ValAndOff(c).Val()<<16, ValAndOff(a).Off())
-		v.Aux = s
-		v0 := b.NewValue0(v.Pos, Op386SHLLconst, i.Type)
-		v0.AuxInt = 1
-		v0.AddArg(i)
-		v.AddArg3(p, v0, mem)
-		return true
-	}
-	return false
-}
-func rewriteValue386_Op386MOVWstoreidx1(v *Value) bool {
-	v_3 := v.Args[3]
-	v_2 := v.Args[2]
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	// match: (MOVWstoreidx1 [c] {sym} ptr (SHLLconst [1] idx) val mem)
-	// result: (MOVWstoreidx2 [c] {sym} ptr idx val mem)
-	for {
-		c := v.AuxInt
-		sym := v.Aux
-		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-			ptr := v_0
-			if v_1.Op != Op386SHLLconst || v_1.AuxInt != 1 {
-				continue
-			}
-			idx := v_1.Args[0]
-			val := v_2
-			mem := v_3
-			v.reset(Op386MOVWstoreidx2)
-			v.AuxInt = c
-			v.Aux = sym
-			v.AddArg4(ptr, idx, val, mem)
-			return true
-		}
-		break
-	}
-	// match: (MOVWstoreidx1 [c] {sym} (ADDLconst [d] ptr) idx val mem)
-	// result: (MOVWstoreidx1 [int64(int32(c+d))] {sym} ptr idx val mem)
-	for {
-		c := v.AuxInt
-		sym := v.Aux
-		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-			if v_0.Op != Op386ADDLconst {
-				continue
-			}
-			d := v_0.AuxInt
-			ptr := v_0.Args[0]
-			idx := v_1
-			val := v_2
-			mem := v_3
-			v.reset(Op386MOVWstoreidx1)
-			v.AuxInt = int64(int32(c + d))
-			v.Aux = sym
-			v.AddArg4(ptr, idx, val, mem)
-			return true
-		}
-		break
-	}
-	// match: (MOVWstoreidx1 [c] {sym} ptr (ADDLconst [d] idx) val mem)
-	// result: (MOVWstoreidx1 [int64(int32(c+d))] {sym} ptr idx val mem)
-	for {
-		c := v.AuxInt
-		sym := v.Aux
-		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-			ptr := v_0
-			if v_1.Op != Op386ADDLconst {
-				continue
-			}
-			d := v_1.AuxInt
-			idx := v_1.Args[0]
-			val := v_2
-			mem := v_3
-			v.reset(Op386MOVWstoreidx1)
-			v.AuxInt = int64(int32(c + d))
-			v.Aux = sym
-			v.AddArg4(ptr, idx, val, mem)
-			return true
-		}
-		break
-	}
-	// match: (MOVWstoreidx1 [i] {s} p idx (SHRLconst [16] w) x:(MOVWstoreidx1 [i-2] {s} p idx w mem))
-	// cond: x.Uses == 1 && clobber(x)
-	// result: (MOVLstoreidx1 [i-2] {s} p idx w mem)
-	for {
-		i := v.AuxInt
-		s := v.Aux
-		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-			p := v_0
-			idx := v_1
-			if v_2.Op != Op386SHRLconst || v_2.AuxInt != 16 {
-				continue
-			}
-			w := v_2.Args[0]
-			x := v_3
-			if x.Op != Op386MOVWstoreidx1 || x.AuxInt != i-2 || x.Aux != s {
-				continue
-			}
-			mem := x.Args[3]
-			x_0 := x.Args[0]
-			x_1 := x.Args[1]
-			for _i1 := 0; _i1 <= 1; _i1, x_0, x_1 = _i1+1, x_1, x_0 {
-				if p != x_0 || idx != x_1 || w != x.Args[2] || !(x.Uses == 1 && clobber(x)) {
-					continue
-				}
-				v.reset(Op386MOVLstoreidx1)
-				v.AuxInt = i - 2
-				v.Aux = s
-				v.AddArg4(p, idx, w, mem)
-				return true
-			}
-		}
-		break
-	}
-	// match: (MOVWstoreidx1 [i] {s} p idx (SHRLconst [j] w) x:(MOVWstoreidx1 [i-2] {s} p idx w0:(SHRLconst [j-16] w) mem))
-	// cond: x.Uses == 1 && clobber(x)
-	// result: (MOVLstoreidx1 [i-2] {s} p idx w0 mem)
-	for {
-		i := v.AuxInt
-		s := v.Aux
-		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-			p := v_0
-			idx := v_1
-			if v_2.Op != Op386SHRLconst {
-				continue
-			}
-			j := v_2.AuxInt
-			w := v_2.Args[0]
-			x := v_3
-			if x.Op != Op386MOVWstoreidx1 || x.AuxInt != i-2 || x.Aux != s {
-				continue
-			}
-			mem := x.Args[3]
-			x_0 := x.Args[0]
-			x_1 := x.Args[1]
-			for _i1 := 0; _i1 <= 1; _i1, x_0, x_1 = _i1+1, x_1, x_0 {
-				if p != x_0 || idx != x_1 {
-					continue
-				}
-				w0 := x.Args[2]
-				if w0.Op != Op386SHRLconst || w0.AuxInt != j-16 || w != w0.Args[0] || !(x.Uses == 1 && clobber(x)) {
-					continue
-				}
-				v.reset(Op386MOVLstoreidx1)
-				v.AuxInt = i - 2
-				v.Aux = s
-				v.AddArg4(p, idx, w0, mem)
-				return true
-			}
-		}
-		break
-	}
-	return false
-}
-func rewriteValue386_Op386MOVWstoreidx2(v *Value) bool {
-	v_3 := v.Args[3]
-	v_2 := v.Args[2]
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	b := v.Block
-	// match: (MOVWstoreidx2 [c] {sym} (ADDLconst [d] ptr) idx val mem)
-	// result: (MOVWstoreidx2 [int64(int32(c+d))] {sym} ptr idx val mem)
-	for {
-		c := v.AuxInt
-		sym := v.Aux
-		if v_0.Op != Op386ADDLconst {
-			break
-		}
-		d := v_0.AuxInt
-		ptr := v_0.Args[0]
-		idx := v_1
-		val := v_2
-		mem := v_3
-		v.reset(Op386MOVWstoreidx2)
-		v.AuxInt = int64(int32(c + d))
-		v.Aux = sym
-		v.AddArg4(ptr, idx, val, mem)
-		return true
-	}
-	// match: (MOVWstoreidx2 [c] {sym} ptr (ADDLconst [d] idx) val mem)
-	// result: (MOVWstoreidx2 [int64(int32(c+2*d))] {sym} ptr idx val mem)
-	for {
-		c := v.AuxInt
-		sym := v.Aux
-		ptr := v_0
-		if v_1.Op != Op386ADDLconst {
-			break
-		}
-		d := v_1.AuxInt
-		idx := v_1.Args[0]
-		val := v_2
-		mem := v_3
-		v.reset(Op386MOVWstoreidx2)
-		v.AuxInt = int64(int32(c + 2*d))
-		v.Aux = sym
-		v.AddArg4(ptr, idx, val, mem)
-		return true
-	}
-	// match: (MOVWstoreidx2 [i] {s} p idx (SHRLconst [16] w) x:(MOVWstoreidx2 [i-2] {s} p idx w mem))
-	// cond: x.Uses == 1 && clobber(x)
-	// result: (MOVLstoreidx1 [i-2] {s} p (SHLLconst <idx.Type> [1] idx) w mem)
-	for {
-		i := v.AuxInt
-		s := v.Aux
-		p := v_0
-		idx := v_1
-		if v_2.Op != Op386SHRLconst || v_2.AuxInt != 16 {
-			break
-		}
-		w := v_2.Args[0]
-		x := v_3
-		if x.Op != Op386MOVWstoreidx2 || x.AuxInt != i-2 || x.Aux != s {
-			break
-		}
-		mem := x.Args[3]
-		if p != x.Args[0] || idx != x.Args[1] || w != x.Args[2] || !(x.Uses == 1 && clobber(x)) {
-			break
-		}
-		v.reset(Op386MOVLstoreidx1)
-		v.AuxInt = i - 2
-		v.Aux = s
-		v0 := b.NewValue0(v.Pos, Op386SHLLconst, idx.Type)
-		v0.AuxInt = 1
-		v0.AddArg(idx)
-		v.AddArg4(p, v0, w, mem)
-		return true
-	}
-	// match: (MOVWstoreidx2 [i] {s} p idx (SHRLconst [j] w) x:(MOVWstoreidx2 [i-2] {s} p idx w0:(SHRLconst [j-16] w) mem))
-	// cond: x.Uses == 1 && clobber(x)
-	// result: (MOVLstoreidx1 [i-2] {s} p (SHLLconst <idx.Type> [1] idx) w0 mem)
-	for {
-		i := v.AuxInt
-		s := v.Aux
-		p := v_0
-		idx := v_1
-		if v_2.Op != Op386SHRLconst {
-			break
-		}
-		j := v_2.AuxInt
-		w := v_2.Args[0]
-		x := v_3
-		if x.Op != Op386MOVWstoreidx2 || x.AuxInt != i-2 || x.Aux != s {
-			break
-		}
-		mem := x.Args[3]
-		if p != x.Args[0] || idx != x.Args[1] {
-			break
-		}
-		w0 := x.Args[2]
-		if w0.Op != Op386SHRLconst || w0.AuxInt != j-16 || w != w0.Args[0] || !(x.Uses == 1 && clobber(x)) {
-			break
-		}
-		v.reset(Op386MOVLstoreidx1)
-		v.AuxInt = i - 2
-		v.Aux = s
-		v0 := b.NewValue0(v.Pos, Op386SHLLconst, idx.Type)
-		v0.AuxInt = 1
-		v0.AddArg(idx)
-		v.AddArg4(p, v0, w0, mem)
+		v.AddArg2(p0, mem)
 		return true
 	}
 	return false
@@ -8764,32 +6038,6 @@
 		}
 		break
 	}
-	// match: (MULL x l:(MOVLloadidx4 [off] {sym} ptr idx mem))
-	// cond: canMergeLoadClobber(v, l, x) && clobber(l)
-	// result: (MULLloadidx4 x [off] {sym} ptr idx mem)
-	for {
-		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-			x := v_0
-			l := v_1
-			if l.Op != Op386MOVLloadidx4 {
-				continue
-			}
-			off := l.AuxInt
-			sym := l.Aux
-			mem := l.Args[2]
-			ptr := l.Args[0]
-			idx := l.Args[1]
-			if !(canMergeLoadClobber(v, l, x) && clobber(l)) {
-				continue
-			}
-			v.reset(Op386MULLloadidx4)
-			v.AuxInt = off
-			v.Aux = sym
-			v.AddArg4(x, ptr, idx, mem)
-			return true
-		}
-		break
-	}
 	return false
 }
 func rewriteValue386_Op386MULLconst(v *Value) bool {
@@ -9267,109 +6515,6 @@
 		v.AddArg3(val, base, mem)
 		return true
 	}
-	// match: (MULLload [off1] {sym1} val (LEAL4 [off2] {sym2} ptr idx) mem)
-	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-	// result: (MULLloadidx4 [off1+off2] {mergeSym(sym1,sym2)} val ptr idx mem)
-	for {
-		off1 := v.AuxInt
-		sym1 := v.Aux
-		val := v_0
-		if v_1.Op != Op386LEAL4 {
-			break
-		}
-		off2 := v_1.AuxInt
-		sym2 := v_1.Aux
-		idx := v_1.Args[1]
-		ptr := v_1.Args[0]
-		mem := v_2
-		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
-			break
-		}
-		v.reset(Op386MULLloadidx4)
-		v.AuxInt = off1 + off2
-		v.Aux = mergeSym(sym1, sym2)
-		v.AddArg4(val, ptr, idx, mem)
-		return true
-	}
-	return false
-}
-func rewriteValue386_Op386MULLloadidx4(v *Value) bool {
-	v_3 := v.Args[3]
-	v_2 := v.Args[2]
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	b := v.Block
-	config := b.Func.Config
-	// match: (MULLloadidx4 [off1] {sym} val (ADDLconst [off2] base) idx mem)
-	// cond: is32Bit(off1+off2)
-	// result: (MULLloadidx4 [off1+off2] {sym} val base idx mem)
-	for {
-		off1 := v.AuxInt
-		sym := v.Aux
-		val := v_0
-		if v_1.Op != Op386ADDLconst {
-			break
-		}
-		off2 := v_1.AuxInt
-		base := v_1.Args[0]
-		idx := v_2
-		mem := v_3
-		if !(is32Bit(off1 + off2)) {
-			break
-		}
-		v.reset(Op386MULLloadidx4)
-		v.AuxInt = off1 + off2
-		v.Aux = sym
-		v.AddArg4(val, base, idx, mem)
-		return true
-	}
-	// match: (MULLloadidx4 [off1] {sym} val base (ADDLconst [off2] idx) mem)
-	// cond: is32Bit(off1+off2*4)
-	// result: (MULLloadidx4 [off1+off2*4] {sym} val base idx mem)
-	for {
-		off1 := v.AuxInt
-		sym := v.Aux
-		val := v_0
-		base := v_1
-		if v_2.Op != Op386ADDLconst {
-			break
-		}
-		off2 := v_2.AuxInt
-		idx := v_2.Args[0]
-		mem := v_3
-		if !(is32Bit(off1 + off2*4)) {
-			break
-		}
-		v.reset(Op386MULLloadidx4)
-		v.AuxInt = off1 + off2*4
-		v.Aux = sym
-		v.AddArg4(val, base, idx, mem)
-		return true
-	}
-	// match: (MULLloadidx4 [off1] {sym1} val (LEAL [off2] {sym2} base) idx mem)
-	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)
-	// result: (MULLloadidx4 [off1+off2] {mergeSym(sym1,sym2)} val base idx mem)
-	for {
-		off1 := v.AuxInt
-		sym1 := v.Aux
-		val := v_0
-		if v_1.Op != Op386LEAL {
-			break
-		}
-		off2 := v_1.AuxInt
-		sym2 := v_1.Aux
-		base := v_1.Args[0]
-		idx := v_2
-		mem := v_3
-		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) {
-			break
-		}
-		v.reset(Op386MULLloadidx4)
-		v.AuxInt = off1 + off2
-		v.Aux = mergeSym(sym1, sym2)
-		v.AddArg4(val, base, idx, mem)
-		return true
-	}
 	return false
 }
 func rewriteValue386_Op386MULSD(v *Value) bool {
@@ -9692,32 +6837,6 @@
 		}
 		break
 	}
-	// match: (ORL x l:(MOVLloadidx4 [off] {sym} ptr idx mem))
-	// cond: canMergeLoadClobber(v, l, x) && clobber(l)
-	// result: (ORLloadidx4 x [off] {sym} ptr idx mem)
-	for {
-		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-			x := v_0
-			l := v_1
-			if l.Op != Op386MOVLloadidx4 {
-				continue
-			}
-			off := l.AuxInt
-			sym := l.Aux
-			mem := l.Args[2]
-			ptr := l.Args[0]
-			idx := l.Args[1]
-			if !(canMergeLoadClobber(v, l, x) && clobber(l)) {
-				continue
-			}
-			v.reset(Op386ORLloadidx4)
-			v.AuxInt = off
-			v.Aux = sym
-			v.AddArg4(x, ptr, idx, mem)
-			return true
-		}
-		break
-	}
 	// match: (ORL x x)
 	// result: x
 	for {
@@ -9728,9 +6847,9 @@
 		v.copyOf(x)
 		return true
 	}
-	// match: (ORL x0:(MOVBload [i0] {s} p mem) s0:(SHLLconst [8] x1:(MOVBload [i1] {s} p mem)))
-	// cond: i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, s0)
-	// result: @mergePoint(b,x0,x1) (MOVWload [i0] {s} p mem)
+	// match: (ORL x0:(MOVBload [i0] {s} p0 mem) s0:(SHLLconst [8] x1:(MOVBload [i1] {s} p1 mem)))
+	// cond: i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && same(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, s0)
+	// result: @mergePoint(b,x0,x1) (MOVWload [i0] {s} p0 mem)
 	for {
 		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
 			x0 := v_0
@@ -9740,7 +6859,7 @@
 			i0 := x0.AuxInt
 			s := x0.Aux
 			mem := x0.Args[1]
-			p := x0.Args[0]
+			p0 := x0.Args[0]
 			s0 := v_1
 			if s0.Op != Op386SHLLconst || s0.AuxInt != 8 {
 				continue
@@ -9754,7 +6873,8 @@
 				continue
 			}
 			_ = x1.Args[1]
-			if p != x1.Args[0] || mem != x1.Args[1] || !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, s0)) {
+			p1 := x1.Args[0]
+			if mem != x1.Args[1] || !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && same(p0, p1, 1) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, s0)) {
 				continue
 			}
 			b = mergePoint(b, x0, x1)
@@ -9762,14 +6882,14 @@
 			v.copyOf(v0)
 			v0.AuxInt = i0
 			v0.Aux = s
-			v0.AddArg2(p, mem)
+			v0.AddArg2(p0, mem)
 			return true
 		}
 		break
 	}
-	// match: (ORL o0:(ORL x0:(MOVWload [i0] {s} p mem) s0:(SHLLconst [16] x1:(MOVBload [i2] {s} p mem))) s1:(SHLLconst [24] x2:(MOVBload [i3] {s} p mem)))
-	// cond: i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b,x0,x1,x2) != nil && clobber(x0, x1, x2, s0, s1, o0)
-	// result: @mergePoint(b,x0,x1,x2) (MOVLload [i0] {s} p mem)
+	// match: (ORL o0:(ORL x0:(MOVWload [i0] {s} p0 mem) s0:(SHLLconst [16] x1:(MOVBload [i2] {s} p1 mem))) s1:(SHLLconst [24] x2:(MOVBload [i3] {s} p2 mem)))
+	// cond: i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && same(p0, p1, 1) && same(p1, p2, 1) && mergePoint(b,x0,x1,x2) != nil && clobber(x0, x1, x2, s0, s1, o0)
+	// result: @mergePoint(b,x0,x1,x2) (MOVLload [i0] {s} p0 mem)
 	for {
 		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
 			o0 := v_0
@@ -9787,7 +6907,7 @@
 				i0 := x0.AuxInt
 				s := x0.Aux
 				mem := x0.Args[1]
-				p := x0.Args[0]
+				p0 := x0.Args[0]
 				s0 := o0_1
 				if s0.Op != Op386SHLLconst || s0.AuxInt != 16 {
 					continue
@@ -9801,7 +6921,8 @@
 					continue
 				}
 				_ = x1.Args[1]
-				if p != x1.Args[0] || mem != x1.Args[1] {
+				p1 := x1.Args[0]
+				if mem != x1.Args[1] {
 					continue
 				}
 				s1 := v_1
@@ -9817,7 +6938,8 @@
 					continue
 				}
 				_ = x2.Args[1]
-				if p != x2.Args[0] || mem != x2.Args[1] || !(i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0, x1, x2, s0, s1, o0)) {
+				p2 := x2.Args[0]
+				if mem != x2.Args[1] || !(i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && same(p0, p1, 1) && same(p1, p2, 1) && mergePoint(b, x0, x1, x2) != nil && clobber(x0, x1, x2, s0, s1, o0)) {
 					continue
 				}
 				b = mergePoint(b, x0, x1, x2)
@@ -9825,137 +6947,12 @@
 				v.copyOf(v0)
 				v0.AuxInt = i0
 				v0.Aux = s
-				v0.AddArg2(p, mem)
+				v0.AddArg2(p0, mem)
 				return true
 			}
 		}
 		break
 	}
-	// match: (ORL x0:(MOVBloadidx1 [i0] {s} p idx mem) s0:(SHLLconst [8] x1:(MOVBloadidx1 [i1] {s} p idx mem)))
-	// cond: i1==i0+1 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, s0)
-	// result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
-	for {
-		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-			x0 := v_0
-			if x0.Op != Op386MOVBloadidx1 {
-				continue
-			}
-			i0 := x0.AuxInt
-			s := x0.Aux
-			mem := x0.Args[2]
-			x0_0 := x0.Args[0]
-			x0_1 := x0.Args[1]
-			for _i1 := 0; _i1 <= 1; _i1, x0_0, x0_1 = _i1+1, x0_1, x0_0 {
-				p := x0_0
-				idx := x0_1
-				s0 := v_1
-				if s0.Op != Op386SHLLconst || s0.AuxInt != 8 {
-					continue
-				}
-				x1 := s0.Args[0]
-				if x1.Op != Op386MOVBloadidx1 {
-					continue
-				}
-				i1 := x1.AuxInt
-				if x1.Aux != s {
-					continue
-				}
-				_ = x1.Args[2]
-				x1_0 := x1.Args[0]
-				x1_1 := x1.Args[1]
-				for _i2 := 0; _i2 <= 1; _i2, x1_0, x1_1 = _i2+1, x1_1, x1_0 {
-					if p != x1_0 || idx != x1_1 || mem != x1.Args[2] || !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, s0)) {
-						continue
-					}
-					b = mergePoint(b, x0, x1)
-					v0 := b.NewValue0(v.Pos, Op386MOVWloadidx1, v.Type)
-					v.copyOf(v0)
-					v0.AuxInt = i0
-					v0.Aux = s
-					v0.AddArg3(p, idx, mem)
-					return true
-				}
-			}
-		}
-		break
-	}
-	// match: (ORL o0:(ORL x0:(MOVWloadidx1 [i0] {s} p idx mem) s0:(SHLLconst [16] x1:(MOVBloadidx1 [i2] {s} p idx mem))) s1:(SHLLconst [24] x2:(MOVBloadidx1 [i3] {s} p idx mem)))
-	// cond: i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b,x0,x1,x2) != nil && clobber(x0, x1, x2, s0, s1, o0)
-	// result: @mergePoint(b,x0,x1,x2) (MOVLloadidx1 <v.Type> [i0] {s} p idx mem)
-	for {
-		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-			o0 := v_0
-			if o0.Op != Op386ORL {
-				continue
-			}
-			_ = o0.Args[1]
-			o0_0 := o0.Args[0]
-			o0_1 := o0.Args[1]
-			for _i1 := 0; _i1 <= 1; _i1, o0_0, o0_1 = _i1+1, o0_1, o0_0 {
-				x0 := o0_0
-				if x0.Op != Op386MOVWloadidx1 {
-					continue
-				}
-				i0 := x0.AuxInt
-				s := x0.Aux
-				mem := x0.Args[2]
-				x0_0 := x0.Args[0]
-				x0_1 := x0.Args[1]
-				for _i2 := 0; _i2 <= 1; _i2, x0_0, x0_1 = _i2+1, x0_1, x0_0 {
-					p := x0_0
-					idx := x0_1
-					s0 := o0_1
-					if s0.Op != Op386SHLLconst || s0.AuxInt != 16 {
-						continue
-					}
-					x1 := s0.Args[0]
-					if x1.Op != Op386MOVBloadidx1 {
-						continue
-					}
-					i2 := x1.AuxInt
-					if x1.Aux != s {
-						continue
-					}
-					_ = x1.Args[2]
-					x1_0 := x1.Args[0]
-					x1_1 := x1.Args[1]
-					for _i3 := 0; _i3 <= 1; _i3, x1_0, x1_1 = _i3+1, x1_1, x1_0 {
-						if p != x1_0 || idx != x1_1 || mem != x1.Args[2] {
-							continue
-						}
-						s1 := v_1
-						if s1.Op != Op386SHLLconst || s1.AuxInt != 24 {
-							continue
-						}
-						x2 := s1.Args[0]
-						if x2.Op != Op386MOVBloadidx1 {
-							continue
-						}
-						i3 := x2.AuxInt
-						if x2.Aux != s {
-							continue
-						}
-						_ = x2.Args[2]
-						x2_0 := x2.Args[0]
-						x2_1 := x2.Args[1]
-						for _i4 := 0; _i4 <= 1; _i4, x2_0, x2_1 = _i4+1, x2_1, x2_0 {
-							if p != x2_0 || idx != x2_1 || mem != x2.Args[2] || !(i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0, x1, x2, s0, s1, o0)) {
-								continue
-							}
-							b = mergePoint(b, x0, x1, x2)
-							v0 := b.NewValue0(v.Pos, Op386MOVLloadidx1, v.Type)
-							v.copyOf(v0)
-							v0.AuxInt = i0
-							v0.Aux = s
-							v0.AddArg3(p, idx, mem)
-							return true
-						}
-					}
-				}
-			}
-		}
-		break
-	}
 	return false
 }
 func rewriteValue386_Op386ORLconst(v *Value) bool {
@@ -10048,81 +7045,6 @@
 	}
 	return false
 }
-func rewriteValue386_Op386ORLconstmodifyidx4(v *Value) bool {
-	v_2 := v.Args[2]
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	b := v.Block
-	config := b.Func.Config
-	// match: (ORLconstmodifyidx4 [valoff1] {sym} (ADDLconst [off2] base) idx mem)
-	// cond: ValAndOff(valoff1).canAdd(off2)
-	// result: (ORLconstmodifyidx4 [ValAndOff(valoff1).add(off2)] {sym} base idx mem)
-	for {
-		valoff1 := v.AuxInt
-		sym := v.Aux
-		if v_0.Op != Op386ADDLconst {
-			break
-		}
-		off2 := v_0.AuxInt
-		base := v_0.Args[0]
-		idx := v_1
-		mem := v_2
-		if !(ValAndOff(valoff1).canAdd(off2)) {
-			break
-		}
-		v.reset(Op386ORLconstmodifyidx4)
-		v.AuxInt = ValAndOff(valoff1).add(off2)
-		v.Aux = sym
-		v.AddArg3(base, idx, mem)
-		return true
-	}
-	// match: (ORLconstmodifyidx4 [valoff1] {sym} base (ADDLconst [off2] idx) mem)
-	// cond: ValAndOff(valoff1).canAdd(off2*4)
-	// result: (ORLconstmodifyidx4 [ValAndOff(valoff1).add(off2*4)] {sym} base idx mem)
-	for {
-		valoff1 := v.AuxInt
-		sym := v.Aux
-		base := v_0
-		if v_1.Op != Op386ADDLconst {
-			break
-		}
-		off2 := v_1.AuxInt
-		idx := v_1.Args[0]
-		mem := v_2
-		if !(ValAndOff(valoff1).canAdd(off2 * 4)) {
-			break
-		}
-		v.reset(Op386ORLconstmodifyidx4)
-		v.AuxInt = ValAndOff(valoff1).add(off2 * 4)
-		v.Aux = sym
-		v.AddArg3(base, idx, mem)
-		return true
-	}
-	// match: (ORLconstmodifyidx4 [valoff1] {sym1} (LEAL [off2] {sym2} base) idx mem)
-	// cond: ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)
-	// result: (ORLconstmodifyidx4 [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base idx mem)
-	for {
-		valoff1 := v.AuxInt
-		sym1 := v.Aux
-		if v_0.Op != Op386LEAL {
-			break
-		}
-		off2 := v_0.AuxInt
-		sym2 := v_0.Aux
-		base := v_0.Args[0]
-		idx := v_1
-		mem := v_2
-		if !(ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) {
-			break
-		}
-		v.reset(Op386ORLconstmodifyidx4)
-		v.AuxInt = ValAndOff(valoff1).add(off2)
-		v.Aux = mergeSym(sym1, sym2)
-		v.AddArg3(base, idx, mem)
-		return true
-	}
-	return false
-}
 func rewriteValue386_Op386ORLload(v *Value) bool {
 	v_2 := v.Args[2]
 	v_1 := v.Args[1]
@@ -10174,109 +7096,6 @@
 		v.AddArg3(val, base, mem)
 		return true
 	}
-	// match: (ORLload [off1] {sym1} val (LEAL4 [off2] {sym2} ptr idx) mem)
-	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-	// result: (ORLloadidx4 [off1+off2] {mergeSym(sym1,sym2)} val ptr idx mem)
-	for {
-		off1 := v.AuxInt
-		sym1 := v.Aux
-		val := v_0
-		if v_1.Op != Op386LEAL4 {
-			break
-		}
-		off2 := v_1.AuxInt
-		sym2 := v_1.Aux
-		idx := v_1.Args[1]
-		ptr := v_1.Args[0]
-		mem := v_2
-		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
-			break
-		}
-		v.reset(Op386ORLloadidx4)
-		v.AuxInt = off1 + off2
-		v.Aux = mergeSym(sym1, sym2)
-		v.AddArg4(val, ptr, idx, mem)
-		return true
-	}
-	return false
-}
-func rewriteValue386_Op386ORLloadidx4(v *Value) bool {
-	v_3 := v.Args[3]
-	v_2 := v.Args[2]
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	b := v.Block
-	config := b.Func.Config
-	// match: (ORLloadidx4 [off1] {sym} val (ADDLconst [off2] base) idx mem)
-	// cond: is32Bit(off1+off2)
-	// result: (ORLloadidx4 [off1+off2] {sym} val base idx mem)
-	for {
-		off1 := v.AuxInt
-		sym := v.Aux
-		val := v_0
-		if v_1.Op != Op386ADDLconst {
-			break
-		}
-		off2 := v_1.AuxInt
-		base := v_1.Args[0]
-		idx := v_2
-		mem := v_3
-		if !(is32Bit(off1 + off2)) {
-			break
-		}
-		v.reset(Op386ORLloadidx4)
-		v.AuxInt = off1 + off2
-		v.Aux = sym
-		v.AddArg4(val, base, idx, mem)
-		return true
-	}
-	// match: (ORLloadidx4 [off1] {sym} val base (ADDLconst [off2] idx) mem)
-	// cond: is32Bit(off1+off2*4)
-	// result: (ORLloadidx4 [off1+off2*4] {sym} val base idx mem)
-	for {
-		off1 := v.AuxInt
-		sym := v.Aux
-		val := v_0
-		base := v_1
-		if v_2.Op != Op386ADDLconst {
-			break
-		}
-		off2 := v_2.AuxInt
-		idx := v_2.Args[0]
-		mem := v_3
-		if !(is32Bit(off1 + off2*4)) {
-			break
-		}
-		v.reset(Op386ORLloadidx4)
-		v.AuxInt = off1 + off2*4
-		v.Aux = sym
-		v.AddArg4(val, base, idx, mem)
-		return true
-	}
-	// match: (ORLloadidx4 [off1] {sym1} val (LEAL [off2] {sym2} base) idx mem)
-	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)
-	// result: (ORLloadidx4 [off1+off2] {mergeSym(sym1,sym2)} val base idx mem)
-	for {
-		off1 := v.AuxInt
-		sym1 := v.Aux
-		val := v_0
-		if v_1.Op != Op386LEAL {
-			break
-		}
-		off2 := v_1.AuxInt
-		sym2 := v_1.Aux
-		base := v_1.Args[0]
-		idx := v_2
-		mem := v_3
-		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) {
-			break
-		}
-		v.reset(Op386ORLloadidx4)
-		v.AuxInt = off1 + off2
-		v.Aux = mergeSym(sym1, sym2)
-		v.AddArg4(val, base, idx, mem)
-		return true
-	}
 	return false
 }
 func rewriteValue386_Op386ORLmodify(v *Value) bool {
@@ -10332,107 +7151,6 @@
 	}
 	return false
 }
-func rewriteValue386_Op386ORLmodifyidx4(v *Value) bool {
-	v_3 := v.Args[3]
-	v_2 := v.Args[2]
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	b := v.Block
-	config := b.Func.Config
-	// match: (ORLmodifyidx4 [off1] {sym} (ADDLconst [off2] base) idx val mem)
-	// cond: is32Bit(off1+off2)
-	// result: (ORLmodifyidx4 [off1+off2] {sym} base idx val mem)
-	for {
-		off1 := v.AuxInt
-		sym := v.Aux
-		if v_0.Op != Op386ADDLconst {
-			break
-		}
-		off2 := v_0.AuxInt
-		base := v_0.Args[0]
-		idx := v_1
-		val := v_2
-		mem := v_3
-		if !(is32Bit(off1 + off2)) {
-			break
-		}
-		v.reset(Op386ORLmodifyidx4)
-		v.AuxInt = off1 + off2
-		v.Aux = sym
-		v.AddArg4(base, idx, val, mem)
-		return true
-	}
-	// match: (ORLmodifyidx4 [off1] {sym} base (ADDLconst [off2] idx) val mem)
-	// cond: is32Bit(off1+off2*4)
-	// result: (ORLmodifyidx4 [off1+off2*4] {sym} base idx val mem)
-	for {
-		off1 := v.AuxInt
-		sym := v.Aux
-		base := v_0
-		if v_1.Op != Op386ADDLconst {
-			break
-		}
-		off2 := v_1.AuxInt
-		idx := v_1.Args[0]
-		val := v_2
-		mem := v_3
-		if !(is32Bit(off1 + off2*4)) {
-			break
-		}
-		v.reset(Op386ORLmodifyidx4)
-		v.AuxInt = off1 + off2*4
-		v.Aux = sym
-		v.AddArg4(base, idx, val, mem)
-		return true
-	}
-	// match: (ORLmodifyidx4 [off1] {sym1} (LEAL [off2] {sym2} base) idx val mem)
-	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)
-	// result: (ORLmodifyidx4 [off1+off2] {mergeSym(sym1,sym2)} base idx val mem)
-	for {
-		off1 := v.AuxInt
-		sym1 := v.Aux
-		if v_0.Op != Op386LEAL {
-			break
-		}
-		off2 := v_0.AuxInt
-		sym2 := v_0.Aux
-		base := v_0.Args[0]
-		idx := v_1
-		val := v_2
-		mem := v_3
-		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) {
-			break
-		}
-		v.reset(Op386ORLmodifyidx4)
-		v.AuxInt = off1 + off2
-		v.Aux = mergeSym(sym1, sym2)
-		v.AddArg4(base, idx, val, mem)
-		return true
-	}
-	// match: (ORLmodifyidx4 [off] {sym} ptr idx (MOVLconst [c]) mem)
-	// cond: validValAndOff(c,off)
-	// result: (ORLconstmodifyidx4 [makeValAndOff(c,off)] {sym} ptr idx mem)
-	for {
-		off := v.AuxInt
-		sym := v.Aux
-		ptr := v_0
-		idx := v_1
-		if v_2.Op != Op386MOVLconst {
-			break
-		}
-		c := v_2.AuxInt
-		mem := v_3
-		if !(validValAndOff(c, off)) {
-			break
-		}
-		v.reset(Op386ORLconstmodifyidx4)
-		v.AuxInt = makeValAndOff(c, off)
-		v.Aux = sym
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
-	return false
-}
 func rewriteValue386_Op386ROLBconst(v *Value) bool {
 	v_0 := v.Args[0]
 	// match: (ROLBconst [c] (ROLBconst [d] x))
@@ -11629,29 +8347,6 @@
 		v.AddArg3(x, ptr, mem)
 		return true
 	}
-	// match: (SUBL x l:(MOVLloadidx4 [off] {sym} ptr idx mem))
-	// cond: canMergeLoadClobber(v, l, x) && clobber(l)
-	// result: (SUBLloadidx4 x [off] {sym} ptr idx mem)
-	for {
-		x := v_0
-		l := v_1
-		if l.Op != Op386MOVLloadidx4 {
-			break
-		}
-		off := l.AuxInt
-		sym := l.Aux
-		mem := l.Args[2]
-		ptr := l.Args[0]
-		idx := l.Args[1]
-		if !(canMergeLoadClobber(v, l, x) && clobber(l)) {
-			break
-		}
-		v.reset(Op386SUBLloadidx4)
-		v.AuxInt = off
-		v.Aux = sym
-		v.AddArg4(x, ptr, idx, mem)
-		return true
-	}
 	// match: (SUBL x x)
 	// result: (MOVLconst [0])
 	for {
@@ -11759,109 +8454,6 @@
 		v.AddArg3(val, base, mem)
 		return true
 	}
-	// match: (SUBLload [off1] {sym1} val (LEAL4 [off2] {sym2} ptr idx) mem)
-	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-	// result: (SUBLloadidx4 [off1+off2] {mergeSym(sym1,sym2)} val ptr idx mem)
-	for {
-		off1 := v.AuxInt
-		sym1 := v.Aux
-		val := v_0
-		if v_1.Op != Op386LEAL4 {
-			break
-		}
-		off2 := v_1.AuxInt
-		sym2 := v_1.Aux
-		idx := v_1.Args[1]
-		ptr := v_1.Args[0]
-		mem := v_2
-		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
-			break
-		}
-		v.reset(Op386SUBLloadidx4)
-		v.AuxInt = off1 + off2
-		v.Aux = mergeSym(sym1, sym2)
-		v.AddArg4(val, ptr, idx, mem)
-		return true
-	}
-	return false
-}
-func rewriteValue386_Op386SUBLloadidx4(v *Value) bool {
-	v_3 := v.Args[3]
-	v_2 := v.Args[2]
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	b := v.Block
-	config := b.Func.Config
-	// match: (SUBLloadidx4 [off1] {sym} val (ADDLconst [off2] base) idx mem)
-	// cond: is32Bit(off1+off2)
-	// result: (SUBLloadidx4 [off1+off2] {sym} val base idx mem)
-	for {
-		off1 := v.AuxInt
-		sym := v.Aux
-		val := v_0
-		if v_1.Op != Op386ADDLconst {
-			break
-		}
-		off2 := v_1.AuxInt
-		base := v_1.Args[0]
-		idx := v_2
-		mem := v_3
-		if !(is32Bit(off1 + off2)) {
-			break
-		}
-		v.reset(Op386SUBLloadidx4)
-		v.AuxInt = off1 + off2
-		v.Aux = sym
-		v.AddArg4(val, base, idx, mem)
-		return true
-	}
-	// match: (SUBLloadidx4 [off1] {sym} val base (ADDLconst [off2] idx) mem)
-	// cond: is32Bit(off1+off2*4)
-	// result: (SUBLloadidx4 [off1+off2*4] {sym} val base idx mem)
-	for {
-		off1 := v.AuxInt
-		sym := v.Aux
-		val := v_0
-		base := v_1
-		if v_2.Op != Op386ADDLconst {
-			break
-		}
-		off2 := v_2.AuxInt
-		idx := v_2.Args[0]
-		mem := v_3
-		if !(is32Bit(off1 + off2*4)) {
-			break
-		}
-		v.reset(Op386SUBLloadidx4)
-		v.AuxInt = off1 + off2*4
-		v.Aux = sym
-		v.AddArg4(val, base, idx, mem)
-		return true
-	}
-	// match: (SUBLloadidx4 [off1] {sym1} val (LEAL [off2] {sym2} base) idx mem)
-	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)
-	// result: (SUBLloadidx4 [off1+off2] {mergeSym(sym1,sym2)} val base idx mem)
-	for {
-		off1 := v.AuxInt
-		sym1 := v.Aux
-		val := v_0
-		if v_1.Op != Op386LEAL {
-			break
-		}
-		off2 := v_1.AuxInt
-		sym2 := v_1.Aux
-		base := v_1.Args[0]
-		idx := v_2
-		mem := v_3
-		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) {
-			break
-		}
-		v.reset(Op386SUBLloadidx4)
-		v.AuxInt = off1 + off2
-		v.Aux = mergeSym(sym1, sym2)
-		v.AddArg4(val, base, idx, mem)
-		return true
-	}
 	return false
 }
 func rewriteValue386_Op386SUBLmodify(v *Value) bool {
@@ -11917,107 +8509,6 @@
 	}
 	return false
 }
-func rewriteValue386_Op386SUBLmodifyidx4(v *Value) bool {
-	v_3 := v.Args[3]
-	v_2 := v.Args[2]
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	b := v.Block
-	config := b.Func.Config
-	// match: (SUBLmodifyidx4 [off1] {sym} (ADDLconst [off2] base) idx val mem)
-	// cond: is32Bit(off1+off2)
-	// result: (SUBLmodifyidx4 [off1+off2] {sym} base idx val mem)
-	for {
-		off1 := v.AuxInt
-		sym := v.Aux
-		if v_0.Op != Op386ADDLconst {
-			break
-		}
-		off2 := v_0.AuxInt
-		base := v_0.Args[0]
-		idx := v_1
-		val := v_2
-		mem := v_3
-		if !(is32Bit(off1 + off2)) {
-			break
-		}
-		v.reset(Op386SUBLmodifyidx4)
-		v.AuxInt = off1 + off2
-		v.Aux = sym
-		v.AddArg4(base, idx, val, mem)
-		return true
-	}
-	// match: (SUBLmodifyidx4 [off1] {sym} base (ADDLconst [off2] idx) val mem)
-	// cond: is32Bit(off1+off2*4)
-	// result: (SUBLmodifyidx4 [off1+off2*4] {sym} base idx val mem)
-	for {
-		off1 := v.AuxInt
-		sym := v.Aux
-		base := v_0
-		if v_1.Op != Op386ADDLconst {
-			break
-		}
-		off2 := v_1.AuxInt
-		idx := v_1.Args[0]
-		val := v_2
-		mem := v_3
-		if !(is32Bit(off1 + off2*4)) {
-			break
-		}
-		v.reset(Op386SUBLmodifyidx4)
-		v.AuxInt = off1 + off2*4
-		v.Aux = sym
-		v.AddArg4(base, idx, val, mem)
-		return true
-	}
-	// match: (SUBLmodifyidx4 [off1] {sym1} (LEAL [off2] {sym2} base) idx val mem)
-	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)
-	// result: (SUBLmodifyidx4 [off1+off2] {mergeSym(sym1,sym2)} base idx val mem)
-	for {
-		off1 := v.AuxInt
-		sym1 := v.Aux
-		if v_0.Op != Op386LEAL {
-			break
-		}
-		off2 := v_0.AuxInt
-		sym2 := v_0.Aux
-		base := v_0.Args[0]
-		idx := v_1
-		val := v_2
-		mem := v_3
-		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) {
-			break
-		}
-		v.reset(Op386SUBLmodifyidx4)
-		v.AuxInt = off1 + off2
-		v.Aux = mergeSym(sym1, sym2)
-		v.AddArg4(base, idx, val, mem)
-		return true
-	}
-	// match: (SUBLmodifyidx4 [off] {sym} ptr idx (MOVLconst [c]) mem)
-	// cond: validValAndOff(-c,off)
-	// result: (ADDLconstmodifyidx4 [makeValAndOff(-c,off)] {sym} ptr idx mem)
-	for {
-		off := v.AuxInt
-		sym := v.Aux
-		ptr := v_0
-		idx := v_1
-		if v_2.Op != Op386MOVLconst {
-			break
-		}
-		c := v_2.AuxInt
-		mem := v_3
-		if !(validValAndOff(-c, off)) {
-			break
-		}
-		v.reset(Op386ADDLconstmodifyidx4)
-		v.AuxInt = makeValAndOff(-c, off)
-		v.Aux = sym
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
-	return false
-}
 func rewriteValue386_Op386SUBSD(v *Value) bool {
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
@@ -12300,32 +8791,6 @@
 		}
 		break
 	}
-	// match: (XORL x l:(MOVLloadidx4 [off] {sym} ptr idx mem))
-	// cond: canMergeLoadClobber(v, l, x) && clobber(l)
-	// result: (XORLloadidx4 x [off] {sym} ptr idx mem)
-	for {
-		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-			x := v_0
-			l := v_1
-			if l.Op != Op386MOVLloadidx4 {
-				continue
-			}
-			off := l.AuxInt
-			sym := l.Aux
-			mem := l.Args[2]
-			ptr := l.Args[0]
-			idx := l.Args[1]
-			if !(canMergeLoadClobber(v, l, x) && clobber(l)) {
-				continue
-			}
-			v.reset(Op386XORLloadidx4)
-			v.AuxInt = off
-			v.Aux = sym
-			v.AddArg4(x, ptr, idx, mem)
-			return true
-		}
-		break
-	}
 	// match: (XORL x x)
 	// result: (MOVLconst [0])
 	for {
@@ -12431,81 +8896,6 @@
 	}
 	return false
 }
-func rewriteValue386_Op386XORLconstmodifyidx4(v *Value) bool {
-	v_2 := v.Args[2]
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	b := v.Block
-	config := b.Func.Config
-	// match: (XORLconstmodifyidx4 [valoff1] {sym} (ADDLconst [off2] base) idx mem)
-	// cond: ValAndOff(valoff1).canAdd(off2)
-	// result: (XORLconstmodifyidx4 [ValAndOff(valoff1).add(off2)] {sym} base idx mem)
-	for {
-		valoff1 := v.AuxInt
-		sym := v.Aux
-		if v_0.Op != Op386ADDLconst {
-			break
-		}
-		off2 := v_0.AuxInt
-		base := v_0.Args[0]
-		idx := v_1
-		mem := v_2
-		if !(ValAndOff(valoff1).canAdd(off2)) {
-			break
-		}
-		v.reset(Op386XORLconstmodifyidx4)
-		v.AuxInt = ValAndOff(valoff1).add(off2)
-		v.Aux = sym
-		v.AddArg3(base, idx, mem)
-		return true
-	}
-	// match: (XORLconstmodifyidx4 [valoff1] {sym} base (ADDLconst [off2] idx) mem)
-	// cond: ValAndOff(valoff1).canAdd(off2*4)
-	// result: (XORLconstmodifyidx4 [ValAndOff(valoff1).add(off2*4)] {sym} base idx mem)
-	for {
-		valoff1 := v.AuxInt
-		sym := v.Aux
-		base := v_0
-		if v_1.Op != Op386ADDLconst {
-			break
-		}
-		off2 := v_1.AuxInt
-		idx := v_1.Args[0]
-		mem := v_2
-		if !(ValAndOff(valoff1).canAdd(off2 * 4)) {
-			break
-		}
-		v.reset(Op386XORLconstmodifyidx4)
-		v.AuxInt = ValAndOff(valoff1).add(off2 * 4)
-		v.Aux = sym
-		v.AddArg3(base, idx, mem)
-		return true
-	}
-	// match: (XORLconstmodifyidx4 [valoff1] {sym1} (LEAL [off2] {sym2} base) idx mem)
-	// cond: ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)
-	// result: (XORLconstmodifyidx4 [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base idx mem)
-	for {
-		valoff1 := v.AuxInt
-		sym1 := v.Aux
-		if v_0.Op != Op386LEAL {
-			break
-		}
-		off2 := v_0.AuxInt
-		sym2 := v_0.Aux
-		base := v_0.Args[0]
-		idx := v_1
-		mem := v_2
-		if !(ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) {
-			break
-		}
-		v.reset(Op386XORLconstmodifyidx4)
-		v.AuxInt = ValAndOff(valoff1).add(off2)
-		v.Aux = mergeSym(sym1, sym2)
-		v.AddArg3(base, idx, mem)
-		return true
-	}
-	return false
-}
 func rewriteValue386_Op386XORLload(v *Value) bool {
 	v_2 := v.Args[2]
 	v_1 := v.Args[1]
@@ -12557,109 +8947,6 @@
 		v.AddArg3(val, base, mem)
 		return true
 	}
-	// match: (XORLload [off1] {sym1} val (LEAL4 [off2] {sym2} ptr idx) mem)
-	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-	// result: (XORLloadidx4 [off1+off2] {mergeSym(sym1,sym2)} val ptr idx mem)
-	for {
-		off1 := v.AuxInt
-		sym1 := v.Aux
-		val := v_0
-		if v_1.Op != Op386LEAL4 {
-			break
-		}
-		off2 := v_1.AuxInt
-		sym2 := v_1.Aux
-		idx := v_1.Args[1]
-		ptr := v_1.Args[0]
-		mem := v_2
-		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
-			break
-		}
-		v.reset(Op386XORLloadidx4)
-		v.AuxInt = off1 + off2
-		v.Aux = mergeSym(sym1, sym2)
-		v.AddArg4(val, ptr, idx, mem)
-		return true
-	}
-	return false
-}
-func rewriteValue386_Op386XORLloadidx4(v *Value) bool {
-	v_3 := v.Args[3]
-	v_2 := v.Args[2]
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	b := v.Block
-	config := b.Func.Config
-	// match: (XORLloadidx4 [off1] {sym} val (ADDLconst [off2] base) idx mem)
-	// cond: is32Bit(off1+off2)
-	// result: (XORLloadidx4 [off1+off2] {sym} val base idx mem)
-	for {
-		off1 := v.AuxInt
-		sym := v.Aux
-		val := v_0
-		if v_1.Op != Op386ADDLconst {
-			break
-		}
-		off2 := v_1.AuxInt
-		base := v_1.Args[0]
-		idx := v_2
-		mem := v_3
-		if !(is32Bit(off1 + off2)) {
-			break
-		}
-		v.reset(Op386XORLloadidx4)
-		v.AuxInt = off1 + off2
-		v.Aux = sym
-		v.AddArg4(val, base, idx, mem)
-		return true
-	}
-	// match: (XORLloadidx4 [off1] {sym} val base (ADDLconst [off2] idx) mem)
-	// cond: is32Bit(off1+off2*4)
-	// result: (XORLloadidx4 [off1+off2*4] {sym} val base idx mem)
-	for {
-		off1 := v.AuxInt
-		sym := v.Aux
-		val := v_0
-		base := v_1
-		if v_2.Op != Op386ADDLconst {
-			break
-		}
-		off2 := v_2.AuxInt
-		idx := v_2.Args[0]
-		mem := v_3
-		if !(is32Bit(off1 + off2*4)) {
-			break
-		}
-		v.reset(Op386XORLloadidx4)
-		v.AuxInt = off1 + off2*4
-		v.Aux = sym
-		v.AddArg4(val, base, idx, mem)
-		return true
-	}
-	// match: (XORLloadidx4 [off1] {sym1} val (LEAL [off2] {sym2} base) idx mem)
-	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)
-	// result: (XORLloadidx4 [off1+off2] {mergeSym(sym1,sym2)} val base idx mem)
-	for {
-		off1 := v.AuxInt
-		sym1 := v.Aux
-		val := v_0
-		if v_1.Op != Op386LEAL {
-			break
-		}
-		off2 := v_1.AuxInt
-		sym2 := v_1.Aux
-		base := v_1.Args[0]
-		idx := v_2
-		mem := v_3
-		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) {
-			break
-		}
-		v.reset(Op386XORLloadidx4)
-		v.AuxInt = off1 + off2
-		v.Aux = mergeSym(sym1, sym2)
-		v.AddArg4(val, base, idx, mem)
-		return true
-	}
 	return false
 }
 func rewriteValue386_Op386XORLmodify(v *Value) bool {
@@ -12715,107 +9002,6 @@
 	}
 	return false
 }
-func rewriteValue386_Op386XORLmodifyidx4(v *Value) bool {
-	v_3 := v.Args[3]
-	v_2 := v.Args[2]
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	b := v.Block
-	config := b.Func.Config
-	// match: (XORLmodifyidx4 [off1] {sym} (ADDLconst [off2] base) idx val mem)
-	// cond: is32Bit(off1+off2)
-	// result: (XORLmodifyidx4 [off1+off2] {sym} base idx val mem)
-	for {
-		off1 := v.AuxInt
-		sym := v.Aux
-		if v_0.Op != Op386ADDLconst {
-			break
-		}
-		off2 := v_0.AuxInt
-		base := v_0.Args[0]
-		idx := v_1
-		val := v_2
-		mem := v_3
-		if !(is32Bit(off1 + off2)) {
-			break
-		}
-		v.reset(Op386XORLmodifyidx4)
-		v.AuxInt = off1 + off2
-		v.Aux = sym
-		v.AddArg4(base, idx, val, mem)
-		return true
-	}
-	// match: (XORLmodifyidx4 [off1] {sym} base (ADDLconst [off2] idx) val mem)
-	// cond: is32Bit(off1+off2*4)
-	// result: (XORLmodifyidx4 [off1+off2*4] {sym} base idx val mem)
-	for {
-		off1 := v.AuxInt
-		sym := v.Aux
-		base := v_0
-		if v_1.Op != Op386ADDLconst {
-			break
-		}
-		off2 := v_1.AuxInt
-		idx := v_1.Args[0]
-		val := v_2
-		mem := v_3
-		if !(is32Bit(off1 + off2*4)) {
-			break
-		}
-		v.reset(Op386XORLmodifyidx4)
-		v.AuxInt = off1 + off2*4
-		v.Aux = sym
-		v.AddArg4(base, idx, val, mem)
-		return true
-	}
-	// match: (XORLmodifyidx4 [off1] {sym1} (LEAL [off2] {sym2} base) idx val mem)
-	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)
-	// result: (XORLmodifyidx4 [off1+off2] {mergeSym(sym1,sym2)} base idx val mem)
-	for {
-		off1 := v.AuxInt
-		sym1 := v.Aux
-		if v_0.Op != Op386LEAL {
-			break
-		}
-		off2 := v_0.AuxInt
-		sym2 := v_0.Aux
-		base := v_0.Args[0]
-		idx := v_1
-		val := v_2
-		mem := v_3
-		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) {
-			break
-		}
-		v.reset(Op386XORLmodifyidx4)
-		v.AuxInt = off1 + off2
-		v.Aux = mergeSym(sym1, sym2)
-		v.AddArg4(base, idx, val, mem)
-		return true
-	}
-	// match: (XORLmodifyidx4 [off] {sym} ptr idx (MOVLconst [c]) mem)
-	// cond: validValAndOff(c,off)
-	// result: (XORLconstmodifyidx4 [makeValAndOff(c,off)] {sym} ptr idx mem)
-	for {
-		off := v.AuxInt
-		sym := v.Aux
-		ptr := v_0
-		idx := v_1
-		if v_2.Op != Op386MOVLconst {
-			break
-		}
-		c := v_2.AuxInt
-		mem := v_3
-		if !(validValAndOff(c, off)) {
-			break
-		}
-		v.reset(Op386XORLconstmodifyidx4)
-		v.AuxInt = makeValAndOff(c, off)
-		v.Aux = sym
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
-	return false
-}
 func rewriteValue386_OpConstNil(v *Value) bool {
 	// match: (ConstNil)
 	// result: (MOVLconst [0])
diff --git a/test/codegen/memops.go b/test/codegen/memops.go
index 9d18153..0df1914 100644
--- a/test/codegen/memops.go
+++ b/test/codegen/memops.go
@@ -99,46 +99,61 @@
 func idxInt8(x, y []int8, i int) {
 	var t int8
 	// amd64: `MOVBL[SZ]X\t1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\), [A-Z]+[0-9]*`
+	//   386: `MOVBL[SZ]X\t1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\), [A-Z]+[0-9]*`
 	t = x[i+1]
 	// amd64: `MOVB\t[A-Z]+[0-9]*, 1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\)`
+	//   386: `MOVB\t[A-Z]+[0-9]*, 1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\)`
 	y[i+1] = t
 	// amd64: `MOVB\t[$]77, 1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\)`
+	//   386: `MOVB\t[$]77, 1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\)`
 	x[i+1] = 77
 }
 
 func idxInt16(x, y []int16, i int) {
 	var t int16
 	// amd64: `MOVWL[SZ]X\t2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\), [A-Z]+[0-9]*`
+	//   386: `MOVWL[SZ]X\t2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\), [A-Z]+[0-9]*`
 	t = x[i+1]
 	// amd64: `MOVW\t[A-Z]+[0-9]*, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\)`
+	//   386: `MOVW\t[A-Z]+[0-9]*, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\)`
 	y[i+1] = t
 	// amd64: `MOVWL[SZ]X\t2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\), [A-Z]+[0-9]*`
+	//   386: `MOVWL[SZ]X\t2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\), [A-Z]+[0-9]*`
 	t = x[16*i+1]
 	// amd64: `MOVW\t[A-Z]+[0-9]*, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\)`
+	//   386: `MOVW\t[A-Z]+[0-9]*, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\)`
 	y[16*i+1] = t
 	// amd64: `MOVW\t[$]77, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\)`
+	//   386: `MOVW\t[$]77, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\)`
 	x[i+1] = 77
 	// amd64: `MOVW\t[$]77, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\)`
+	//   386: `MOVW\t[$]77, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\)`
 	x[16*i+1] = 77
 }
 
 func idxInt32(x, y []int32, i int) {
 	var t int32
 	// amd64: `MOVL\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), [A-Z]+[0-9]*`
+	//   386: `MOVL\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), [A-Z]+[0-9]*`
 	t = x[i+1]
 	// amd64: `MOVL\t[A-Z]+[0-9]*, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
+	//   386: `MOVL\t[A-Z]+[0-9]*, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
 	y[i+1] = t
 	// amd64: `MOVL\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), [A-Z]+[0-9]*`
 	t = x[2*i+1]
 	// amd64: `MOVL\t[A-Z]+[0-9]*, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)`
 	y[2*i+1] = t
 	// amd64: `MOVL\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\), [A-Z]+[0-9]*`
+	//   386: `MOVL\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\), [A-Z]+[0-9]*`
 	t = x[16*i+1]
 	// amd64: `MOVL\t[A-Z]+[0-9]*, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)`
+	//   386: `MOVL\t[A-Z]+[0-9]*, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)`
 	y[16*i+1] = t
 	// amd64: `MOVL\t[$]77, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
+	//   386: `MOVL\t[$]77, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
 	x[i+1] = 77
 	// amd64: `MOVL\t[$]77, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)`
+	//   386: `MOVL\t[$]77, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)`
 	x[16*i+1] = 77
 }
 
@@ -160,24 +175,71 @@
 
 func idxFloat32(x, y []float32, i int) {
 	var t float32
-	// amd64: `MOVSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), X[0-9]+`
+	//    amd64: `MOVSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), X[0-9]+`
+	// 386/sse2: `MOVSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), X[0-9]+`
 	t = x[i+1]
-	// amd64: `MOVSS\tX[0-9]+, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
+	//    amd64: `MOVSS\tX[0-9]+, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
+	// 386/sse2: `MOVSS\tX[0-9]+, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
 	y[i+1] = t
-	// amd64: `MOVSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\), X[0-9]+`
+	//    amd64: `MOVSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\), X[0-9]+`
+	// 386/sse2: `MOVSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\), X[0-9]+`
 	t = x[16*i+1]
-	// amd64: `MOVSS\tX[0-9]+, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)`
+	//    amd64: `MOVSS\tX[0-9]+, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)`
+	// 386/sse2: `MOVSS\tX[0-9]+, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)`
 	y[16*i+1] = t
 }
 
 func idxFloat64(x, y []float64, i int) {
 	var t float64
-	// amd64: `MOVSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), X[0-9]+`
+	//    amd64: `MOVSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), X[0-9]+`
+	// 386/sse2: `MOVSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), X[0-9]+`
 	t = x[i+1]
-	// amd64: `MOVSD\tX[0-9]+, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)`
+	//    amd64: `MOVSD\tX[0-9]+, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)`
+	// 386/sse2: `MOVSD\tX[0-9]+, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)`
 	y[i+1] = t
-	// amd64: `MOVSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\), X[0-9]+`
+	//    amd64: `MOVSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\), X[0-9]+`
+	// 386/sse2: `MOVSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\), X[0-9]+`
 	t = x[16*i+1]
-	// amd64: `MOVSD\tX[0-9]+, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\)`
+	//    amd64: `MOVSD\tX[0-9]+, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\)`
+	// 386/sse2: `MOVSD\tX[0-9]+, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\)`
 	y[16*i+1] = t
 }
+
+func idxLoadPlusOp(x []int32, i int) int32 {
+	s := x[0]
+	// 386: `ADDL\t4\([A-Z]+\)\([A-Z]+\*4\), [A-Z]+`
+	s += x[i+1]
+	// 386: `SUBL\t8\([A-Z]+\)\([A-Z]+\*4\), [A-Z]+`
+	s -= x[i+2]
+	// 386: `IMULL\t12\([A-Z]+\)\([A-Z]+\*4\), [A-Z]+`
+	s *= x[i+3]
+	// 386: `ANDL\t16\([A-Z]+\)\([A-Z]+\*4\), [A-Z]+`
+	s &= x[i+4]
+	// 386: `ORL\t20\([A-Z]+\)\([A-Z]+\*4\), [A-Z]+`
+	s |= x[i+5]
+	// 386: `XORL\t24\([A-Z]+\)\([A-Z]+\*4\), [A-Z]+`
+	s ^= x[i+6]
+	return s
+}
+
+func idxStorePlusOp(x []int32, i int, v int32) {
+	// 386: `ADDL\t[A-Z]+, 4\([A-Z]+\)\([A-Z]+\*4\)`
+	x[i+1] += v
+	// 386: `SUBL\t[A-Z]+, 8\([A-Z]+\)\([A-Z]+\*4\)`
+	x[i+2] -= v
+	// 386: `ANDL\t[A-Z]+, 12\([A-Z]+\)\([A-Z]+\*4\)`
+	x[i+3] &= v
+	// 386: `ORL\t[A-Z]+, 16\([A-Z]+\)\([A-Z]+\*4\)`
+	x[i+4] |= v
+	// 386: `XORL\t[A-Z]+, 20\([A-Z]+\)\([A-Z]+\*4\)`
+	x[i+5] ^= v
+
+	// 386: `ADDL\t[$]77, 24\([A-Z]+\)\([A-Z]+\*4\)`
+	x[i+6] += 77
+	// 386: `ANDL\t[$]77, 28\([A-Z]+\)\([A-Z]+\*4\)`
+	x[i+7] &= 77
+	// 386: `ORL\t[$]77, 32\([A-Z]+\)\([A-Z]+\*4\)`
+	x[i+8] |= 77
+	// 386: `XORL\t[$]77, 36\([A-Z]+\)\([A-Z]+\*4\)`
+	x[i+9] ^= 77
+}