cmd/compile: convert 386 port to use addressing modes pass (take 2)

Retrying CL 222782, with a fix that will hopefully stop the random crashes.

The issue with the previous CL is that it does pointer arithmetic
in a way that may briefly generate an out-of-bounds pointer. If an
interrupt happens to occur in that state, the referenced object may
be collected incorrectly.

Suppose there was code that did s[x+c].  The previous CL had a rule
to the effect of ptr + (x + c) -> c + (ptr + x).  But ptr+x is not
guaranteed to point to the same object as ptr. In contrast,
ptr+(x+c) is guaranteed to point to the same object as ptr, because
we would have already checked that x+c is in bounds.
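
To make this concrete, here is a hedged sketch in Go (the function is
purely illustrative, not code from this CL):

  func get(s []byte, x int) byte {
      // The bounds check proves 0 <= x+1 < len(s), so
      // &s[0] + (x+1) stays inside s's backing array.
      // &s[0] + x alone has no such guarantee: with x == -1 and
      // len(s) == 1, it points one byte before the array.
      return s[x+1]
  }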

For example, strconv.trim used to have this code:
  MOVZX -0x1(BX)(DX*1), BP
  CMPL $0x30, AL
After CL 222782, it had this code:
  LEAL 0(BX)(DX*1), BP
  CMPB $0x30, -0x1(BP)

An interrupt between those last two instructions could see BP pointing
outside the backing store of the slice involved.

It's really hard to actually demonstrate a bug. First, you need to
have an interrupt occur at exactly the right time. Then, there must
be no other pointers to the object in question. Since the interrupted
frame will be scanned conservatively, there can't even be a dead
pointer in another register or on the stack. (In the example above,
a bug can't happen because BX still holds the original pointer.)
Then, the object in question needs to be collected (or at least
scanned?) before the interrupted code continues.

This CL needs to handle load combining somewhat differently from CL 222782
because of the new restriction on pointer arithmetic. That is the only real
difference from that old CL, other than removing the bad rules.
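
Concretely, the old combining rules required both loads to use the same base
and index with consecutive constant offsets; the new rules instead accept two
distinct address values and prove that they are n bytes apart, without
rewriting either one. A simplified Go sketch of that adjacency check (the
real helper is sequentialAddresses in rewrite.go; this version is
illustrative and covers only one of its cases):

  // adjacent reports whether p1 == p0 + n for one of the 386 patterns,
  // without materializing any new pointer value.
  func adjacent(p0, p1 *Value, n int64) bool {
      // p0 = (ADDL x y) and p1 = (LEAL1 [n] x y) imply p1 == p0 + n.
      return p0.Op == Op386ADDL && p1.Op == Op386LEAL1 &&
          p1.AuxInt == n && p1.Aux == nil &&
          ((p0.Args[0] == p1.Args[0] && p0.Args[1] == p1.Args[1]) ||
              (p0.Args[0] == p1.Args[1] && p0.Args[1] == p1.Args[0]))
  }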

This bug is also present in the amd64 rewrite rules, although we haven't
seen any crashes from it there. I will fix up that code similarly to this
one in a separate CL.

Update #37881

Change-Id: I5f0d584d9bef4696bfe89a61ef0a27c8d507329f
Reviewed-on: https://go-review.googlesource.com/c/go/+/225798
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
diff --git a/src/cmd/compile/internal/ssa/addressingmodes.go b/src/cmd/compile/internal/ssa/addressingmodes.go
index 8874b56..2af8a4d 100644
--- a/src/cmd/compile/internal/ssa/addressingmodes.go
+++ b/src/cmd/compile/internal/ssa/addressingmodes.go
@@ -11,8 +11,8 @@
 	default:
 		// Most architectures can't do this.
 		return
-	case "amd64":
-		// TODO: 386, s390x?
+	case "amd64", "386":
+		// TODO: s390x?
 	}
 
 	var tmp []*Value
@@ -21,7 +21,17 @@
 			if !combineFirst[v.Op] {
 				continue
 			}
-			p := v.Args[0]
+			// All matched operations have the pointer in arg[0].
+			// All results have the pointer in arg[0] and the index in arg[1].
+			// *Except* for operations which update a register,
+			// which are marked with resultInArg0. Those have
+			// the pointer in arg[1], and the corresponding result op
+			// has the pointer in arg[1] and the index in arg[2].
+			ptrIndex := 0
+			if opcodeTable[v.Op].resultInArg0 {
+				ptrIndex = 1
+			}
+			p := v.Args[ptrIndex]
 			c, ok := combine[[2]Op{v.Op, p.Op}]
 			if !ok {
 				continue
@@ -71,10 +81,11 @@
 				f.Fatalf("unknown aux combining for %s and %s\n", v.Op, p.Op)
 			}
 			// Combine the operations.
-			tmp = append(tmp[:0], v.Args[1:]...)
+			tmp = append(tmp[:0], v.Args[:ptrIndex]...)
+			tmp = append(tmp, p.Args...)
+			tmp = append(tmp, v.Args[ptrIndex+1:]...)
 			v.resetArgs()
 			v.Op = c
-			v.AddArgs(p.Args...)
 			v.AddArgs(tmp...)
 		}
 	}
@@ -97,6 +108,7 @@
 //   x.Args[0].Args + x.Args[1:]
 // Additionally, the Aux/AuxInt from x.Args[0] is merged into x.
 var combine = map[[2]Op]Op{
+	// amd64
 	[2]Op{OpAMD64MOVBload, OpAMD64ADDQ}:  OpAMD64MOVBloadidx1,
 	[2]Op{OpAMD64MOVWload, OpAMD64ADDQ}:  OpAMD64MOVWloadidx1,
 	[2]Op{OpAMD64MOVLload, OpAMD64ADDQ}:  OpAMD64MOVLloadidx1,
@@ -150,5 +162,64 @@
 	[2]Op{OpAMD64MOVQstoreconst, OpAMD64LEAQ1}: OpAMD64MOVQstoreconstidx1,
 	[2]Op{OpAMD64MOVQstoreconst, OpAMD64LEAQ8}: OpAMD64MOVQstoreconstidx8,
 
-	// TODO: 386
+	// 386
+	[2]Op{Op386MOVBload, Op386ADDL}:  Op386MOVBloadidx1,
+	[2]Op{Op386MOVWload, Op386ADDL}:  Op386MOVWloadidx1,
+	[2]Op{Op386MOVLload, Op386ADDL}:  Op386MOVLloadidx1,
+	[2]Op{Op386MOVSSload, Op386ADDL}: Op386MOVSSloadidx1,
+	[2]Op{Op386MOVSDload, Op386ADDL}: Op386MOVSDloadidx1,
+
+	[2]Op{Op386MOVBstore, Op386ADDL}:  Op386MOVBstoreidx1,
+	[2]Op{Op386MOVWstore, Op386ADDL}:  Op386MOVWstoreidx1,
+	[2]Op{Op386MOVLstore, Op386ADDL}:  Op386MOVLstoreidx1,
+	[2]Op{Op386MOVSSstore, Op386ADDL}: Op386MOVSSstoreidx1,
+	[2]Op{Op386MOVSDstore, Op386ADDL}: Op386MOVSDstoreidx1,
+
+	[2]Op{Op386MOVBstoreconst, Op386ADDL}: Op386MOVBstoreconstidx1,
+	[2]Op{Op386MOVWstoreconst, Op386ADDL}: Op386MOVWstoreconstidx1,
+	[2]Op{Op386MOVLstoreconst, Op386ADDL}: Op386MOVLstoreconstidx1,
+
+	[2]Op{Op386MOVBload, Op386LEAL1}:  Op386MOVBloadidx1,
+	[2]Op{Op386MOVWload, Op386LEAL1}:  Op386MOVWloadidx1,
+	[2]Op{Op386MOVWload, Op386LEAL2}:  Op386MOVWloadidx2,
+	[2]Op{Op386MOVLload, Op386LEAL1}:  Op386MOVLloadidx1,
+	[2]Op{Op386MOVLload, Op386LEAL4}:  Op386MOVLloadidx4,
+	[2]Op{Op386MOVSSload, Op386LEAL1}: Op386MOVSSloadidx1,
+	[2]Op{Op386MOVSSload, Op386LEAL4}: Op386MOVSSloadidx4,
+	[2]Op{Op386MOVSDload, Op386LEAL1}: Op386MOVSDloadidx1,
+	[2]Op{Op386MOVSDload, Op386LEAL8}: Op386MOVSDloadidx8,
+
+	[2]Op{Op386MOVBstore, Op386LEAL1}:  Op386MOVBstoreidx1,
+	[2]Op{Op386MOVWstore, Op386LEAL1}:  Op386MOVWstoreidx1,
+	[2]Op{Op386MOVWstore, Op386LEAL2}:  Op386MOVWstoreidx2,
+	[2]Op{Op386MOVLstore, Op386LEAL1}:  Op386MOVLstoreidx1,
+	[2]Op{Op386MOVLstore, Op386LEAL4}:  Op386MOVLstoreidx4,
+	[2]Op{Op386MOVSSstore, Op386LEAL1}: Op386MOVSSstoreidx1,
+	[2]Op{Op386MOVSSstore, Op386LEAL4}: Op386MOVSSstoreidx4,
+	[2]Op{Op386MOVSDstore, Op386LEAL1}: Op386MOVSDstoreidx1,
+	[2]Op{Op386MOVSDstore, Op386LEAL8}: Op386MOVSDstoreidx8,
+
+	[2]Op{Op386MOVBstoreconst, Op386LEAL1}: Op386MOVBstoreconstidx1,
+	[2]Op{Op386MOVWstoreconst, Op386LEAL1}: Op386MOVWstoreconstidx1,
+	[2]Op{Op386MOVWstoreconst, Op386LEAL2}: Op386MOVWstoreconstidx2,
+	[2]Op{Op386MOVLstoreconst, Op386LEAL1}: Op386MOVLstoreconstidx1,
+	[2]Op{Op386MOVLstoreconst, Op386LEAL4}: Op386MOVLstoreconstidx4,
+
+	[2]Op{Op386ADDLload, Op386LEAL4}: Op386ADDLloadidx4,
+	[2]Op{Op386SUBLload, Op386LEAL4}: Op386SUBLloadidx4,
+	[2]Op{Op386MULLload, Op386LEAL4}: Op386MULLloadidx4,
+	[2]Op{Op386ANDLload, Op386LEAL4}: Op386ANDLloadidx4,
+	[2]Op{Op386ORLload, Op386LEAL4}:  Op386ORLloadidx4,
+	[2]Op{Op386XORLload, Op386LEAL4}: Op386XORLloadidx4,
+
+	[2]Op{Op386ADDLmodify, Op386LEAL4}: Op386ADDLmodifyidx4,
+	[2]Op{Op386SUBLmodify, Op386LEAL4}: Op386SUBLmodifyidx4,
+	[2]Op{Op386ANDLmodify, Op386LEAL4}: Op386ANDLmodifyidx4,
+	[2]Op{Op386ORLmodify, Op386LEAL4}:  Op386ORLmodifyidx4,
+	[2]Op{Op386XORLmodify, Op386LEAL4}: Op386XORLmodifyidx4,
+
+	[2]Op{Op386ADDLconstmodify, Op386LEAL4}: Op386ADDLconstmodifyidx4,
+	[2]Op{Op386ANDLconstmodify, Op386LEAL4}: Op386ANDLconstmodifyidx4,
+	[2]Op{Op386ORLconstmodify, Op386LEAL4}:  Op386ORLconstmodifyidx4,
+	[2]Op{Op386XORLconstmodify, Op386LEAL4}: Op386XORLconstmodifyidx4,
 }
diff --git a/src/cmd/compile/internal/ssa/gen/386.rules b/src/cmd/compile/internal/ssa/gen/386.rules
index 64a6cba..2c48994 100644
--- a/src/cmd/compile/internal/ssa/gen/386.rules
+++ b/src/cmd/compile/internal/ssa/gen/386.rules
@@ -588,10 +588,6 @@
 (MOVWLSX x:(MOVWload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWLSXload <v.Type> [off] {sym} ptr mem)
 (MOVWLZX x:(MOVWload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWload <v.Type> [off] {sym} ptr mem)
 
-(MOVBLZX x:(MOVBloadidx1 [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBloadidx1 <v.Type> [off] {sym} ptr idx mem)
-(MOVWLZX x:(MOVWloadidx1 [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWloadidx1 <v.Type> [off] {sym} ptr idx mem)
-(MOVWLZX x:(MOVWloadidx2 [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWloadidx2 <v.Type> [off] {sym} ptr idx mem)
-
 // replace load from same location as preceding store with zero/sign extension (or copy in case of full width)
 (MOVBload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVBLZX x)
 (MOVWload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVWLZX x)
@@ -611,34 +607,22 @@
 
 // fold constants into memory operations
 // Note that this is not always a good idea because if not all the uses of
-// the ADDQconst get eliminated, we still have to compute the ADDQconst and we now
-// have potentially two live values (ptr and (ADDQconst [off] ptr)) instead of one.
+// the ADDLconst get eliminated, we still have to compute the ADDLconst and we now
+// have potentially two live values (ptr and (ADDLconst [off] ptr)) instead of one.
 // Nevertheless, let's do it!
 (MOV(L|W|B|SS|SD)load  [off1] {sym} (ADDLconst [off2] ptr) mem) && is32Bit(off1+off2) -> (MOV(L|W|B|SS|SD)load  [off1+off2] {sym} ptr mem)
 (MOV(L|W|B|SS|SD)store  [off1] {sym} (ADDLconst [off2] ptr) val mem) && is32Bit(off1+off2) -> (MOV(L|W|B|SS|SD)store  [off1+off2] {sym} ptr val mem)
 
 ((ADD|SUB|MUL|AND|OR|XOR)Lload [off1] {sym} val (ADDLconst [off2] base) mem) && is32Bit(off1+off2) ->
 	((ADD|SUB|MUL|AND|OR|XOR)Lload [off1+off2] {sym} val base mem)
-((ADD|SUB|MUL|AND|OR|XOR)Lloadidx4 [off1] {sym} val (ADDLconst [off2] base) idx mem) && is32Bit(off1+off2) ->
-	((ADD|SUB|MUL|AND|OR|XOR)Lloadidx4 [off1+off2] {sym} val base idx mem)
-((ADD|SUB|MUL|AND|OR|XOR)Lloadidx4 [off1] {sym} val base (ADDLconst [off2] idx) mem) && is32Bit(off1+off2*4) ->
-	((ADD|SUB|MUL|AND|OR|XOR)Lloadidx4 [off1+off2*4] {sym} val base idx mem)
 ((ADD|SUB|MUL|DIV)SSload [off1] {sym} val (ADDLconst [off2] base) mem) && is32Bit(off1+off2) ->
 	((ADD|SUB|MUL|DIV)SSload [off1+off2] {sym} val base mem)
 ((ADD|SUB|MUL|DIV)SDload [off1] {sym} val (ADDLconst [off2] base) mem) && is32Bit(off1+off2) ->
 	((ADD|SUB|MUL|DIV)SDload [off1+off2] {sym} val base mem)
 ((ADD|SUB|AND|OR|XOR)Lmodify [off1] {sym} (ADDLconst [off2] base) val mem) && is32Bit(off1+off2) ->
 	((ADD|SUB|AND|OR|XOR)Lmodify [off1+off2] {sym} base val mem)
-((ADD|SUB|AND|OR|XOR)Lmodifyidx4 [off1] {sym} (ADDLconst [off2] base) idx val mem) && is32Bit(off1+off2) ->
-	((ADD|SUB|AND|OR|XOR)Lmodifyidx4 [off1+off2] {sym} base idx val mem)
-((ADD|SUB|AND|OR|XOR)Lmodifyidx4 [off1] {sym} base (ADDLconst [off2] idx) val mem) && is32Bit(off1+off2*4) ->
-	((ADD|SUB|AND|OR|XOR)Lmodifyidx4 [off1+off2*4] {sym} base idx val mem)
 ((ADD|AND|OR|XOR)Lconstmodify [valoff1] {sym} (ADDLconst [off2] base) mem) && ValAndOff(valoff1).canAdd(off2) ->
 	((ADD|AND|OR|XOR)Lconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem)
-((ADD|AND|OR|XOR)Lconstmodifyidx4 [valoff1] {sym} (ADDLconst [off2] base) idx mem) && ValAndOff(valoff1).canAdd(off2) ->
-	((ADD|AND|OR|XOR)Lconstmodifyidx4 [ValAndOff(valoff1).add(off2)] {sym} base idx mem)
-((ADD|AND|OR|XOR)Lconstmodifyidx4 [valoff1] {sym} base (ADDLconst [off2] idx) mem) && ValAndOff(valoff1).canAdd(off2*4) ->
-	((ADD|AND|OR|XOR)Lconstmodifyidx4 [ValAndOff(valoff1).add(off2*4)] {sym} base idx mem)
 
 // Fold constants into stores.
 (MOVLstore [off] {sym} ptr (MOVLconst [c]) mem) && validOff(off) ->
@@ -652,7 +636,7 @@
 (MOV(L|W|B)storeconst [sc] {s} (ADDLconst [off] ptr) mem) && ValAndOff(sc).canAdd(off) ->
 	(MOV(L|W|B)storeconst [ValAndOff(sc).add(off)] {s} ptr mem)
 
-// We need to fold LEAQ into the MOVx ops so that the live variable analysis knows
+// We need to fold LEAL into the MOVx ops so that the live variable analysis knows
 // what variables are being read/written by the ops.
 // Note: we turn off this merging for operations on globals when building
 // position-independent code (when Flag_shared is set).
@@ -672,31 +656,9 @@
   && (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
 	(MOV(L|W|B)storeconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
 
-// generating indexed loads and stores
-(MOV(B|W|L|SS|SD)load [off1] {sym1} (LEAL1 [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
-	(MOV(B|W|L|SS|SD)loadidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
-(MOVWload [off1] {sym1} (LEAL2 [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
-	(MOVWloadidx2 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
-(MOV(L|SS)load [off1] {sym1} (LEAL4 [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
-	(MOV(L|SS)loadidx4 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
-(MOVSDload [off1] {sym1} (LEAL8 [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
-	(MOVSDloadidx8 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
-
-(MOV(B|W|L|SS|SD)store [off1] {sym1} (LEAL1 [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
-	(MOV(B|W|L|SS|SD)storeidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
-(MOVWstore [off1] {sym1} (LEAL2 [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
-	(MOVWstoreidx2 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
-(MOV(L|SS)store [off1] {sym1} (LEAL4 [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
-	(MOV(L|SS)storeidx4 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
-(MOVSDstore [off1] {sym1} (LEAL8 [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
-	(MOVSDstoreidx8 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
-
 ((ADD|SUB|MUL|AND|OR|XOR)Lload [off1] {sym1} val (LEAL [off2] {sym2} base) mem)
 	&& is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) ->
 	((ADD|SUB|MUL|AND|OR|XOR)Lload [off1+off2] {mergeSym(sym1,sym2)} val base mem)
-((ADD|SUB|MUL|AND|OR|XOR)Lloadidx4 [off1] {sym1} val (LEAL [off2] {sym2} base) idx mem)
-	&& is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) ->
-	((ADD|SUB|MUL|AND|OR|XOR)Lloadidx4 [off1+off2] {mergeSym(sym1,sym2)} val base idx mem)
 ((ADD|SUB|MUL|DIV)SSload [off1] {sym1} val (LEAL [off2] {sym2} base) mem)
 	&& is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) ->
 	((ADD|SUB|MUL|DIV)SSload [off1+off2] {mergeSym(sym1,sym2)} val base mem)
@@ -706,97 +668,20 @@
 ((ADD|SUB|AND|OR|XOR)Lmodify [off1] {sym1} (LEAL [off2] {sym2} base) val mem)
 	&& is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) ->
 	((ADD|SUB|AND|OR|XOR)Lmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem)
-((ADD|SUB|AND|OR|XOR)Lmodifyidx4 [off1] {sym1} (LEAL [off2] {sym2} base) idx val mem)
-	&& is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) ->
-	((ADD|SUB|AND|OR|XOR)Lmodifyidx4 [off1+off2] {mergeSym(sym1,sym2)} base idx val mem)
 ((ADD|AND|OR|XOR)Lconstmodify [valoff1] {sym1} (LEAL [off2] {sym2} base) mem)
 	&& ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) ->
 	((ADD|AND|OR|XOR)Lconstmodify [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem)
-((ADD|AND|OR|XOR)Lconstmodifyidx4 [valoff1] {sym1} (LEAL [off2] {sym2} base) idx mem)
-	&& ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) ->
-	((ADD|AND|OR|XOR)Lconstmodifyidx4 [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base idx mem)
-
-(MOV(B|W|L|SS|SD)load [off] {sym} (ADDL ptr idx) mem) && ptr.Op != OpSB -> (MOV(B|W|L|SS|SD)loadidx1 [off] {sym} ptr idx mem)
-(MOV(B|W|L|SS|SD)store [off] {sym} (ADDL ptr idx) val mem) && ptr.Op != OpSB -> (MOV(B|W|L|SS|SD)storeidx1 [off] {sym} ptr idx val mem)
-
-(MOV(B|W|L)storeconst [x] {sym1} (LEAL1 [off] {sym2} ptr idx) mem) && canMergeSym(sym1, sym2) ->
-	(MOV(B|W|L)storeconstidx1 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
-(MOVWstoreconst [x] {sym1} (LEAL2 [off] {sym2} ptr idx) mem) && canMergeSym(sym1, sym2) ->
-	(MOVWstoreconstidx2 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
-(MOVLstoreconst [x] {sym1} (LEAL4 [off] {sym2} ptr idx) mem) && canMergeSym(sym1, sym2) ->
-	(MOVLstoreconstidx4 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
-
-(MOV(B|W|L)storeconst [x] {sym} (ADDL ptr idx) mem) -> (MOV(B|W|L)storeconstidx1 [x] {sym} ptr idx mem)
-
-// combine SHLL into indexed loads and stores
-(MOVWloadidx1 [c] {sym} ptr (SHLLconst [1] idx) mem) -> (MOVWloadidx2 [c] {sym} ptr idx mem)
-(MOVLloadidx1 [c] {sym} ptr (SHLLconst [2] idx) mem) -> (MOVLloadidx4 [c] {sym} ptr idx mem)
-(MOVWstoreidx1 [c] {sym} ptr (SHLLconst [1] idx) val mem) -> (MOVWstoreidx2 [c] {sym} ptr idx val mem)
-(MOVLstoreidx1 [c] {sym} ptr (SHLLconst [2] idx) val mem) -> (MOVLstoreidx4 [c] {sym} ptr idx val mem)
-(MOVWstoreconstidx1 [c] {sym} ptr (SHLLconst [1] idx) mem) -> (MOVWstoreconstidx2 [c] {sym} ptr idx mem)
-(MOVLstoreconstidx1 [c] {sym} ptr (SHLLconst [2] idx) mem) -> (MOVLstoreconstidx4 [c] {sym} ptr idx mem)
-
-// combine ADDL into indexed loads and stores
-(MOV(B|W|L|SS|SD)loadidx1 [c] {sym} (ADDLconst [d] ptr) idx mem) -> (MOV(B|W|L|SS|SD)loadidx1 [int64(int32(c+d))] {sym} ptr idx mem)
-(MOVWloadidx2 [c] {sym} (ADDLconst [d] ptr) idx mem) -> (MOVWloadidx2 [int64(int32(c+d))] {sym} ptr idx mem)
-(MOV(L|SS)loadidx4 [c] {sym} (ADDLconst [d] ptr) idx mem) -> (MOV(L|SS)loadidx4 [int64(int32(c+d))] {sym} ptr idx mem)
-(MOVSDloadidx8 [c] {sym} (ADDLconst [d] ptr) idx mem) -> (MOVSDloadidx8 [int64(int32(c+d))] {sym} ptr idx mem)
-
-(MOV(B|W|L|SS|SD)storeidx1 [c] {sym} (ADDLconst [d] ptr) idx val mem) -> (MOV(B|W|L|SS|SD)storeidx1 [int64(int32(c+d))] {sym} ptr idx val mem)
-(MOVWstoreidx2 [c] {sym} (ADDLconst [d] ptr) idx val mem) -> (MOVWstoreidx2 [int64(int32(c+d))] {sym} ptr idx val mem)
-(MOV(L|SS)storeidx4 [c] {sym} (ADDLconst [d] ptr) idx val mem) -> (MOV(L|SS)storeidx4 [int64(int32(c+d))] {sym} ptr idx val mem)
-(MOVSDstoreidx8 [c] {sym} (ADDLconst [d] ptr) idx val mem) -> (MOVSDstoreidx8 [int64(int32(c+d))] {sym} ptr idx val mem)
-
-(MOV(B|W|L|SS|SD)loadidx1 [c] {sym} ptr (ADDLconst [d] idx) mem) -> (MOV(B|W|L|SS|SD)loadidx1 [int64(int32(c+d))] {sym} ptr idx mem)
-(MOVWloadidx2 [c] {sym} ptr (ADDLconst [d] idx) mem) -> (MOVWloadidx2 [int64(int32(c+2*d))] {sym} ptr idx mem)
-(MOV(L|SS)loadidx4 [c] {sym} ptr (ADDLconst [d] idx) mem) -> (MOV(L|SS)loadidx4 [int64(int32(c+4*d))] {sym} ptr idx mem)
-(MOVSDloadidx8 [c] {sym} ptr (ADDLconst [d] idx) mem) -> (MOVSDloadidx8 [int64(int32(c+8*d))] {sym} ptr idx mem)
-
-(MOV(B|W|L|SS|SD)storeidx1 [c] {sym} ptr (ADDLconst [d] idx) val mem) -> (MOV(B|W|L|SS|SD)storeidx1  [int64(int32(c+d))]   {sym} ptr idx val mem)
-(MOVWstoreidx2 [c] {sym} ptr (ADDLconst [d] idx) val mem) -> (MOVWstoreidx2  [int64(int32(c+2*d))] {sym} ptr idx val mem)
-(MOV(L|SS)storeidx4 [c] {sym} ptr (ADDLconst [d] idx) val mem) -> (MOV(L|SS)storeidx4 [int64(int32(c+4*d))] {sym} ptr idx val mem)
-(MOVSDstoreidx8 [c] {sym} ptr (ADDLconst [d] idx) val mem) -> (MOVSDstoreidx8 [int64(int32(c+8*d))] {sym} ptr idx val mem)
 
 // Merge load/store to op
 ((ADD|AND|OR|XOR|SUB|MUL)L x l:(MOVLload [off] {sym} ptr mem)) && canMergeLoadClobber(v, l, x) && clobber(l) -> ((ADD|AND|OR|XOR|SUB|MUL)Lload x [off] {sym} ptr mem)
-((ADD|AND|OR|XOR|SUB|MUL)L x l:(MOVLloadidx4 [off] {sym} ptr idx mem)) && canMergeLoadClobber(v, l, x) && clobber(l) ->
-	((ADD|AND|OR|XOR|SUB|MUL)Lloadidx4 x [off] {sym} ptr idx mem)
-((ADD|SUB|MUL|AND|OR|XOR)Lload [off1] {sym1} val (LEAL4 [off2] {sym2} ptr idx) mem)
-	&& is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
-	((ADD|SUB|MUL|AND|OR|XOR)Lloadidx4 [off1+off2] {mergeSym(sym1,sym2)} val ptr idx mem)
 ((ADD|SUB|MUL|DIV)SD x l:(MOVSDload [off] {sym} ptr mem)) && canMergeLoadClobber(v, l, x) && !config.use387 && clobber(l) -> ((ADD|SUB|MUL|DIV)SDload x [off] {sym} ptr mem)
 ((ADD|SUB|MUL|DIV)SS x l:(MOVSSload [off] {sym} ptr mem)) && canMergeLoadClobber(v, l, x) && !config.use387 && clobber(l) -> ((ADD|SUB|MUL|DIV)SSload x [off] {sym} ptr mem)
 (MOVLstore {sym} [off] ptr y:((ADD|AND|OR|XOR)Lload x [off] {sym} ptr mem) mem) && y.Uses==1 && clobber(y) -> ((ADD|AND|OR|XOR)Lmodify [off] {sym} ptr x mem)
 (MOVLstore {sym} [off] ptr y:((ADD|SUB|AND|OR|XOR)L l:(MOVLload [off] {sym} ptr mem) x) mem) && y.Uses==1 && l.Uses==1 && clobber(y, l) ->
 	((ADD|SUB|AND|OR|XOR)Lmodify [off] {sym} ptr x mem)
-(MOVLstoreidx4 {sym} [off] ptr idx y:((ADD|AND|OR|XOR)Lloadidx4 x [off] {sym} ptr idx mem) mem) && y.Uses==1 && clobber(y) ->
-	((ADD|AND|OR|XOR)Lmodifyidx4 [off] {sym} ptr idx x mem)
-(MOVLstoreidx4 {sym} [off] ptr idx y:((ADD|SUB|AND|OR|XOR)L l:(MOVLloadidx4 [off] {sym} ptr idx mem) x) mem) && y.Uses==1 && l.Uses==1 && clobber(y, l) ->
-	((ADD|SUB|AND|OR|XOR)Lmodifyidx4 [off] {sym} ptr idx x mem)
 (MOVLstore {sym} [off] ptr y:((ADD|AND|OR|XOR)Lconst [c] l:(MOVLload [off] {sym} ptr mem)) mem)
 	&& y.Uses==1 && l.Uses==1 && clobber(y, l) && validValAndOff(c,off) ->
 	((ADD|AND|OR|XOR)Lconstmodify [makeValAndOff(c,off)] {sym} ptr mem)
-(MOVLstoreidx4 {sym} [off] ptr idx y:((ADD|AND|OR|XOR)Lconst [c] l:(MOVLloadidx4 [off] {sym} ptr idx mem)) mem)
-	&& y.Uses==1 && l.Uses==1 && clobber(y, l) && validValAndOff(c,off) ->
-	((ADD|AND|OR|XOR)Lconstmodifyidx4 [makeValAndOff(c,off)] {sym} ptr idx mem)
-((ADD|AND|OR|XOR)Lmodifyidx4 [off] {sym} ptr idx (MOVLconst [c]) mem) && validValAndOff(c,off) ->
-	((ADD|AND|OR|XOR)Lconstmodifyidx4 [makeValAndOff(c,off)] {sym} ptr idx mem)
-(SUBLmodifyidx4 [off] {sym} ptr idx (MOVLconst [c]) mem) && validValAndOff(-c,off) ->
-	(ADDLconstmodifyidx4 [makeValAndOff(-c,off)] {sym} ptr idx mem)
-
-(MOV(B|W|L)storeconstidx1 [x] {sym} (ADDLconst [c] ptr) idx mem) ->
-	(MOV(B|W|L)storeconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
-(MOVWstoreconstidx2 [x] {sym} (ADDLconst [c] ptr) idx mem) ->
-	(MOVWstoreconstidx2 [ValAndOff(x).add(c)] {sym} ptr idx mem)
-(MOVLstoreconstidx4 [x] {sym} (ADDLconst [c] ptr) idx mem) ->
-	(MOVLstoreconstidx4 [ValAndOff(x).add(c)] {sym} ptr idx mem)
-
-(MOV(B|W|L)storeconstidx1 [x] {sym} ptr (ADDLconst [c] idx) mem) ->
-	(MOV(B|W|L)storeconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
-(MOVWstoreconstidx2 [x] {sym} ptr (ADDLconst [c] idx) mem) ->
-	(MOVWstoreconstidx2 [ValAndOff(x).add(2*c)] {sym} ptr idx mem)
-(MOVLstoreconstidx4 [x] {sym} ptr (ADDLconst [c] idx) mem) ->
-	(MOVLstoreconstidx4 [ValAndOff(x).add(4*c)] {sym} ptr idx mem)
 
 // fold LEALs together
 (LEAL [off1] {sym1} (LEAL [off2] {sym2} x)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
@@ -826,6 +711,16 @@
 (LEAL [off1] {sym1} (LEAL8 [off2] {sym2} x y)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
       (LEAL8 [off1+off2] {mergeSym(sym1,sym2)} x y)
 
+// LEAL[1248] into LEAL[1248]. Only some such merges are possible.
+(LEAL1 [off1] {sym1} x (LEAL1 [off2] {sym2} y y)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
+      (LEAL2 [off1+off2] {mergeSym(sym1, sym2)} x y)
+(LEAL1 [off1] {sym1} x (LEAL1 [off2] {sym2} x y)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
+      (LEAL2 [off1+off2] {mergeSym(sym1, sym2)} y x)
+(LEAL2 [off1] {sym} x (LEAL1 [off2] {nil} y y)) && is32Bit(off1+2*off2) ->
+      (LEAL4 [off1+2*off2] {sym} x y)
+(LEAL4 [off1] {sym} x (LEAL1 [off2] {nil} y y)) && is32Bit(off1+4*off2) ->
+      (LEAL8 [off1+4*off2] {sym} x y)
+
 // Absorb InvertFlags into branches.
 (LT (InvertFlags cmp) yes no) -> (GT cmp yes no)
 (GT (InvertFlags cmp) yes no) -> (LT cmp yes no)
@@ -1039,6 +934,9 @@
 // TEST %reg,%reg is shorter than CMP
 (CMP(L|W|B)const x [0]) -> (TEST(L|W|B) x x)
 
+// Convert LEAL1 back to ADDL if we can
+(LEAL1 [0] {nil} x y) -> (ADDL x y)
+
 // Combining byte loads into larger (unaligned) loads.
 // There are many ways these combinations could occur.  This is
 // designed to match the way encoding/binary.LittleEndian does it.
@@ -1052,6 +950,16 @@
   && clobber(x0, x1, s0)
   -> @mergePoint(b,x0,x1) (MOVWload [i0] {s} p mem)
 
+(ORL                  x0:(MOVBload [i] {s} p0 mem)
+    s0:(SHLLconst [8] x1:(MOVBload [i] {s} p1 mem)))
+  && x0.Uses == 1
+  && x1.Uses == 1
+  && s0.Uses == 1
+  && sequentialAddresses(p0, p1, 1)
+  && mergePoint(b,x0,x1) != nil
+  && clobber(x0, x1, s0)
+  -> @mergePoint(b,x0,x1) (MOVWload [i] {s} p0 mem)
+
 (ORL o0:(ORL
                        x0:(MOVWload [i0] {s} p mem)
     s0:(SHLLconst [16] x1:(MOVBload [i2] {s} p mem)))
@@ -1068,31 +976,21 @@
   && clobber(x0, x1, x2, s0, s1, o0)
   -> @mergePoint(b,x0,x1,x2) (MOVLload [i0] {s} p mem)
 
-(ORL                  x0:(MOVBloadidx1 [i0] {s} p idx mem)
-    s0:(SHLLconst [8] x1:(MOVBloadidx1 [i1] {s} p idx mem)))
-  && i1==i0+1
-  && x0.Uses == 1
-  && x1.Uses == 1
-  && s0.Uses == 1
-  && mergePoint(b,x0,x1) != nil
-  && clobber(x0, x1, s0)
-  -> @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
-
 (ORL o0:(ORL
-                       x0:(MOVWloadidx1 [i0] {s} p idx mem)
-    s0:(SHLLconst [16] x1:(MOVBloadidx1 [i2] {s} p idx mem)))
-    s1:(SHLLconst [24] x2:(MOVBloadidx1 [i3] {s} p idx mem)))
-  && i2 == i0+2
-  && i3 == i0+3
+                       x0:(MOVWload [i] {s} p0 mem)
+    s0:(SHLLconst [16] x1:(MOVBload [i] {s} p1 mem)))
+    s1:(SHLLconst [24] x2:(MOVBload [i] {s} p2 mem)))
   && x0.Uses == 1
   && x1.Uses == 1
   && x2.Uses == 1
   && s0.Uses == 1
   && s1.Uses == 1
   && o0.Uses == 1
+  && sequentialAddresses(p0, p1, 2)
+  && sequentialAddresses(p1, p2, 1)
   && mergePoint(b,x0,x1,x2) != nil
   && clobber(x0, x1, x2, s0, s1, o0)
-  -> @mergePoint(b,x0,x1,x2) (MOVLloadidx1 <v.Type> [i0] {s} p idx mem)
+  -> @mergePoint(b,x0,x1,x2) (MOVLload [i] {s} p0 mem)
 
 // Combine constant stores into larger (unaligned) stores.
 (MOVBstoreconst [c] {s} p x:(MOVBstoreconst [a] {s} p mem))
@@ -1105,6 +1003,20 @@
   && ValAndOff(a).Off() + 1 == ValAndOff(c).Off()
   && clobber(x)
   -> (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p mem)
+
+(MOVBstoreconst [c] {s} p1 x:(MOVBstoreconst [a] {s} p0 mem))
+  && x.Uses == 1
+  && ValAndOff(a).Off() == ValAndOff(c).Off()
+  && sequentialAddresses(p0, p1, 1)
+  && clobber(x)
+  -> (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p0 mem)
+(MOVBstoreconst [a] {s} p0 x:(MOVBstoreconst [c] {s} p1 mem))
+  && x.Uses == 1
+  && ValAndOff(a).Off() == ValAndOff(c).Off()
+  && sequentialAddresses(p0, p1, 1)
+  && clobber(x)
+  -> (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p0 mem)
+
 (MOVWstoreconst [c] {s} p x:(MOVWstoreconst [a] {s} p mem))
   && x.Uses == 1
   && ValAndOff(a).Off() + 2 == ValAndOff(c).Off()
@@ -1116,22 +1028,18 @@
   && clobber(x)
   -> (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p mem)
 
-(MOVBstoreconstidx1 [c] {s} p i x:(MOVBstoreconstidx1 [a] {s} p i mem))
+(MOVWstoreconst [c] {s} p1 x:(MOVWstoreconst [a] {s} p0 mem))
   && x.Uses == 1
-  && ValAndOff(a).Off() + 1 == ValAndOff(c).Off()
+  && ValAndOff(a).Off() == ValAndOff(c).Off()
+  && sequentialAddresses(p0, p1, 2)
   && clobber(x)
-  -> (MOVWstoreconstidx1 [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p i mem)
-(MOVWstoreconstidx1 [c] {s} p i x:(MOVWstoreconstidx1 [a] {s} p i mem))
+  -> (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p0 mem)
+(MOVWstoreconst [a] {s} p0 x:(MOVWstoreconst [c] {s} p1 mem))
   && x.Uses == 1
-  && ValAndOff(a).Off() + 2 == ValAndOff(c).Off()
+  && ValAndOff(a).Off() == ValAndOff(c).Off()
+  && sequentialAddresses(p0, p1, 2)
   && clobber(x)
-  -> (MOVLstoreconstidx1 [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p i mem)
-
-(MOVWstoreconstidx2 [c] {s} p i x:(MOVWstoreconstidx2 [a] {s} p i mem))
-  && x.Uses == 1
-  && ValAndOff(a).Off() + 2 == ValAndOff(c).Off()
-  && clobber(x)
-  -> (MOVLstoreconstidx1 [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p (SHLLconst <i.Type> [1] i) mem)
+  -> (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p0 mem)
 
 // Combine stores into larger (unaligned) stores.
 (MOVBstore [i] {s} p (SHR(W|L)const [8] w) x:(MOVBstore [i-1] {s} p w mem))
@@ -1146,6 +1054,23 @@
   && x.Uses == 1
   && clobber(x)
   -> (MOVWstore [i-1] {s} p w0 mem)
+
+(MOVBstore [i] {s} p1 (SHR(W|L)const [8] w) x:(MOVBstore [i] {s} p0 w mem))
+  && x.Uses == 1
+  && sequentialAddresses(p0, p1, 1)
+  && clobber(x)
+  -> (MOVWstore [i] {s} p0 w mem)
+(MOVBstore [i] {s} p0 w x:(MOVBstore {s} [i] p1 (SHR(W|L)const [8] w) mem))
+  && x.Uses == 1
+  && sequentialAddresses(p0, p1, 1)
+  && clobber(x)
+  -> (MOVWstore [i] {s} p0 w mem)
+(MOVBstore [i] {s} p1 (SHRLconst [j] w) x:(MOVBstore [i] {s} p0 w0:(SHRLconst [j-8] w) mem))
+  && x.Uses == 1
+  && sequentialAddresses(p0, p1, 1)
+  && clobber(x)
+  -> (MOVWstore [i] {s} p0 w0 mem)
+
 (MOVWstore [i] {s} p (SHRLconst [16] w) x:(MOVWstore [i-2] {s} p w mem))
   && x.Uses == 1
   && clobber(x)
@@ -1155,35 +1080,16 @@
   && clobber(x)
   -> (MOVLstore [i-2] {s} p w0 mem)
 
-(MOVBstoreidx1 [i] {s} p idx (SHR(L|W)const [8] w) x:(MOVBstoreidx1 [i-1] {s} p idx w mem))
+(MOVWstore [i] {s} p1 (SHRLconst [16] w) x:(MOVWstore [i] {s} p0 w mem))
   && x.Uses == 1
+  && sequentialAddresses(p0, p1, 2)
   && clobber(x)
-  -> (MOVWstoreidx1 [i-1] {s} p idx w mem)
-(MOVBstoreidx1 [i] {s} p idx w x:(MOVBstoreidx1 [i+1] {s} p idx (SHR(L|W)const [8] w) mem))
+  -> (MOVLstore [i] {s} p0 w mem)
+(MOVWstore [i] {s} p1 (SHRLconst [j] w) x:(MOVWstore [i] {s} p0 w0:(SHRLconst [j-16] w) mem))
   && x.Uses == 1
+  && sequentialAddresses(p0, p1, 2)
   && clobber(x)
-  -> (MOVWstoreidx1 [i] {s} p idx w mem)
-(MOVBstoreidx1 [i] {s} p idx (SHRLconst [j] w) x:(MOVBstoreidx1 [i-1] {s} p idx w0:(SHRLconst [j-8] w) mem))
-  && x.Uses == 1
-  && clobber(x)
-  -> (MOVWstoreidx1 [i-1] {s} p idx w0 mem)
-(MOVWstoreidx1 [i] {s} p idx (SHRLconst [16] w) x:(MOVWstoreidx1 [i-2] {s} p idx w mem))
-  && x.Uses == 1
-  && clobber(x)
-  -> (MOVLstoreidx1 [i-2] {s} p idx w mem)
-(MOVWstoreidx1 [i] {s} p idx (SHRLconst [j] w) x:(MOVWstoreidx1 [i-2] {s} p idx w0:(SHRLconst [j-16] w) mem))
-  && x.Uses == 1
-  && clobber(x)
-  -> (MOVLstoreidx1 [i-2] {s} p idx w0 mem)
-
-(MOVWstoreidx2 [i] {s} p idx (SHRLconst [16] w) x:(MOVWstoreidx2 [i-2] {s} p idx w mem))
-  && x.Uses == 1
-  && clobber(x)
-  -> (MOVLstoreidx1 [i-2] {s} p (SHLLconst <idx.Type> [1] idx) w mem)
-(MOVWstoreidx2 [i] {s} p idx (SHRLconst [j] w) x:(MOVWstoreidx2 [i-2] {s} p idx w0:(SHRLconst [j-16] w) mem))
-  && x.Uses == 1
-  && clobber(x)
-  -> (MOVLstoreidx1 [i-2] {s} p (SHLLconst <idx.Type> [1] idx) w0 mem)
+  -> (MOVLstore [i] {s} p0 w0 mem)
 
 // For PIC, break floating-point constant loading into two instructions so we have
 // a register to use for holding the address of the constant pool entry.
diff --git a/src/cmd/compile/internal/ssa/gen/AMD64.rules b/src/cmd/compile/internal/ssa/gen/AMD64.rules
index 306847d..ca5962f 100644
--- a/src/cmd/compile/internal/ssa/gen/AMD64.rules
+++ b/src/cmd/compile/internal/ssa/gen/AMD64.rules
@@ -1604,6 +1604,7 @@
 
 // Move constants offsets from LEAQx up into load. This lets the above combining
 // rules discover indexed load-combining instances.
+// TODO: remove! These rules are bad.
 (MOV(B|W|L|Q)load [i0] {s0} l:(LEAQ1 [i1] {s1} x y) mem) && i1 != 0 && is32Bit(i0+i1)
 -> (MOV(B|W|L|Q)load [i0+i1] {s0} (LEAQ1 <l.Type> [0] {s1} x y) mem)
 (MOV(B|W|L|Q)load [i0] {s0} l:(LEAQ2 [i1] {s1} x y) mem) && i1 != 0 && is32Bit(i0+i1)
diff --git a/src/cmd/compile/internal/ssa/gen/generic.rules b/src/cmd/compile/internal/ssa/gen/generic.rules
index 8ec22d8..8a3c8ee 100644
--- a/src/cmd/compile/internal/ssa/gen/generic.rules
+++ b/src/cmd/compile/internal/ssa/gen/generic.rules
@@ -917,7 +917,7 @@
 (If (ConstBool [c]) yes no) && c == 0 -> (First no yes)
 
 // Get rid of Convert ops for pointer arithmetic on unsafe.Pointer.
-(Convert (Add(64|32) (Convert ptr mem) off) mem) -> (Add(64|32) ptr off)
+(Convert (Add(64|32) (Convert ptr mem) off) mem) -> (AddPtr ptr off)
 (Convert (Convert ptr mem) mem) -> ptr
 
 // strength reduction of divide by a constant.
@@ -1780,6 +1780,10 @@
 // is constant, which pushes constants to the outside
 // of the expression. At that point, any constant-folding
 // opportunities should be obvious.
+// Note: don't include AddPtr here! In order to maintain the
+// invariant that pointers must stay within the pointed-to object,
+// we can't pull part of a pointer computation above the AddPtr.
+// See issue 37881.
 
 // x + (C + z) -> C + (x + z)
 (Add64 (Add64 i:(Const64 <t>) z) x) && (z.Op != OpConst64 && x.Op != OpConst64) -> (Add64 i (Add64 <t> z x))
diff --git a/src/cmd/compile/internal/ssa/gen/rulegen.go b/src/cmd/compile/internal/ssa/gen/rulegen.go
index 8e88d0b..3caa060 100644
--- a/src/cmd/compile/internal/ssa/gen/rulegen.go
+++ b/src/cmd/compile/internal/ssa/gen/rulegen.go
@@ -715,6 +715,11 @@
 
 // declared reports if the body contains a Declare with the given name.
 func (w *bodyBase) declared(name string) bool {
+	if name == "nil" {
+		// Treat "nil" as having already been declared.
+		// This lets us use nil to match an aux field.
+		return true
+	}
 	for _, s := range w.list {
 		if decl, ok := s.(*Declare); ok && decl.name == name {
 			return true
diff --git a/src/cmd/compile/internal/ssa/rewrite.go b/src/cmd/compile/internal/ssa/rewrite.go
index b3e7d34..fc03f0d 100644
--- a/src/cmd/compile/internal/ssa/rewrite.go
+++ b/src/cmd/compile/internal/ssa/rewrite.go
@@ -1248,9 +1248,25 @@
 	return byteorder.Uint64(buf)
 }
 
+// sequentialAddresses reports true if it can prove that x + n == y
+func sequentialAddresses(x, y *Value, n int64) bool {
+	if x.Op == Op386ADDL && y.Op == Op386LEAL1 && y.AuxInt == n && y.Aux == nil &&
+		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
+			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
+		return true
+	}
+	if x.Op == Op386LEAL1 && y.Op == Op386LEAL1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
+		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
+			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
+		return true
+	}
+	return false
+}
+
 // same reports whether x and y are the same value.
 // It checks to a maximum depth of d, so it may report
 // a false negative.
+// TODO: remove when amd64 port is switched to using sequentialAddresses
 func same(x, y *Value, depth int) bool {
 	if x == y {
 		return true
diff --git a/src/cmd/compile/internal/ssa/rewrite386.go b/src/cmd/compile/internal/ssa/rewrite386.go
index 8b2da94..2a0a92b 100644
--- a/src/cmd/compile/internal/ssa/rewrite386.go
+++ b/src/cmd/compile/internal/ssa/rewrite386.go
@@ -18,16 +18,10 @@
 		return rewriteValue386_Op386ADDLconst(v)
 	case Op386ADDLconstmodify:
 		return rewriteValue386_Op386ADDLconstmodify(v)
-	case Op386ADDLconstmodifyidx4:
-		return rewriteValue386_Op386ADDLconstmodifyidx4(v)
 	case Op386ADDLload:
 		return rewriteValue386_Op386ADDLload(v)
-	case Op386ADDLloadidx4:
-		return rewriteValue386_Op386ADDLloadidx4(v)
 	case Op386ADDLmodify:
 		return rewriteValue386_Op386ADDLmodify(v)
-	case Op386ADDLmodifyidx4:
-		return rewriteValue386_Op386ADDLmodifyidx4(v)
 	case Op386ADDSD:
 		return rewriteValue386_Op386ADDSD(v)
 	case Op386ADDSDload:
@@ -42,16 +36,10 @@
 		return rewriteValue386_Op386ANDLconst(v)
 	case Op386ANDLconstmodify:
 		return rewriteValue386_Op386ANDLconstmodify(v)
-	case Op386ANDLconstmodifyidx4:
-		return rewriteValue386_Op386ANDLconstmodifyidx4(v)
 	case Op386ANDLload:
 		return rewriteValue386_Op386ANDLload(v)
-	case Op386ANDLloadidx4:
-		return rewriteValue386_Op386ANDLloadidx4(v)
 	case Op386ANDLmodify:
 		return rewriteValue386_Op386ANDLmodify(v)
-	case Op386ANDLmodifyidx4:
-		return rewriteValue386_Op386ANDLmodifyidx4(v)
 	case Op386CMPB:
 		return rewriteValue386_Op386CMPB(v)
 	case Op386CMPBconst:
@@ -96,62 +84,28 @@
 		return rewriteValue386_Op386MOVBLZX(v)
 	case Op386MOVBload:
 		return rewriteValue386_Op386MOVBload(v)
-	case Op386MOVBloadidx1:
-		return rewriteValue386_Op386MOVBloadidx1(v)
 	case Op386MOVBstore:
 		return rewriteValue386_Op386MOVBstore(v)
 	case Op386MOVBstoreconst:
 		return rewriteValue386_Op386MOVBstoreconst(v)
-	case Op386MOVBstoreconstidx1:
-		return rewriteValue386_Op386MOVBstoreconstidx1(v)
-	case Op386MOVBstoreidx1:
-		return rewriteValue386_Op386MOVBstoreidx1(v)
 	case Op386MOVLload:
 		return rewriteValue386_Op386MOVLload(v)
-	case Op386MOVLloadidx1:
-		return rewriteValue386_Op386MOVLloadidx1(v)
-	case Op386MOVLloadidx4:
-		return rewriteValue386_Op386MOVLloadidx4(v)
 	case Op386MOVLstore:
 		return rewriteValue386_Op386MOVLstore(v)
 	case Op386MOVLstoreconst:
 		return rewriteValue386_Op386MOVLstoreconst(v)
-	case Op386MOVLstoreconstidx1:
-		return rewriteValue386_Op386MOVLstoreconstidx1(v)
-	case Op386MOVLstoreconstidx4:
-		return rewriteValue386_Op386MOVLstoreconstidx4(v)
-	case Op386MOVLstoreidx1:
-		return rewriteValue386_Op386MOVLstoreidx1(v)
-	case Op386MOVLstoreidx4:
-		return rewriteValue386_Op386MOVLstoreidx4(v)
 	case Op386MOVSDconst:
 		return rewriteValue386_Op386MOVSDconst(v)
 	case Op386MOVSDload:
 		return rewriteValue386_Op386MOVSDload(v)
-	case Op386MOVSDloadidx1:
-		return rewriteValue386_Op386MOVSDloadidx1(v)
-	case Op386MOVSDloadidx8:
-		return rewriteValue386_Op386MOVSDloadidx8(v)
 	case Op386MOVSDstore:
 		return rewriteValue386_Op386MOVSDstore(v)
-	case Op386MOVSDstoreidx1:
-		return rewriteValue386_Op386MOVSDstoreidx1(v)
-	case Op386MOVSDstoreidx8:
-		return rewriteValue386_Op386MOVSDstoreidx8(v)
 	case Op386MOVSSconst:
 		return rewriteValue386_Op386MOVSSconst(v)
 	case Op386MOVSSload:
 		return rewriteValue386_Op386MOVSSload(v)
-	case Op386MOVSSloadidx1:
-		return rewriteValue386_Op386MOVSSloadidx1(v)
-	case Op386MOVSSloadidx4:
-		return rewriteValue386_Op386MOVSSloadidx4(v)
 	case Op386MOVSSstore:
 		return rewriteValue386_Op386MOVSSstore(v)
-	case Op386MOVSSstoreidx1:
-		return rewriteValue386_Op386MOVSSstoreidx1(v)
-	case Op386MOVSSstoreidx4:
-		return rewriteValue386_Op386MOVSSstoreidx4(v)
 	case Op386MOVWLSX:
 		return rewriteValue386_Op386MOVWLSX(v)
 	case Op386MOVWLSXload:
@@ -160,30 +114,16 @@
 		return rewriteValue386_Op386MOVWLZX(v)
 	case Op386MOVWload:
 		return rewriteValue386_Op386MOVWload(v)
-	case Op386MOVWloadidx1:
-		return rewriteValue386_Op386MOVWloadidx1(v)
-	case Op386MOVWloadidx2:
-		return rewriteValue386_Op386MOVWloadidx2(v)
 	case Op386MOVWstore:
 		return rewriteValue386_Op386MOVWstore(v)
 	case Op386MOVWstoreconst:
 		return rewriteValue386_Op386MOVWstoreconst(v)
-	case Op386MOVWstoreconstidx1:
-		return rewriteValue386_Op386MOVWstoreconstidx1(v)
-	case Op386MOVWstoreconstidx2:
-		return rewriteValue386_Op386MOVWstoreconstidx2(v)
-	case Op386MOVWstoreidx1:
-		return rewriteValue386_Op386MOVWstoreidx1(v)
-	case Op386MOVWstoreidx2:
-		return rewriteValue386_Op386MOVWstoreidx2(v)
 	case Op386MULL:
 		return rewriteValue386_Op386MULL(v)
 	case Op386MULLconst:
 		return rewriteValue386_Op386MULLconst(v)
 	case Op386MULLload:
 		return rewriteValue386_Op386MULLload(v)
-	case Op386MULLloadidx4:
-		return rewriteValue386_Op386MULLloadidx4(v)
 	case Op386MULSD:
 		return rewriteValue386_Op386MULSD(v)
 	case Op386MULSDload:
@@ -202,16 +142,10 @@
 		return rewriteValue386_Op386ORLconst(v)
 	case Op386ORLconstmodify:
 		return rewriteValue386_Op386ORLconstmodify(v)
-	case Op386ORLconstmodifyidx4:
-		return rewriteValue386_Op386ORLconstmodifyidx4(v)
 	case Op386ORLload:
 		return rewriteValue386_Op386ORLload(v)
-	case Op386ORLloadidx4:
-		return rewriteValue386_Op386ORLloadidx4(v)
 	case Op386ORLmodify:
 		return rewriteValue386_Op386ORLmodify(v)
-	case Op386ORLmodifyidx4:
-		return rewriteValue386_Op386ORLmodifyidx4(v)
 	case Op386ROLBconst:
 		return rewriteValue386_Op386ROLBconst(v)
 	case Op386ROLLconst:
@@ -278,12 +212,8 @@
 		return rewriteValue386_Op386SUBLconst(v)
 	case Op386SUBLload:
 		return rewriteValue386_Op386SUBLload(v)
-	case Op386SUBLloadidx4:
-		return rewriteValue386_Op386SUBLloadidx4(v)
 	case Op386SUBLmodify:
 		return rewriteValue386_Op386SUBLmodify(v)
-	case Op386SUBLmodifyidx4:
-		return rewriteValue386_Op386SUBLmodifyidx4(v)
 	case Op386SUBSD:
 		return rewriteValue386_Op386SUBSD(v)
 	case Op386SUBSDload:
@@ -298,16 +228,10 @@
 		return rewriteValue386_Op386XORLconst(v)
 	case Op386XORLconstmodify:
 		return rewriteValue386_Op386XORLconstmodify(v)
-	case Op386XORLconstmodifyidx4:
-		return rewriteValue386_Op386XORLconstmodifyidx4(v)
 	case Op386XORLload:
 		return rewriteValue386_Op386XORLload(v)
-	case Op386XORLloadidx4:
-		return rewriteValue386_Op386XORLloadidx4(v)
 	case Op386XORLmodify:
 		return rewriteValue386_Op386XORLmodify(v)
-	case Op386XORLmodifyidx4:
-		return rewriteValue386_Op386XORLmodifyidx4(v)
 	case OpAdd16:
 		v.Op = Op386ADDL
 		return true
@@ -1042,32 +966,6 @@
 		}
 		break
 	}
-	// match: (ADDL x l:(MOVLloadidx4 [off] {sym} ptr idx mem))
-	// cond: canMergeLoadClobber(v, l, x) && clobber(l)
-	// result: (ADDLloadidx4 x [off] {sym} ptr idx mem)
-	for {
-		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-			x := v_0
-			l := v_1
-			if l.Op != Op386MOVLloadidx4 {
-				continue
-			}
-			off := l.AuxInt
-			sym := l.Aux
-			mem := l.Args[2]
-			ptr := l.Args[0]
-			idx := l.Args[1]
-			if !(canMergeLoadClobber(v, l, x) && clobber(l)) {
-				continue
-			}
-			v.reset(Op386ADDLloadidx4)
-			v.AuxInt = off
-			v.Aux = sym
-			v.AddArg4(x, ptr, idx, mem)
-			return true
-		}
-		break
-	}
 	// match: (ADDL x (NEGL y))
 	// result: (SUBL x y)
 	for {
@@ -1316,81 +1214,6 @@
 	}
 	return false
 }
-func rewriteValue386_Op386ADDLconstmodifyidx4(v *Value) bool {
-	v_2 := v.Args[2]
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	b := v.Block
-	config := b.Func.Config
-	// match: (ADDLconstmodifyidx4 [valoff1] {sym} (ADDLconst [off2] base) idx mem)
-	// cond: ValAndOff(valoff1).canAdd(off2)
-	// result: (ADDLconstmodifyidx4 [ValAndOff(valoff1).add(off2)] {sym} base idx mem)
-	for {
-		valoff1 := v.AuxInt
-		sym := v.Aux
-		if v_0.Op != Op386ADDLconst {
-			break
-		}
-		off2 := v_0.AuxInt
-		base := v_0.Args[0]
-		idx := v_1
-		mem := v_2
-		if !(ValAndOff(valoff1).canAdd(off2)) {
-			break
-		}
-		v.reset(Op386ADDLconstmodifyidx4)
-		v.AuxInt = ValAndOff(valoff1).add(off2)
-		v.Aux = sym
-		v.AddArg3(base, idx, mem)
-		return true
-	}
-	// match: (ADDLconstmodifyidx4 [valoff1] {sym} base (ADDLconst [off2] idx) mem)
-	// cond: ValAndOff(valoff1).canAdd(off2*4)
-	// result: (ADDLconstmodifyidx4 [ValAndOff(valoff1).add(off2*4)] {sym} base idx mem)
-	for {
-		valoff1 := v.AuxInt
-		sym := v.Aux
-		base := v_0
-		if v_1.Op != Op386ADDLconst {
-			break
-		}
-		off2 := v_1.AuxInt
-		idx := v_1.Args[0]
-		mem := v_2
-		if !(ValAndOff(valoff1).canAdd(off2 * 4)) {
-			break
-		}
-		v.reset(Op386ADDLconstmodifyidx4)
-		v.AuxInt = ValAndOff(valoff1).add(off2 * 4)
-		v.Aux = sym
-		v.AddArg3(base, idx, mem)
-		return true
-	}
-	// match: (ADDLconstmodifyidx4 [valoff1] {sym1} (LEAL [off2] {sym2} base) idx mem)
-	// cond: ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)
-	// result: (ADDLconstmodifyidx4 [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base idx mem)
-	for {
-		valoff1 := v.AuxInt
-		sym1 := v.Aux
-		if v_0.Op != Op386LEAL {
-			break
-		}
-		off2 := v_0.AuxInt
-		sym2 := v_0.Aux
-		base := v_0.Args[0]
-		idx := v_1
-		mem := v_2
-		if !(ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) {
-			break
-		}
-		v.reset(Op386ADDLconstmodifyidx4)
-		v.AuxInt = ValAndOff(valoff1).add(off2)
-		v.Aux = mergeSym(sym1, sym2)
-		v.AddArg3(base, idx, mem)
-		return true
-	}
-	return false
-}
 func rewriteValue386_Op386ADDLload(v *Value) bool {
 	v_2 := v.Args[2]
 	v_1 := v.Args[1]
@@ -1442,109 +1265,6 @@
 		v.AddArg3(val, base, mem)
 		return true
 	}
-	// match: (ADDLload [off1] {sym1} val (LEAL4 [off2] {sym2} ptr idx) mem)
-	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-	// result: (ADDLloadidx4 [off1+off2] {mergeSym(sym1,sym2)} val ptr idx mem)
-	for {
-		off1 := v.AuxInt
-		sym1 := v.Aux
-		val := v_0
-		if v_1.Op != Op386LEAL4 {
-			break
-		}
-		off2 := v_1.AuxInt
-		sym2 := v_1.Aux
-		idx := v_1.Args[1]
-		ptr := v_1.Args[0]
-		mem := v_2
-		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
-			break
-		}
-		v.reset(Op386ADDLloadidx4)
-		v.AuxInt = off1 + off2
-		v.Aux = mergeSym(sym1, sym2)
-		v.AddArg4(val, ptr, idx, mem)
-		return true
-	}
-	return false
-}
-func rewriteValue386_Op386ADDLloadidx4(v *Value) bool {
-	v_3 := v.Args[3]
-	v_2 := v.Args[2]
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	b := v.Block
-	config := b.Func.Config
-	// match: (ADDLloadidx4 [off1] {sym} val (ADDLconst [off2] base) idx mem)
-	// cond: is32Bit(off1+off2)
-	// result: (ADDLloadidx4 [off1+off2] {sym} val base idx mem)
-	for {
-		off1 := v.AuxInt
-		sym := v.Aux
-		val := v_0
-		if v_1.Op != Op386ADDLconst {
-			break
-		}
-		off2 := v_1.AuxInt
-		base := v_1.Args[0]
-		idx := v_2
-		mem := v_3
-		if !(is32Bit(off1 + off2)) {
-			break
-		}
-		v.reset(Op386ADDLloadidx4)
-		v.AuxInt = off1 + off2
-		v.Aux = sym
-		v.AddArg4(val, base, idx, mem)
-		return true
-	}
-	// match: (ADDLloadidx4 [off1] {sym} val base (ADDLconst [off2] idx) mem)
-	// cond: is32Bit(off1+off2*4)
-	// result: (ADDLloadidx4 [off1+off2*4] {sym} val base idx mem)
-	for {
-		off1 := v.AuxInt
-		sym := v.Aux
-		val := v_0
-		base := v_1
-		if v_2.Op != Op386ADDLconst {
-			break
-		}
-		off2 := v_2.AuxInt
-		idx := v_2.Args[0]
-		mem := v_3
-		if !(is32Bit(off1 + off2*4)) {
-			break
-		}
-		v.reset(Op386ADDLloadidx4)
-		v.AuxInt = off1 + off2*4
-		v.Aux = sym
-		v.AddArg4(val, base, idx, mem)
-		return true
-	}
-	// match: (ADDLloadidx4 [off1] {sym1} val (LEAL [off2] {sym2} base) idx mem)
-	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)
-	// result: (ADDLloadidx4 [off1+off2] {mergeSym(sym1,sym2)} val base idx mem)
-	for {
-		off1 := v.AuxInt
-		sym1 := v.Aux
-		val := v_0
-		if v_1.Op != Op386LEAL {
-			break
-		}
-		off2 := v_1.AuxInt
-		sym2 := v_1.Aux
-		base := v_1.Args[0]
-		idx := v_2
-		mem := v_3
-		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) {
-			break
-		}
-		v.reset(Op386ADDLloadidx4)
-		v.AuxInt = off1 + off2
-		v.Aux = mergeSym(sym1, sym2)
-		v.AddArg4(val, base, idx, mem)
-		return true
-	}
 	return false
 }
 func rewriteValue386_Op386ADDLmodify(v *Value) bool {
@@ -1600,107 +1320,6 @@
 	}
 	return false
 }
-func rewriteValue386_Op386ADDLmodifyidx4(v *Value) bool {
-	v_3 := v.Args[3]
-	v_2 := v.Args[2]
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	b := v.Block
-	config := b.Func.Config
-	// match: (ADDLmodifyidx4 [off1] {sym} (ADDLconst [off2] base) idx val mem)
-	// cond: is32Bit(off1+off2)
-	// result: (ADDLmodifyidx4 [off1+off2] {sym} base idx val mem)
-	for {
-		off1 := v.AuxInt
-		sym := v.Aux
-		if v_0.Op != Op386ADDLconst {
-			break
-		}
-		off2 := v_0.AuxInt
-		base := v_0.Args[0]
-		idx := v_1
-		val := v_2
-		mem := v_3
-		if !(is32Bit(off1 + off2)) {
-			break
-		}
-		v.reset(Op386ADDLmodifyidx4)
-		v.AuxInt = off1 + off2
-		v.Aux = sym
-		v.AddArg4(base, idx, val, mem)
-		return true
-	}
-	// match: (ADDLmodifyidx4 [off1] {sym} base (ADDLconst [off2] idx) val mem)
-	// cond: is32Bit(off1+off2*4)
-	// result: (ADDLmodifyidx4 [off1+off2*4] {sym} base idx val mem)
-	for {
-		off1 := v.AuxInt
-		sym := v.Aux
-		base := v_0
-		if v_1.Op != Op386ADDLconst {
-			break
-		}
-		off2 := v_1.AuxInt
-		idx := v_1.Args[0]
-		val := v_2
-		mem := v_3
-		if !(is32Bit(off1 + off2*4)) {
-			break
-		}
-		v.reset(Op386ADDLmodifyidx4)
-		v.AuxInt = off1 + off2*4
-		v.Aux = sym
-		v.AddArg4(base, idx, val, mem)
-		return true
-	}
-	// match: (ADDLmodifyidx4 [off1] {sym1} (LEAL [off2] {sym2} base) idx val mem)
-	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)
-	// result: (ADDLmodifyidx4 [off1+off2] {mergeSym(sym1,sym2)} base idx val mem)
-	for {
-		off1 := v.AuxInt
-		sym1 := v.Aux
-		if v_0.Op != Op386LEAL {
-			break
-		}
-		off2 := v_0.AuxInt
-		sym2 := v_0.Aux
-		base := v_0.Args[0]
-		idx := v_1
-		val := v_2
-		mem := v_3
-		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) {
-			break
-		}
-		v.reset(Op386ADDLmodifyidx4)
-		v.AuxInt = off1 + off2
-		v.Aux = mergeSym(sym1, sym2)
-		v.AddArg4(base, idx, val, mem)
-		return true
-	}
-	// match: (ADDLmodifyidx4 [off] {sym} ptr idx (MOVLconst [c]) mem)
-	// cond: validValAndOff(c,off)
-	// result: (ADDLconstmodifyidx4 [makeValAndOff(c,off)] {sym} ptr idx mem)
-	for {
-		off := v.AuxInt
-		sym := v.Aux
-		ptr := v_0
-		idx := v_1
-		if v_2.Op != Op386MOVLconst {
-			break
-		}
-		c := v_2.AuxInt
-		mem := v_3
-		if !(validValAndOff(c, off)) {
-			break
-		}
-		v.reset(Op386ADDLconstmodifyidx4)
-		v.AuxInt = makeValAndOff(c, off)
-		v.Aux = sym
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
-	return false
-}
 func rewriteValue386_Op386ADDSD(v *Value) bool {
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
@@ -1915,32 +1534,6 @@
 		}
 		break
 	}
-	// match: (ANDL x l:(MOVLloadidx4 [off] {sym} ptr idx mem))
-	// cond: canMergeLoadClobber(v, l, x) && clobber(l)
-	// result: (ANDLloadidx4 x [off] {sym} ptr idx mem)
-	for {
-		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-			x := v_0
-			l := v_1
-			if l.Op != Op386MOVLloadidx4 {
-				continue
-			}
-			off := l.AuxInt
-			sym := l.Aux
-			mem := l.Args[2]
-			ptr := l.Args[0]
-			idx := l.Args[1]
-			if !(canMergeLoadClobber(v, l, x) && clobber(l)) {
-				continue
-			}
-			v.reset(Op386ANDLloadidx4)
-			v.AuxInt = off
-			v.Aux = sym
-			v.AddArg4(x, ptr, idx, mem)
-			return true
-		}
-		break
-	}
 	// match: (ANDL x x)
 	// result: x
 	for {
@@ -2057,81 +1650,6 @@
 	}
 	return false
 }
-func rewriteValue386_Op386ANDLconstmodifyidx4(v *Value) bool {
-	v_2 := v.Args[2]
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	b := v.Block
-	config := b.Func.Config
-	// match: (ANDLconstmodifyidx4 [valoff1] {sym} (ADDLconst [off2] base) idx mem)
-	// cond: ValAndOff(valoff1).canAdd(off2)
-	// result: (ANDLconstmodifyidx4 [ValAndOff(valoff1).add(off2)] {sym} base idx mem)
-	for {
-		valoff1 := v.AuxInt
-		sym := v.Aux
-		if v_0.Op != Op386ADDLconst {
-			break
-		}
-		off2 := v_0.AuxInt
-		base := v_0.Args[0]
-		idx := v_1
-		mem := v_2
-		if !(ValAndOff(valoff1).canAdd(off2)) {
-			break
-		}
-		v.reset(Op386ANDLconstmodifyidx4)
-		v.AuxInt = ValAndOff(valoff1).add(off2)
-		v.Aux = sym
-		v.AddArg3(base, idx, mem)
-		return true
-	}
-	// match: (ANDLconstmodifyidx4 [valoff1] {sym} base (ADDLconst [off2] idx) mem)
-	// cond: ValAndOff(valoff1).canAdd(off2*4)
-	// result: (ANDLconstmodifyidx4 [ValAndOff(valoff1).add(off2*4)] {sym} base idx mem)
-	for {
-		valoff1 := v.AuxInt
-		sym := v.Aux
-		base := v_0
-		if v_1.Op != Op386ADDLconst {
-			break
-		}
-		off2 := v_1.AuxInt
-		idx := v_1.Args[0]
-		mem := v_2
-		if !(ValAndOff(valoff1).canAdd(off2 * 4)) {
-			break
-		}
-		v.reset(Op386ANDLconstmodifyidx4)
-		v.AuxInt = ValAndOff(valoff1).add(off2 * 4)
-		v.Aux = sym
-		v.AddArg3(base, idx, mem)
-		return true
-	}
-	// match: (ANDLconstmodifyidx4 [valoff1] {sym1} (LEAL [off2] {sym2} base) idx mem)
-	// cond: ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)
-	// result: (ANDLconstmodifyidx4 [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base idx mem)
-	for {
-		valoff1 := v.AuxInt
-		sym1 := v.Aux
-		if v_0.Op != Op386LEAL {
-			break
-		}
-		off2 := v_0.AuxInt
-		sym2 := v_0.Aux
-		base := v_0.Args[0]
-		idx := v_1
-		mem := v_2
-		if !(ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) {
-			break
-		}
-		v.reset(Op386ANDLconstmodifyidx4)
-		v.AuxInt = ValAndOff(valoff1).add(off2)
-		v.Aux = mergeSym(sym1, sym2)
-		v.AddArg3(base, idx, mem)
-		return true
-	}
-	return false
-}
 func rewriteValue386_Op386ANDLload(v *Value) bool {
 	v_2 := v.Args[2]
 	v_1 := v.Args[1]
@@ -2183,109 +1701,6 @@
 		v.AddArg3(val, base, mem)
 		return true
 	}
-	// match: (ANDLload [off1] {sym1} val (LEAL4 [off2] {sym2} ptr idx) mem)
-	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-	// result: (ANDLloadidx4 [off1+off2] {mergeSym(sym1,sym2)} val ptr idx mem)
-	for {
-		off1 := v.AuxInt
-		sym1 := v.Aux
-		val := v_0
-		if v_1.Op != Op386LEAL4 {
-			break
-		}
-		off2 := v_1.AuxInt
-		sym2 := v_1.Aux
-		idx := v_1.Args[1]
-		ptr := v_1.Args[0]
-		mem := v_2
-		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
-			break
-		}
-		v.reset(Op386ANDLloadidx4)
-		v.AuxInt = off1 + off2
-		v.Aux = mergeSym(sym1, sym2)
-		v.AddArg4(val, ptr, idx, mem)
-		return true
-	}
-	return false
-}
-func rewriteValue386_Op386ANDLloadidx4(v *Value) bool {
-	v_3 := v.Args[3]
-	v_2 := v.Args[2]
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	b := v.Block
-	config := b.Func.Config
-	// match: (ANDLloadidx4 [off1] {sym} val (ADDLconst [off2] base) idx mem)
-	// cond: is32Bit(off1+off2)
-	// result: (ANDLloadidx4 [off1+off2] {sym} val base idx mem)
-	for {
-		off1 := v.AuxInt
-		sym := v.Aux
-		val := v_0
-		if v_1.Op != Op386ADDLconst {
-			break
-		}
-		off2 := v_1.AuxInt
-		base := v_1.Args[0]
-		idx := v_2
-		mem := v_3
-		if !(is32Bit(off1 + off2)) {
-			break
-		}
-		v.reset(Op386ANDLloadidx4)
-		v.AuxInt = off1 + off2
-		v.Aux = sym
-		v.AddArg4(val, base, idx, mem)
-		return true
-	}
-	// match: (ANDLloadidx4 [off1] {sym} val base (ADDLconst [off2] idx) mem)
-	// cond: is32Bit(off1+off2*4)
-	// result: (ANDLloadidx4 [off1+off2*4] {sym} val base idx mem)
-	for {
-		off1 := v.AuxInt
-		sym := v.Aux
-		val := v_0
-		base := v_1
-		if v_2.Op != Op386ADDLconst {
-			break
-		}
-		off2 := v_2.AuxInt
-		idx := v_2.Args[0]
-		mem := v_3
-		if !(is32Bit(off1 + off2*4)) {
-			break
-		}
-		v.reset(Op386ANDLloadidx4)
-		v.AuxInt = off1 + off2*4
-		v.Aux = sym
-		v.AddArg4(val, base, idx, mem)
-		return true
-	}
-	// match: (ANDLloadidx4 [off1] {sym1} val (LEAL [off2] {sym2} base) idx mem)
-	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)
-	// result: (ANDLloadidx4 [off1+off2] {mergeSym(sym1,sym2)} val base idx mem)
-	for {
-		off1 := v.AuxInt
-		sym1 := v.Aux
-		val := v_0
-		if v_1.Op != Op386LEAL {
-			break
-		}
-		off2 := v_1.AuxInt
-		sym2 := v_1.Aux
-		base := v_1.Args[0]
-		idx := v_2
-		mem := v_3
-		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) {
-			break
-		}
-		v.reset(Op386ANDLloadidx4)
-		v.AuxInt = off1 + off2
-		v.Aux = mergeSym(sym1, sym2)
-		v.AddArg4(val, base, idx, mem)
-		return true
-	}
 	return false
 }
 func rewriteValue386_Op386ANDLmodify(v *Value) bool {
@@ -2341,107 +1756,6 @@
 	}
 	return false
 }
-func rewriteValue386_Op386ANDLmodifyidx4(v *Value) bool {
-	v_3 := v.Args[3]
-	v_2 := v.Args[2]
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	b := v.Block
-	config := b.Func.Config
-	// match: (ANDLmodifyidx4 [off1] {sym} (ADDLconst [off2] base) idx val mem)
-	// cond: is32Bit(off1+off2)
-	// result: (ANDLmodifyidx4 [off1+off2] {sym} base idx val mem)
-	for {
-		off1 := v.AuxInt
-		sym := v.Aux
-		if v_0.Op != Op386ADDLconst {
-			break
-		}
-		off2 := v_0.AuxInt
-		base := v_0.Args[0]
-		idx := v_1
-		val := v_2
-		mem := v_3
-		if !(is32Bit(off1 + off2)) {
-			break
-		}
-		v.reset(Op386ANDLmodifyidx4)
-		v.AuxInt = off1 + off2
-		v.Aux = sym
-		v.AddArg4(base, idx, val, mem)
-		return true
-	}
-	// match: (ANDLmodifyidx4 [off1] {sym} base (ADDLconst [off2] idx) val mem)
-	// cond: is32Bit(off1+off2*4)
-	// result: (ANDLmodifyidx4 [off1+off2*4] {sym} base idx val mem)
-	for {
-		off1 := v.AuxInt
-		sym := v.Aux
-		base := v_0
-		if v_1.Op != Op386ADDLconst {
-			break
-		}
-		off2 := v_1.AuxInt
-		idx := v_1.Args[0]
-		val := v_2
-		mem := v_3
-		if !(is32Bit(off1 + off2*4)) {
-			break
-		}
-		v.reset(Op386ANDLmodifyidx4)
-		v.AuxInt = off1 + off2*4
-		v.Aux = sym
-		v.AddArg4(base, idx, val, mem)
-		return true
-	}
-	// match: (ANDLmodifyidx4 [off1] {sym1} (LEAL [off2] {sym2} base) idx val mem)
-	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)
-	// result: (ANDLmodifyidx4 [off1+off2] {mergeSym(sym1,sym2)} base idx val mem)
-	for {
-		off1 := v.AuxInt
-		sym1 := v.Aux
-		if v_0.Op != Op386LEAL {
-			break
-		}
-		off2 := v_0.AuxInt
-		sym2 := v_0.Aux
-		base := v_0.Args[0]
-		idx := v_1
-		val := v_2
-		mem := v_3
-		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) {
-			break
-		}
-		v.reset(Op386ANDLmodifyidx4)
-		v.AuxInt = off1 + off2
-		v.Aux = mergeSym(sym1, sym2)
-		v.AddArg4(base, idx, val, mem)
-		return true
-	}
-	// match: (ANDLmodifyidx4 [off] {sym} ptr idx (MOVLconst [c]) mem)
-	// cond: validValAndOff(c,off)
-	// result: (ANDLconstmodifyidx4 [makeValAndOff(c,off)] {sym} ptr idx mem)
-	for {
-		off := v.AuxInt
-		sym := v.Aux
-		ptr := v_0
-		idx := v_1
-		if v_2.Op != Op386MOVLconst {
-			break
-		}
-		c := v_2.AuxInt
-		mem := v_3
-		if !(validValAndOff(c, off)) {
-			break
-		}
-		v.reset(Op386ANDLconstmodifyidx4)
-		v.AuxInt = makeValAndOff(c, off)
-		v.Aux = sym
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
-	return false
-}
 func rewriteValue386_Op386CMPB(v *Value) bool {
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
@@ -3768,6 +3082,76 @@
 		}
 		break
 	}
+	// match: (LEAL1 [off1] {sym1} x (LEAL1 [off2] {sym2} y y))
+	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+	// result: (LEAL2 [off1+off2] {mergeSym(sym1, sym2)} x y)
+	for {
+		off1 := v.AuxInt
+		sym1 := v.Aux
+		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+			x := v_0
+			if v_1.Op != Op386LEAL1 {
+				continue
+			}
+			off2 := v_1.AuxInt
+			sym2 := v_1.Aux
+			y := v_1.Args[1]
+			if y != v_1.Args[0] || !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+				continue
+			}
+			v.reset(Op386LEAL2)
+			v.AuxInt = off1 + off2
+			v.Aux = mergeSym(sym1, sym2)
+			v.AddArg2(x, y)
+			return true
+		}
+		break
+	}
+	// match: (LEAL1 [off1] {sym1} x (LEAL1 [off2] {sym2} x y))
+	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+	// result: (LEAL2 [off1+off2] {mergeSym(sym1, sym2)} y x)
+	for {
+		off1 := v.AuxInt
+		sym1 := v.Aux
+		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+			x := v_0
+			if v_1.Op != Op386LEAL1 {
+				continue
+			}
+			off2 := v_1.AuxInt
+			sym2 := v_1.Aux
+			_ = v_1.Args[1]
+			v_1_0 := v_1.Args[0]
+			v_1_1 := v_1.Args[1]
+			for _i1 := 0; _i1 <= 1; _i1, v_1_0, v_1_1 = _i1+1, v_1_1, v_1_0 {
+				if x != v_1_0 {
+					continue
+				}
+				y := v_1_1
+				if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+					continue
+				}
+				v.reset(Op386LEAL2)
+				v.AuxInt = off1 + off2
+				v.Aux = mergeSym(sym1, sym2)
+				v.AddArg2(y, x)
+				return true
+			}
+		}
+		break
+	}
+	// match: (LEAL1 [0] {nil} x y)
+	// result: (ADDL x y)
+	for {
+		if v.AuxInt != 0 || v.Aux != nil {
+			break
+		}
+		x := v_0
+		y := v_1
+		v.reset(Op386ADDL)
+		v.AddArg2(x, y)
+		return true
+	}
 	return false
 }
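
The LEAL1 rules added above rely on the identity x + (y + y + off2) + off1 == x + 2*y + (off1 + off2), which is why a LEAL1 whose index is itself a doubled LEAL1 can be strength-reduced to a LEAL2; the last rule turns a LEAL1 with zero offset and no symbol back into a plain ADDL. A minimal sketch of the arithmetic in ordinary Go (illustrative only, not part of the generated file; the helper names are made up):

  package main

  import "fmt"

  // leal1 and leal2 model the effective addresses computed by the 386
  // LEAL1/LEAL2 forms: base + 1*index + off and base + 2*index + off.
  func leal1(base, index, off int32) int32 { return base + index + off }
  func leal2(base, index, off int32) int32 { return base + 2*index + off }

  func main() {
  	x, y, off1, off2 := int32(1000), int32(36), int32(8), int32(-4)
  	// (LEAL1 [off1] x (LEAL1 [off2] y y)) and (LEAL2 [off1+off2] x y)
  	// compute the same address, so the rewrite preserves the result.
  	fmt.Println(leal1(x, leal1(y, y, off2), off1) == leal2(x, y, off1+off2)) // true
  	// (LEAL1 [0] {nil} x y) is just x + y, i.e. ADDL.
  	fmt.Println(leal1(x, y, 0) == x+y) // true
  }
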
 func rewriteValue386_Op386LEAL2(v *Value) bool {
@@ -3869,6 +3253,30 @@
 		v.AddArg2(x, y)
 		return true
 	}
+	// match: (LEAL2 [off1] {sym} x (LEAL1 [off2] {nil} y y))
+	// cond: is32Bit(off1+2*off2)
+	// result: (LEAL4 [off1+2*off2] {sym} x y)
+	for {
+		off1 := v.AuxInt
+		sym := v.Aux
+		x := v_0
+		if v_1.Op != Op386LEAL1 {
+			break
+		}
+		off2 := v_1.AuxInt
+		if v_1.Aux != nil {
+			break
+		}
+		y := v_1.Args[1]
+		if y != v_1.Args[0] || !(is32Bit(off1 + 2*off2)) {
+			break
+		}
+		v.reset(Op386LEAL4)
+		v.AuxInt = off1 + 2*off2
+		v.Aux = sym
+		v.AddArg2(x, y)
+		return true
+	}
 	return false
 }
 func rewriteValue386_Op386LEAL4(v *Value) bool {
@@ -3954,6 +3362,30 @@
 		v.AddArg2(x, y)
 		return true
 	}
+	// match: (LEAL4 [off1] {sym} x (LEAL1 [off2] {nil} y y))
+	// cond: is32Bit(off1+4*off2)
+	// result: (LEAL8 [off1+4*off2] {sym} x y)
+	for {
+		off1 := v.AuxInt
+		sym := v.Aux
+		x := v_0
+		if v_1.Op != Op386LEAL1 {
+			break
+		}
+		off2 := v_1.AuxInt
+		if v_1.Aux != nil {
+			break
+		}
+		y := v_1.Args[1]
+		if y != v_1.Args[0] || !(is32Bit(off1 + 4*off2)) {
+			break
+		}
+		v.reset(Op386LEAL8)
+		v.AuxInt = off1 + 4*off2
+		v.Aux = sym
+		v.AddArg2(x, y)
+		return true
+	}
 	return false
 }
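
The LEAL2 and LEAL4 rules added above do the same doubling one scale up: folding an inner (LEAL1 [off2] {nil} y y) into the index multiplies that inner offset by the outer scale, which is where the off1+2*off2 and off1+4*off2 results come from. A quick check of the arithmetic, again as an illustrative Go sketch rather than compiler code:

  package main

  import "fmt"

  // leal models the effective address of the 386 LEALx forms:
  // base + scale*index + off, for scale 1, 2, 4 or 8.
  func leal(base, index, off, scale int32) int32 { return base + scale*index + off }

  func main() {
  	x, y, off1, off2 := int32(512), int32(7), int32(12), int32(3)
  	// (LEAL2 [off1] x (LEAL1 [off2] y y)) == (LEAL4 [off1+2*off2] x y)
  	fmt.Println(leal(x, leal(y, y, off2, 1), off1, 2) == leal(x, y, off1+2*off2, 4)) // true
  	// (LEAL4 [off1] x (LEAL1 [off2] y y)) == (LEAL8 [off1+4*off2] x y)
  	fmt.Println(leal(x, leal(y, y, off2, 1), off1, 4) == leal(x, y, off1+4*off2, 8)) // true
  }
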
 func rewriteValue386_Op386LEAL8(v *Value) bool {
@@ -4146,30 +3578,6 @@
 		v0.AddArg2(ptr, mem)
 		return true
 	}
-	// match: (MOVBLZX x:(MOVBloadidx1 [off] {sym} ptr idx mem))
-	// cond: x.Uses == 1 && clobber(x)
-	// result: @x.Block (MOVBloadidx1 <v.Type> [off] {sym} ptr idx mem)
-	for {
-		x := v_0
-		if x.Op != Op386MOVBloadidx1 {
-			break
-		}
-		off := x.AuxInt
-		sym := x.Aux
-		mem := x.Args[2]
-		ptr := x.Args[0]
-		idx := x.Args[1]
-		if !(x.Uses == 1 && clobber(x)) {
-			break
-		}
-		b = x.Block
-		v0 := b.NewValue0(v.Pos, Op386MOVBloadidx1, v.Type)
-		v.copyOf(v0)
-		v0.AuxInt = off
-		v0.Aux = sym
-		v0.AddArg3(ptr, idx, mem)
-		return true
-	}
 	// match: (MOVBLZX (ANDLconst [c] x))
 	// result: (ANDLconst [c & 0xff] x)
 	for {
@@ -4254,56 +3662,6 @@
 		v.AddArg2(base, mem)
 		return true
 	}
-	// match: (MOVBload [off1] {sym1} (LEAL1 [off2] {sym2} ptr idx) mem)
-	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-	// result: (MOVBloadidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
-	for {
-		off1 := v.AuxInt
-		sym1 := v.Aux
-		if v_0.Op != Op386LEAL1 {
-			break
-		}
-		off2 := v_0.AuxInt
-		sym2 := v_0.Aux
-		idx := v_0.Args[1]
-		ptr := v_0.Args[0]
-		mem := v_1
-		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
-			break
-		}
-		v.reset(Op386MOVBloadidx1)
-		v.AuxInt = off1 + off2
-		v.Aux = mergeSym(sym1, sym2)
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
-	// match: (MOVBload [off] {sym} (ADDL ptr idx) mem)
-	// cond: ptr.Op != OpSB
-	// result: (MOVBloadidx1 [off] {sym} ptr idx mem)
-	for {
-		off := v.AuxInt
-		sym := v.Aux
-		if v_0.Op != Op386ADDL {
-			break
-		}
-		_ = v_0.Args[1]
-		v_0_0 := v_0.Args[0]
-		v_0_1 := v_0.Args[1]
-		for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
-			ptr := v_0_0
-			idx := v_0_1
-			mem := v_1
-			if !(ptr.Op != OpSB) {
-				continue
-			}
-			v.reset(Op386MOVBloadidx1)
-			v.AuxInt = off
-			v.Aux = sym
-			v.AddArg3(ptr, idx, mem)
-			return true
-		}
-		break
-	}
 	// match: (MOVBload [off] {sym} (SB) _)
 	// cond: symIsRO(sym)
 	// result: (MOVLconst [int64(read8(sym, off))])
@@ -4319,54 +3677,6 @@
 	}
 	return false
 }
-func rewriteValue386_Op386MOVBloadidx1(v *Value) bool {
-	v_2 := v.Args[2]
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	// match: (MOVBloadidx1 [c] {sym} (ADDLconst [d] ptr) idx mem)
-	// result: (MOVBloadidx1 [int64(int32(c+d))] {sym} ptr idx mem)
-	for {
-		c := v.AuxInt
-		sym := v.Aux
-		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-			if v_0.Op != Op386ADDLconst {
-				continue
-			}
-			d := v_0.AuxInt
-			ptr := v_0.Args[0]
-			idx := v_1
-			mem := v_2
-			v.reset(Op386MOVBloadidx1)
-			v.AuxInt = int64(int32(c + d))
-			v.Aux = sym
-			v.AddArg3(ptr, idx, mem)
-			return true
-		}
-		break
-	}
-	// match: (MOVBloadidx1 [c] {sym} ptr (ADDLconst [d] idx) mem)
-	// result: (MOVBloadidx1 [int64(int32(c+d))] {sym} ptr idx mem)
-	for {
-		c := v.AuxInt
-		sym := v.Aux
-		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-			ptr := v_0
-			if v_1.Op != Op386ADDLconst {
-				continue
-			}
-			d := v_1.AuxInt
-			idx := v_1.Args[0]
-			mem := v_2
-			v.reset(Op386MOVBloadidx1)
-			v.AuxInt = int64(int32(c + d))
-			v.Aux = sym
-			v.AddArg3(ptr, idx, mem)
-			return true
-		}
-		break
-	}
-	return false
-}
 func rewriteValue386_Op386MOVBstore(v *Value) bool {
 	v_2 := v.Args[2]
 	v_1 := v.Args[1]
@@ -4473,58 +3783,6 @@
 		v.AddArg3(base, val, mem)
 		return true
 	}
-	// match: (MOVBstore [off1] {sym1} (LEAL1 [off2] {sym2} ptr idx) val mem)
-	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-	// result: (MOVBstoreidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
-	for {
-		off1 := v.AuxInt
-		sym1 := v.Aux
-		if v_0.Op != Op386LEAL1 {
-			break
-		}
-		off2 := v_0.AuxInt
-		sym2 := v_0.Aux
-		idx := v_0.Args[1]
-		ptr := v_0.Args[0]
-		val := v_1
-		mem := v_2
-		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
-			break
-		}
-		v.reset(Op386MOVBstoreidx1)
-		v.AuxInt = off1 + off2
-		v.Aux = mergeSym(sym1, sym2)
-		v.AddArg4(ptr, idx, val, mem)
-		return true
-	}
-	// match: (MOVBstore [off] {sym} (ADDL ptr idx) val mem)
-	// cond: ptr.Op != OpSB
-	// result: (MOVBstoreidx1 [off] {sym} ptr idx val mem)
-	for {
-		off := v.AuxInt
-		sym := v.Aux
-		if v_0.Op != Op386ADDL {
-			break
-		}
-		_ = v_0.Args[1]
-		v_0_0 := v_0.Args[0]
-		v_0_1 := v_0.Args[1]
-		for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
-			ptr := v_0_0
-			idx := v_0_1
-			val := v_1
-			mem := v_2
-			if !(ptr.Op != OpSB) {
-				continue
-			}
-			v.reset(Op386MOVBstoreidx1)
-			v.AuxInt = off
-			v.Aux = sym
-			v.AddArg4(ptr, idx, val, mem)
-			return true
-		}
-		break
-	}
 	// match: (MOVBstore [i] {s} p (SHRWconst [8] w) x:(MOVBstore [i-1] {s} p w mem))
 	// cond: x.Uses == 1 && clobber(x)
 	// result: (MOVWstore [i-1] {s} p w mem)
@@ -4657,6 +3915,134 @@
 		v.AddArg3(p, w0, mem)
 		return true
 	}
+	// match: (MOVBstore [i] {s} p1 (SHRWconst [8] w) x:(MOVBstore [i] {s} p0 w mem))
+	// cond: x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x)
+	// result: (MOVWstore [i] {s} p0 w mem)
+	for {
+		i := v.AuxInt
+		s := v.Aux
+		p1 := v_0
+		if v_1.Op != Op386SHRWconst || v_1.AuxInt != 8 {
+			break
+		}
+		w := v_1.Args[0]
+		x := v_2
+		if x.Op != Op386MOVBstore || x.AuxInt != i || x.Aux != s {
+			break
+		}
+		mem := x.Args[2]
+		p0 := x.Args[0]
+		if w != x.Args[1] || !(x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x)) {
+			break
+		}
+		v.reset(Op386MOVWstore)
+		v.AuxInt = i
+		v.Aux = s
+		v.AddArg3(p0, w, mem)
+		return true
+	}
+	// match: (MOVBstore [i] {s} p1 (SHRLconst [8] w) x:(MOVBstore [i] {s} p0 w mem))
+	// cond: x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x)
+	// result: (MOVWstore [i] {s} p0 w mem)
+	for {
+		i := v.AuxInt
+		s := v.Aux
+		p1 := v_0
+		if v_1.Op != Op386SHRLconst || v_1.AuxInt != 8 {
+			break
+		}
+		w := v_1.Args[0]
+		x := v_2
+		if x.Op != Op386MOVBstore || x.AuxInt != i || x.Aux != s {
+			break
+		}
+		mem := x.Args[2]
+		p0 := x.Args[0]
+		if w != x.Args[1] || !(x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x)) {
+			break
+		}
+		v.reset(Op386MOVWstore)
+		v.AuxInt = i
+		v.Aux = s
+		v.AddArg3(p0, w, mem)
+		return true
+	}
+	// match: (MOVBstore [i] {s} p0 w x:(MOVBstore {s} [i] p1 (SHRWconst [8] w) mem))
+	// cond: x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x)
+	// result: (MOVWstore [i] {s} p0 w mem)
+	for {
+		i := v.AuxInt
+		s := v.Aux
+		p0 := v_0
+		w := v_1
+		x := v_2
+		if x.Op != Op386MOVBstore || x.AuxInt != i || x.Aux != s {
+			break
+		}
+		mem := x.Args[2]
+		p1 := x.Args[0]
+		x_1 := x.Args[1]
+		if x_1.Op != Op386SHRWconst || x_1.AuxInt != 8 || w != x_1.Args[0] || !(x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x)) {
+			break
+		}
+		v.reset(Op386MOVWstore)
+		v.AuxInt = i
+		v.Aux = s
+		v.AddArg3(p0, w, mem)
+		return true
+	}
+	// match: (MOVBstore [i] {s} p0 w x:(MOVBstore {s} [i] p1 (SHRLconst [8] w) mem))
+	// cond: x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x)
+	// result: (MOVWstore [i] {s} p0 w mem)
+	for {
+		i := v.AuxInt
+		s := v.Aux
+		p0 := v_0
+		w := v_1
+		x := v_2
+		if x.Op != Op386MOVBstore || x.AuxInt != i || x.Aux != s {
+			break
+		}
+		mem := x.Args[2]
+		p1 := x.Args[0]
+		x_1 := x.Args[1]
+		if x_1.Op != Op386SHRLconst || x_1.AuxInt != 8 || w != x_1.Args[0] || !(x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x)) {
+			break
+		}
+		v.reset(Op386MOVWstore)
+		v.AuxInt = i
+		v.Aux = s
+		v.AddArg3(p0, w, mem)
+		return true
+	}
+	// match: (MOVBstore [i] {s} p1 (SHRLconst [j] w) x:(MOVBstore [i] {s} p0 w0:(SHRLconst [j-8] w) mem))
+	// cond: x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x)
+	// result: (MOVWstore [i] {s} p0 w0 mem)
+	for {
+		i := v.AuxInt
+		s := v.Aux
+		p1 := v_0
+		if v_1.Op != Op386SHRLconst {
+			break
+		}
+		j := v_1.AuxInt
+		w := v_1.Args[0]
+		x := v_2
+		if x.Op != Op386MOVBstore || x.AuxInt != i || x.Aux != s {
+			break
+		}
+		mem := x.Args[2]
+		p0 := x.Args[0]
+		w0 := x.Args[1]
+		if w0.Op != Op386SHRLconst || w0.AuxInt != j-8 || w != w0.Args[0] || !(x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x)) {
+			break
+		}
+		v.reset(Op386MOVWstore)
+		v.AuxInt = i
+		v.Aux = s
+		v.AddArg3(p0, w0, mem)
+		return true
+	}
 	return false
 }
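
With the one-byte indexed stores removed, adjacent byte stores now appear as two MOVBstore ops with the same offset but different pointer values, so the merging rules above key on sequentialAddresses(p0, p1, 1), i.e. p1 addressing one byte past p0, instead of on an offset difference of 1. Roughly the kind of source pattern they target, as an illustrative sketch (the function name is made up):

  package main

  import "fmt"

  // put16 writes v one byte at a time, little-endian. When the compiler
  // can prove the two destinations are adjacent, the pair of one-byte
  // stores is a candidate for merging into a single 16-bit store.
  func put16(b []byte, v uint16) {
  	b[0] = byte(v)
  	b[1] = byte(v >> 8)
  }

  func main() {
  	b := make([]byte, 2)
  	put16(b, 0x1234)
  	fmt.Printf("% x\n", b) // 34 12
  }
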
 func rewriteValue386_Op386MOVBstoreconst(v *Value) bool {
@@ -4707,46 +4093,6 @@
 		v.AddArg2(ptr, mem)
 		return true
 	}
-	// match: (MOVBstoreconst [x] {sym1} (LEAL1 [off] {sym2} ptr idx) mem)
-	// cond: canMergeSym(sym1, sym2)
-	// result: (MOVBstoreconstidx1 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
-	for {
-		x := v.AuxInt
-		sym1 := v.Aux
-		if v_0.Op != Op386LEAL1 {
-			break
-		}
-		off := v_0.AuxInt
-		sym2 := v_0.Aux
-		idx := v_0.Args[1]
-		ptr := v_0.Args[0]
-		mem := v_1
-		if !(canMergeSym(sym1, sym2)) {
-			break
-		}
-		v.reset(Op386MOVBstoreconstidx1)
-		v.AuxInt = ValAndOff(x).add(off)
-		v.Aux = mergeSym(sym1, sym2)
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
-	// match: (MOVBstoreconst [x] {sym} (ADDL ptr idx) mem)
-	// result: (MOVBstoreconstidx1 [x] {sym} ptr idx mem)
-	for {
-		x := v.AuxInt
-		sym := v.Aux
-		if v_0.Op != Op386ADDL {
-			break
-		}
-		idx := v_0.Args[1]
-		ptr := v_0.Args[0]
-		mem := v_1
-		v.reset(Op386MOVBstoreconstidx1)
-		v.AuxInt = x
-		v.Aux = sym
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
 	// match: (MOVBstoreconst [c] {s} p x:(MOVBstoreconst [a] {s} p mem))
 	// cond: x.Uses == 1 && ValAndOff(a).Off() + 1 == ValAndOff(c).Off() && clobber(x)
 	// result: (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p mem)
@@ -4797,296 +4143,57 @@
 		v.AddArg2(p, mem)
 		return true
 	}
-	return false
-}
-func rewriteValue386_Op386MOVBstoreconstidx1(v *Value) bool {
-	v_2 := v.Args[2]
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	// match: (MOVBstoreconstidx1 [x] {sym} (ADDLconst [c] ptr) idx mem)
-	// result: (MOVBstoreconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
-	for {
-		x := v.AuxInt
-		sym := v.Aux
-		if v_0.Op != Op386ADDLconst {
-			break
-		}
-		c := v_0.AuxInt
-		ptr := v_0.Args[0]
-		idx := v_1
-		mem := v_2
-		v.reset(Op386MOVBstoreconstidx1)
-		v.AuxInt = ValAndOff(x).add(c)
-		v.Aux = sym
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
-	// match: (MOVBstoreconstidx1 [x] {sym} ptr (ADDLconst [c] idx) mem)
-	// result: (MOVBstoreconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
-	for {
-		x := v.AuxInt
-		sym := v.Aux
-		ptr := v_0
-		if v_1.Op != Op386ADDLconst {
-			break
-		}
-		c := v_1.AuxInt
-		idx := v_1.Args[0]
-		mem := v_2
-		v.reset(Op386MOVBstoreconstidx1)
-		v.AuxInt = ValAndOff(x).add(c)
-		v.Aux = sym
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
-	// match: (MOVBstoreconstidx1 [c] {s} p i x:(MOVBstoreconstidx1 [a] {s} p i mem))
-	// cond: x.Uses == 1 && ValAndOff(a).Off() + 1 == ValAndOff(c).Off() && clobber(x)
-	// result: (MOVWstoreconstidx1 [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p i mem)
+	// match: (MOVBstoreconst [c] {s} p1 x:(MOVBstoreconst [a] {s} p0 mem))
+	// cond: x.Uses == 1 && ValAndOff(a).Off() == ValAndOff(c).Off() && sequentialAddresses(p0, p1, 1) && clobber(x)
+	// result: (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p0 mem)
 	for {
 		c := v.AuxInt
 		s := v.Aux
-		p := v_0
-		i := v_1
-		x := v_2
-		if x.Op != Op386MOVBstoreconstidx1 {
+		p1 := v_0
+		x := v_1
+		if x.Op != Op386MOVBstoreconst {
 			break
 		}
 		a := x.AuxInt
 		if x.Aux != s {
 			break
 		}
-		mem := x.Args[2]
-		if p != x.Args[0] || i != x.Args[1] || !(x.Uses == 1 && ValAndOff(a).Off()+1 == ValAndOff(c).Off() && clobber(x)) {
+		mem := x.Args[1]
+		p0 := x.Args[0]
+		if !(x.Uses == 1 && ValAndOff(a).Off() == ValAndOff(c).Off() && sequentialAddresses(p0, p1, 1) && clobber(x)) {
 			break
 		}
-		v.reset(Op386MOVWstoreconstidx1)
+		v.reset(Op386MOVWstoreconst)
 		v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xff|ValAndOff(c).Val()<<8, ValAndOff(a).Off())
 		v.Aux = s
-		v.AddArg3(p, i, mem)
+		v.AddArg2(p0, mem)
 		return true
 	}
-	return false
-}
-func rewriteValue386_Op386MOVBstoreidx1(v *Value) bool {
-	v_3 := v.Args[3]
-	v_2 := v.Args[2]
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	// match: (MOVBstoreidx1 [c] {sym} (ADDLconst [d] ptr) idx val mem)
-	// result: (MOVBstoreidx1 [int64(int32(c+d))] {sym} ptr idx val mem)
+	// match: (MOVBstoreconst [a] {s} p0 x:(MOVBstoreconst [c] {s} p1 mem))
+	// cond: x.Uses == 1 && ValAndOff(a).Off() == ValAndOff(c).Off() && sequentialAddresses(p0, p1, 1) && clobber(x)
+	// result: (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p0 mem)
 	for {
-		c := v.AuxInt
-		sym := v.Aux
-		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-			if v_0.Op != Op386ADDLconst {
-				continue
-			}
-			d := v_0.AuxInt
-			ptr := v_0.Args[0]
-			idx := v_1
-			val := v_2
-			mem := v_3
-			v.reset(Op386MOVBstoreidx1)
-			v.AuxInt = int64(int32(c + d))
-			v.Aux = sym
-			v.AddArg4(ptr, idx, val, mem)
-			return true
-		}
-		break
-	}
-	// match: (MOVBstoreidx1 [c] {sym} ptr (ADDLconst [d] idx) val mem)
-	// result: (MOVBstoreidx1 [int64(int32(c+d))] {sym} ptr idx val mem)
-	for {
-		c := v.AuxInt
-		sym := v.Aux
-		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-			ptr := v_0
-			if v_1.Op != Op386ADDLconst {
-				continue
-			}
-			d := v_1.AuxInt
-			idx := v_1.Args[0]
-			val := v_2
-			mem := v_3
-			v.reset(Op386MOVBstoreidx1)
-			v.AuxInt = int64(int32(c + d))
-			v.Aux = sym
-			v.AddArg4(ptr, idx, val, mem)
-			return true
-		}
-		break
-	}
-	// match: (MOVBstoreidx1 [i] {s} p idx (SHRLconst [8] w) x:(MOVBstoreidx1 [i-1] {s} p idx w mem))
-	// cond: x.Uses == 1 && clobber(x)
-	// result: (MOVWstoreidx1 [i-1] {s} p idx w mem)
-	for {
-		i := v.AuxInt
+		a := v.AuxInt
 		s := v.Aux
-		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-			p := v_0
-			idx := v_1
-			if v_2.Op != Op386SHRLconst || v_2.AuxInt != 8 {
-				continue
-			}
-			w := v_2.Args[0]
-			x := v_3
-			if x.Op != Op386MOVBstoreidx1 || x.AuxInt != i-1 || x.Aux != s {
-				continue
-			}
-			mem := x.Args[3]
-			x_0 := x.Args[0]
-			x_1 := x.Args[1]
-			for _i1 := 0; _i1 <= 1; _i1, x_0, x_1 = _i1+1, x_1, x_0 {
-				if p != x_0 || idx != x_1 || w != x.Args[2] || !(x.Uses == 1 && clobber(x)) {
-					continue
-				}
-				v.reset(Op386MOVWstoreidx1)
-				v.AuxInt = i - 1
-				v.Aux = s
-				v.AddArg4(p, idx, w, mem)
-				return true
-			}
+		p0 := v_0
+		x := v_1
+		if x.Op != Op386MOVBstoreconst {
+			break
 		}
-		break
-	}
-	// match: (MOVBstoreidx1 [i] {s} p idx (SHRWconst [8] w) x:(MOVBstoreidx1 [i-1] {s} p idx w mem))
-	// cond: x.Uses == 1 && clobber(x)
-	// result: (MOVWstoreidx1 [i-1] {s} p idx w mem)
-	for {
-		i := v.AuxInt
-		s := v.Aux
-		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-			p := v_0
-			idx := v_1
-			if v_2.Op != Op386SHRWconst || v_2.AuxInt != 8 {
-				continue
-			}
-			w := v_2.Args[0]
-			x := v_3
-			if x.Op != Op386MOVBstoreidx1 || x.AuxInt != i-1 || x.Aux != s {
-				continue
-			}
-			mem := x.Args[3]
-			x_0 := x.Args[0]
-			x_1 := x.Args[1]
-			for _i1 := 0; _i1 <= 1; _i1, x_0, x_1 = _i1+1, x_1, x_0 {
-				if p != x_0 || idx != x_1 || w != x.Args[2] || !(x.Uses == 1 && clobber(x)) {
-					continue
-				}
-				v.reset(Op386MOVWstoreidx1)
-				v.AuxInt = i - 1
-				v.Aux = s
-				v.AddArg4(p, idx, w, mem)
-				return true
-			}
+		c := x.AuxInt
+		if x.Aux != s {
+			break
 		}
-		break
-	}
-	// match: (MOVBstoreidx1 [i] {s} p idx w x:(MOVBstoreidx1 [i+1] {s} p idx (SHRLconst [8] w) mem))
-	// cond: x.Uses == 1 && clobber(x)
-	// result: (MOVWstoreidx1 [i] {s} p idx w mem)
-	for {
-		i := v.AuxInt
-		s := v.Aux
-		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-			p := v_0
-			idx := v_1
-			w := v_2
-			x := v_3
-			if x.Op != Op386MOVBstoreidx1 || x.AuxInt != i+1 || x.Aux != s {
-				continue
-			}
-			mem := x.Args[3]
-			x_0 := x.Args[0]
-			x_1 := x.Args[1]
-			for _i1 := 0; _i1 <= 1; _i1, x_0, x_1 = _i1+1, x_1, x_0 {
-				if p != x_0 || idx != x_1 {
-					continue
-				}
-				x_2 := x.Args[2]
-				if x_2.Op != Op386SHRLconst || x_2.AuxInt != 8 || w != x_2.Args[0] || !(x.Uses == 1 && clobber(x)) {
-					continue
-				}
-				v.reset(Op386MOVWstoreidx1)
-				v.AuxInt = i
-				v.Aux = s
-				v.AddArg4(p, idx, w, mem)
-				return true
-			}
+		mem := x.Args[1]
+		p1 := x.Args[0]
+		if !(x.Uses == 1 && ValAndOff(a).Off() == ValAndOff(c).Off() && sequentialAddresses(p0, p1, 1) && clobber(x)) {
+			break
 		}
-		break
-	}
-	// match: (MOVBstoreidx1 [i] {s} p idx w x:(MOVBstoreidx1 [i+1] {s} p idx (SHRWconst [8] w) mem))
-	// cond: x.Uses == 1 && clobber(x)
-	// result: (MOVWstoreidx1 [i] {s} p idx w mem)
-	for {
-		i := v.AuxInt
-		s := v.Aux
-		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-			p := v_0
-			idx := v_1
-			w := v_2
-			x := v_3
-			if x.Op != Op386MOVBstoreidx1 || x.AuxInt != i+1 || x.Aux != s {
-				continue
-			}
-			mem := x.Args[3]
-			x_0 := x.Args[0]
-			x_1 := x.Args[1]
-			for _i1 := 0; _i1 <= 1; _i1, x_0, x_1 = _i1+1, x_1, x_0 {
-				if p != x_0 || idx != x_1 {
-					continue
-				}
-				x_2 := x.Args[2]
-				if x_2.Op != Op386SHRWconst || x_2.AuxInt != 8 || w != x_2.Args[0] || !(x.Uses == 1 && clobber(x)) {
-					continue
-				}
-				v.reset(Op386MOVWstoreidx1)
-				v.AuxInt = i
-				v.Aux = s
-				v.AddArg4(p, idx, w, mem)
-				return true
-			}
-		}
-		break
-	}
-	// match: (MOVBstoreidx1 [i] {s} p idx (SHRLconst [j] w) x:(MOVBstoreidx1 [i-1] {s} p idx w0:(SHRLconst [j-8] w) mem))
-	// cond: x.Uses == 1 && clobber(x)
-	// result: (MOVWstoreidx1 [i-1] {s} p idx w0 mem)
-	for {
-		i := v.AuxInt
-		s := v.Aux
-		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-			p := v_0
-			idx := v_1
-			if v_2.Op != Op386SHRLconst {
-				continue
-			}
-			j := v_2.AuxInt
-			w := v_2.Args[0]
-			x := v_3
-			if x.Op != Op386MOVBstoreidx1 || x.AuxInt != i-1 || x.Aux != s {
-				continue
-			}
-			mem := x.Args[3]
-			x_0 := x.Args[0]
-			x_1 := x.Args[1]
-			for _i1 := 0; _i1 <= 1; _i1, x_0, x_1 = _i1+1, x_1, x_0 {
-				if p != x_0 || idx != x_1 {
-					continue
-				}
-				w0 := x.Args[2]
-				if w0.Op != Op386SHRLconst || w0.AuxInt != j-8 || w != w0.Args[0] || !(x.Uses == 1 && clobber(x)) {
-					continue
-				}
-				v.reset(Op386MOVWstoreidx1)
-				v.AuxInt = i - 1
-				v.Aux = s
-				v.AddArg4(p, idx, w0, mem)
-				return true
-			}
-		}
-		break
+		v.reset(Op386MOVWstoreconst)
+		v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xff|ValAndOff(c).Val()<<8, ValAndOff(a).Off())
+		v.Aux = s
+		v.AddArg2(p0, mem)
+		return true
 	}
 	return false
 }
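
The MOVBstoreconst rules above are the constant-store analogue: two constant byte stores to sequential addresses fuse into one MOVWstoreconst whose value packs the byte at the lower address in the low bits and the other byte shifted left by 8, matching little-endian layout. A small worked example of that packing (values are illustrative):

  package main

  import "fmt"

  func main() {
  	// Two constant byte stores, b[0] = 0x34 and b[1] = 0x12, pack into one
  	// 16-bit constant store the same way the rule builds its ValAndOff:
  	lo, hi := int64(0x34), int64(0x12)
  	fmt.Printf("%#x\n", lo&0xff|hi<<8) // 0x1234
  }
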
@@ -5158,79 +4265,6 @@
 		v.AddArg2(base, mem)
 		return true
 	}
-	// match: (MOVLload [off1] {sym1} (LEAL1 [off2] {sym2} ptr idx) mem)
-	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-	// result: (MOVLloadidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
-	for {
-		off1 := v.AuxInt
-		sym1 := v.Aux
-		if v_0.Op != Op386LEAL1 {
-			break
-		}
-		off2 := v_0.AuxInt
-		sym2 := v_0.Aux
-		idx := v_0.Args[1]
-		ptr := v_0.Args[0]
-		mem := v_1
-		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
-			break
-		}
-		v.reset(Op386MOVLloadidx1)
-		v.AuxInt = off1 + off2
-		v.Aux = mergeSym(sym1, sym2)
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
-	// match: (MOVLload [off1] {sym1} (LEAL4 [off2] {sym2} ptr idx) mem)
-	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-	// result: (MOVLloadidx4 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
-	for {
-		off1 := v.AuxInt
-		sym1 := v.Aux
-		if v_0.Op != Op386LEAL4 {
-			break
-		}
-		off2 := v_0.AuxInt
-		sym2 := v_0.Aux
-		idx := v_0.Args[1]
-		ptr := v_0.Args[0]
-		mem := v_1
-		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
-			break
-		}
-		v.reset(Op386MOVLloadidx4)
-		v.AuxInt = off1 + off2
-		v.Aux = mergeSym(sym1, sym2)
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
-	// match: (MOVLload [off] {sym} (ADDL ptr idx) mem)
-	// cond: ptr.Op != OpSB
-	// result: (MOVLloadidx1 [off] {sym} ptr idx mem)
-	for {
-		off := v.AuxInt
-		sym := v.Aux
-		if v_0.Op != Op386ADDL {
-			break
-		}
-		_ = v_0.Args[1]
-		v_0_0 := v_0.Args[0]
-		v_0_1 := v_0.Args[1]
-		for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
-			ptr := v_0_0
-			idx := v_0_1
-			mem := v_1
-			if !(ptr.Op != OpSB) {
-				continue
-			}
-			v.reset(Op386MOVLloadidx1)
-			v.AuxInt = off
-			v.Aux = sym
-			v.AddArg3(ptr, idx, mem)
-			return true
-		}
-		break
-	}
 	// match: (MOVLload [off] {sym} (SB) _)
 	// cond: symIsRO(sym)
 	// result: (MOVLconst [int64(int32(read32(sym, off, config.ctxt.Arch.ByteOrder)))])
@@ -5246,116 +4280,6 @@
 	}
 	return false
 }
-func rewriteValue386_Op386MOVLloadidx1(v *Value) bool {
-	v_2 := v.Args[2]
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	// match: (MOVLloadidx1 [c] {sym} ptr (SHLLconst [2] idx) mem)
-	// result: (MOVLloadidx4 [c] {sym} ptr idx mem)
-	for {
-		c := v.AuxInt
-		sym := v.Aux
-		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-			ptr := v_0
-			if v_1.Op != Op386SHLLconst || v_1.AuxInt != 2 {
-				continue
-			}
-			idx := v_1.Args[0]
-			mem := v_2
-			v.reset(Op386MOVLloadidx4)
-			v.AuxInt = c
-			v.Aux = sym
-			v.AddArg3(ptr, idx, mem)
-			return true
-		}
-		break
-	}
-	// match: (MOVLloadidx1 [c] {sym} (ADDLconst [d] ptr) idx mem)
-	// result: (MOVLloadidx1 [int64(int32(c+d))] {sym} ptr idx mem)
-	for {
-		c := v.AuxInt
-		sym := v.Aux
-		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-			if v_0.Op != Op386ADDLconst {
-				continue
-			}
-			d := v_0.AuxInt
-			ptr := v_0.Args[0]
-			idx := v_1
-			mem := v_2
-			v.reset(Op386MOVLloadidx1)
-			v.AuxInt = int64(int32(c + d))
-			v.Aux = sym
-			v.AddArg3(ptr, idx, mem)
-			return true
-		}
-		break
-	}
-	// match: (MOVLloadidx1 [c] {sym} ptr (ADDLconst [d] idx) mem)
-	// result: (MOVLloadidx1 [int64(int32(c+d))] {sym} ptr idx mem)
-	for {
-		c := v.AuxInt
-		sym := v.Aux
-		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-			ptr := v_0
-			if v_1.Op != Op386ADDLconst {
-				continue
-			}
-			d := v_1.AuxInt
-			idx := v_1.Args[0]
-			mem := v_2
-			v.reset(Op386MOVLloadidx1)
-			v.AuxInt = int64(int32(c + d))
-			v.Aux = sym
-			v.AddArg3(ptr, idx, mem)
-			return true
-		}
-		break
-	}
-	return false
-}
-func rewriteValue386_Op386MOVLloadidx4(v *Value) bool {
-	v_2 := v.Args[2]
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	// match: (MOVLloadidx4 [c] {sym} (ADDLconst [d] ptr) idx mem)
-	// result: (MOVLloadidx4 [int64(int32(c+d))] {sym} ptr idx mem)
-	for {
-		c := v.AuxInt
-		sym := v.Aux
-		if v_0.Op != Op386ADDLconst {
-			break
-		}
-		d := v_0.AuxInt
-		ptr := v_0.Args[0]
-		idx := v_1
-		mem := v_2
-		v.reset(Op386MOVLloadidx4)
-		v.AuxInt = int64(int32(c + d))
-		v.Aux = sym
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
-	// match: (MOVLloadidx4 [c] {sym} ptr (ADDLconst [d] idx) mem)
-	// result: (MOVLloadidx4 [int64(int32(c+4*d))] {sym} ptr idx mem)
-	for {
-		c := v.AuxInt
-		sym := v.Aux
-		ptr := v_0
-		if v_1.Op != Op386ADDLconst {
-			break
-		}
-		d := v_1.AuxInt
-		idx := v_1.Args[0]
-		mem := v_2
-		v.reset(Op386MOVLloadidx4)
-		v.AuxInt = int64(int32(c + 4*d))
-		v.Aux = sym
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
-	return false
-}
 func rewriteValue386_Op386MOVLstore(v *Value) bool {
 	v_2 := v.Args[2]
 	v_1 := v.Args[1]
@@ -5428,82 +4352,6 @@
 		v.AddArg3(base, val, mem)
 		return true
 	}
-	// match: (MOVLstore [off1] {sym1} (LEAL1 [off2] {sym2} ptr idx) val mem)
-	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-	// result: (MOVLstoreidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
-	for {
-		off1 := v.AuxInt
-		sym1 := v.Aux
-		if v_0.Op != Op386LEAL1 {
-			break
-		}
-		off2 := v_0.AuxInt
-		sym2 := v_0.Aux
-		idx := v_0.Args[1]
-		ptr := v_0.Args[0]
-		val := v_1
-		mem := v_2
-		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
-			break
-		}
-		v.reset(Op386MOVLstoreidx1)
-		v.AuxInt = off1 + off2
-		v.Aux = mergeSym(sym1, sym2)
-		v.AddArg4(ptr, idx, val, mem)
-		return true
-	}
-	// match: (MOVLstore [off1] {sym1} (LEAL4 [off2] {sym2} ptr idx) val mem)
-	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-	// result: (MOVLstoreidx4 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
-	for {
-		off1 := v.AuxInt
-		sym1 := v.Aux
-		if v_0.Op != Op386LEAL4 {
-			break
-		}
-		off2 := v_0.AuxInt
-		sym2 := v_0.Aux
-		idx := v_0.Args[1]
-		ptr := v_0.Args[0]
-		val := v_1
-		mem := v_2
-		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
-			break
-		}
-		v.reset(Op386MOVLstoreidx4)
-		v.AuxInt = off1 + off2
-		v.Aux = mergeSym(sym1, sym2)
-		v.AddArg4(ptr, idx, val, mem)
-		return true
-	}
-	// match: (MOVLstore [off] {sym} (ADDL ptr idx) val mem)
-	// cond: ptr.Op != OpSB
-	// result: (MOVLstoreidx1 [off] {sym} ptr idx val mem)
-	for {
-		off := v.AuxInt
-		sym := v.Aux
-		if v_0.Op != Op386ADDL {
-			break
-		}
-		_ = v_0.Args[1]
-		v_0_0 := v_0.Args[0]
-		v_0_1 := v_0.Args[1]
-		for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
-			ptr := v_0_0
-			idx := v_0_1
-			val := v_1
-			mem := v_2
-			if !(ptr.Op != OpSB) {
-				continue
-			}
-			v.reset(Op386MOVLstoreidx1)
-			v.AuxInt = off
-			v.Aux = sym
-			v.AddArg4(ptr, idx, val, mem)
-			return true
-		}
-		break
-	}
 	// match: (MOVLstore {sym} [off] ptr y:(ADDLload x [off] {sym} ptr mem) mem)
 	// cond: y.Uses==1 && clobber(y)
 	// result: (ADDLmodify [off] {sym} ptr x mem)
@@ -5912,658 +4760,6 @@
 		v.AddArg2(ptr, mem)
 		return true
 	}
-	// match: (MOVLstoreconst [x] {sym1} (LEAL1 [off] {sym2} ptr idx) mem)
-	// cond: canMergeSym(sym1, sym2)
-	// result: (MOVLstoreconstidx1 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
-	for {
-		x := v.AuxInt
-		sym1 := v.Aux
-		if v_0.Op != Op386LEAL1 {
-			break
-		}
-		off := v_0.AuxInt
-		sym2 := v_0.Aux
-		idx := v_0.Args[1]
-		ptr := v_0.Args[0]
-		mem := v_1
-		if !(canMergeSym(sym1, sym2)) {
-			break
-		}
-		v.reset(Op386MOVLstoreconstidx1)
-		v.AuxInt = ValAndOff(x).add(off)
-		v.Aux = mergeSym(sym1, sym2)
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
-	// match: (MOVLstoreconst [x] {sym1} (LEAL4 [off] {sym2} ptr idx) mem)
-	// cond: canMergeSym(sym1, sym2)
-	// result: (MOVLstoreconstidx4 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
-	for {
-		x := v.AuxInt
-		sym1 := v.Aux
-		if v_0.Op != Op386LEAL4 {
-			break
-		}
-		off := v_0.AuxInt
-		sym2 := v_0.Aux
-		idx := v_0.Args[1]
-		ptr := v_0.Args[0]
-		mem := v_1
-		if !(canMergeSym(sym1, sym2)) {
-			break
-		}
-		v.reset(Op386MOVLstoreconstidx4)
-		v.AuxInt = ValAndOff(x).add(off)
-		v.Aux = mergeSym(sym1, sym2)
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
-	// match: (MOVLstoreconst [x] {sym} (ADDL ptr idx) mem)
-	// result: (MOVLstoreconstidx1 [x] {sym} ptr idx mem)
-	for {
-		x := v.AuxInt
-		sym := v.Aux
-		if v_0.Op != Op386ADDL {
-			break
-		}
-		idx := v_0.Args[1]
-		ptr := v_0.Args[0]
-		mem := v_1
-		v.reset(Op386MOVLstoreconstidx1)
-		v.AuxInt = x
-		v.Aux = sym
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
-	return false
-}
-func rewriteValue386_Op386MOVLstoreconstidx1(v *Value) bool {
-	v_2 := v.Args[2]
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	// match: (MOVLstoreconstidx1 [c] {sym} ptr (SHLLconst [2] idx) mem)
-	// result: (MOVLstoreconstidx4 [c] {sym} ptr idx mem)
-	for {
-		c := v.AuxInt
-		sym := v.Aux
-		ptr := v_0
-		if v_1.Op != Op386SHLLconst || v_1.AuxInt != 2 {
-			break
-		}
-		idx := v_1.Args[0]
-		mem := v_2
-		v.reset(Op386MOVLstoreconstidx4)
-		v.AuxInt = c
-		v.Aux = sym
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
-	// match: (MOVLstoreconstidx1 [x] {sym} (ADDLconst [c] ptr) idx mem)
-	// result: (MOVLstoreconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
-	for {
-		x := v.AuxInt
-		sym := v.Aux
-		if v_0.Op != Op386ADDLconst {
-			break
-		}
-		c := v_0.AuxInt
-		ptr := v_0.Args[0]
-		idx := v_1
-		mem := v_2
-		v.reset(Op386MOVLstoreconstidx1)
-		v.AuxInt = ValAndOff(x).add(c)
-		v.Aux = sym
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
-	// match: (MOVLstoreconstidx1 [x] {sym} ptr (ADDLconst [c] idx) mem)
-	// result: (MOVLstoreconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
-	for {
-		x := v.AuxInt
-		sym := v.Aux
-		ptr := v_0
-		if v_1.Op != Op386ADDLconst {
-			break
-		}
-		c := v_1.AuxInt
-		idx := v_1.Args[0]
-		mem := v_2
-		v.reset(Op386MOVLstoreconstidx1)
-		v.AuxInt = ValAndOff(x).add(c)
-		v.Aux = sym
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
-	return false
-}
-func rewriteValue386_Op386MOVLstoreconstidx4(v *Value) bool {
-	v_2 := v.Args[2]
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	// match: (MOVLstoreconstidx4 [x] {sym} (ADDLconst [c] ptr) idx mem)
-	// result: (MOVLstoreconstidx4 [ValAndOff(x).add(c)] {sym} ptr idx mem)
-	for {
-		x := v.AuxInt
-		sym := v.Aux
-		if v_0.Op != Op386ADDLconst {
-			break
-		}
-		c := v_0.AuxInt
-		ptr := v_0.Args[0]
-		idx := v_1
-		mem := v_2
-		v.reset(Op386MOVLstoreconstidx4)
-		v.AuxInt = ValAndOff(x).add(c)
-		v.Aux = sym
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
-	// match: (MOVLstoreconstidx4 [x] {sym} ptr (ADDLconst [c] idx) mem)
-	// result: (MOVLstoreconstidx4 [ValAndOff(x).add(4*c)] {sym} ptr idx mem)
-	for {
-		x := v.AuxInt
-		sym := v.Aux
-		ptr := v_0
-		if v_1.Op != Op386ADDLconst {
-			break
-		}
-		c := v_1.AuxInt
-		idx := v_1.Args[0]
-		mem := v_2
-		v.reset(Op386MOVLstoreconstidx4)
-		v.AuxInt = ValAndOff(x).add(4 * c)
-		v.Aux = sym
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
-	return false
-}
-func rewriteValue386_Op386MOVLstoreidx1(v *Value) bool {
-	v_3 := v.Args[3]
-	v_2 := v.Args[2]
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	// match: (MOVLstoreidx1 [c] {sym} ptr (SHLLconst [2] idx) val mem)
-	// result: (MOVLstoreidx4 [c] {sym} ptr idx val mem)
-	for {
-		c := v.AuxInt
-		sym := v.Aux
-		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-			ptr := v_0
-			if v_1.Op != Op386SHLLconst || v_1.AuxInt != 2 {
-				continue
-			}
-			idx := v_1.Args[0]
-			val := v_2
-			mem := v_3
-			v.reset(Op386MOVLstoreidx4)
-			v.AuxInt = c
-			v.Aux = sym
-			v.AddArg4(ptr, idx, val, mem)
-			return true
-		}
-		break
-	}
-	// match: (MOVLstoreidx1 [c] {sym} (ADDLconst [d] ptr) idx val mem)
-	// result: (MOVLstoreidx1 [int64(int32(c+d))] {sym} ptr idx val mem)
-	for {
-		c := v.AuxInt
-		sym := v.Aux
-		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-			if v_0.Op != Op386ADDLconst {
-				continue
-			}
-			d := v_0.AuxInt
-			ptr := v_0.Args[0]
-			idx := v_1
-			val := v_2
-			mem := v_3
-			v.reset(Op386MOVLstoreidx1)
-			v.AuxInt = int64(int32(c + d))
-			v.Aux = sym
-			v.AddArg4(ptr, idx, val, mem)
-			return true
-		}
-		break
-	}
-	// match: (MOVLstoreidx1 [c] {sym} ptr (ADDLconst [d] idx) val mem)
-	// result: (MOVLstoreidx1 [int64(int32(c+d))] {sym} ptr idx val mem)
-	for {
-		c := v.AuxInt
-		sym := v.Aux
-		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-			ptr := v_0
-			if v_1.Op != Op386ADDLconst {
-				continue
-			}
-			d := v_1.AuxInt
-			idx := v_1.Args[0]
-			val := v_2
-			mem := v_3
-			v.reset(Op386MOVLstoreidx1)
-			v.AuxInt = int64(int32(c + d))
-			v.Aux = sym
-			v.AddArg4(ptr, idx, val, mem)
-			return true
-		}
-		break
-	}
-	return false
-}
-func rewriteValue386_Op386MOVLstoreidx4(v *Value) bool {
-	v_3 := v.Args[3]
-	v_2 := v.Args[2]
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	// match: (MOVLstoreidx4 [c] {sym} (ADDLconst [d] ptr) idx val mem)
-	// result: (MOVLstoreidx4 [int64(int32(c+d))] {sym} ptr idx val mem)
-	for {
-		c := v.AuxInt
-		sym := v.Aux
-		if v_0.Op != Op386ADDLconst {
-			break
-		}
-		d := v_0.AuxInt
-		ptr := v_0.Args[0]
-		idx := v_1
-		val := v_2
-		mem := v_3
-		v.reset(Op386MOVLstoreidx4)
-		v.AuxInt = int64(int32(c + d))
-		v.Aux = sym
-		v.AddArg4(ptr, idx, val, mem)
-		return true
-	}
-	// match: (MOVLstoreidx4 [c] {sym} ptr (ADDLconst [d] idx) val mem)
-	// result: (MOVLstoreidx4 [int64(int32(c+4*d))] {sym} ptr idx val mem)
-	for {
-		c := v.AuxInt
-		sym := v.Aux
-		ptr := v_0
-		if v_1.Op != Op386ADDLconst {
-			break
-		}
-		d := v_1.AuxInt
-		idx := v_1.Args[0]
-		val := v_2
-		mem := v_3
-		v.reset(Op386MOVLstoreidx4)
-		v.AuxInt = int64(int32(c + 4*d))
-		v.Aux = sym
-		v.AddArg4(ptr, idx, val, mem)
-		return true
-	}
-	// match: (MOVLstoreidx4 {sym} [off] ptr idx y:(ADDLloadidx4 x [off] {sym} ptr idx mem) mem)
-	// cond: y.Uses==1 && clobber(y)
-	// result: (ADDLmodifyidx4 [off] {sym} ptr idx x mem)
-	for {
-		off := v.AuxInt
-		sym := v.Aux
-		ptr := v_0
-		idx := v_1
-		y := v_2
-		if y.Op != Op386ADDLloadidx4 || y.AuxInt != off || y.Aux != sym {
-			break
-		}
-		mem := y.Args[3]
-		x := y.Args[0]
-		if ptr != y.Args[1] || idx != y.Args[2] || mem != v_3 || !(y.Uses == 1 && clobber(y)) {
-			break
-		}
-		v.reset(Op386ADDLmodifyidx4)
-		v.AuxInt = off
-		v.Aux = sym
-		v.AddArg4(ptr, idx, x, mem)
-		return true
-	}
-	// match: (MOVLstoreidx4 {sym} [off] ptr idx y:(ANDLloadidx4 x [off] {sym} ptr idx mem) mem)
-	// cond: y.Uses==1 && clobber(y)
-	// result: (ANDLmodifyidx4 [off] {sym} ptr idx x mem)
-	for {
-		off := v.AuxInt
-		sym := v.Aux
-		ptr := v_0
-		idx := v_1
-		y := v_2
-		if y.Op != Op386ANDLloadidx4 || y.AuxInt != off || y.Aux != sym {
-			break
-		}
-		mem := y.Args[3]
-		x := y.Args[0]
-		if ptr != y.Args[1] || idx != y.Args[2] || mem != v_3 || !(y.Uses == 1 && clobber(y)) {
-			break
-		}
-		v.reset(Op386ANDLmodifyidx4)
-		v.AuxInt = off
-		v.Aux = sym
-		v.AddArg4(ptr, idx, x, mem)
-		return true
-	}
-	// match: (MOVLstoreidx4 {sym} [off] ptr idx y:(ORLloadidx4 x [off] {sym} ptr idx mem) mem)
-	// cond: y.Uses==1 && clobber(y)
-	// result: (ORLmodifyidx4 [off] {sym} ptr idx x mem)
-	for {
-		off := v.AuxInt
-		sym := v.Aux
-		ptr := v_0
-		idx := v_1
-		y := v_2
-		if y.Op != Op386ORLloadidx4 || y.AuxInt != off || y.Aux != sym {
-			break
-		}
-		mem := y.Args[3]
-		x := y.Args[0]
-		if ptr != y.Args[1] || idx != y.Args[2] || mem != v_3 || !(y.Uses == 1 && clobber(y)) {
-			break
-		}
-		v.reset(Op386ORLmodifyidx4)
-		v.AuxInt = off
-		v.Aux = sym
-		v.AddArg4(ptr, idx, x, mem)
-		return true
-	}
-	// match: (MOVLstoreidx4 {sym} [off] ptr idx y:(XORLloadidx4 x [off] {sym} ptr idx mem) mem)
-	// cond: y.Uses==1 && clobber(y)
-	// result: (XORLmodifyidx4 [off] {sym} ptr idx x mem)
-	for {
-		off := v.AuxInt
-		sym := v.Aux
-		ptr := v_0
-		idx := v_1
-		y := v_2
-		if y.Op != Op386XORLloadidx4 || y.AuxInt != off || y.Aux != sym {
-			break
-		}
-		mem := y.Args[3]
-		x := y.Args[0]
-		if ptr != y.Args[1] || idx != y.Args[2] || mem != v_3 || !(y.Uses == 1 && clobber(y)) {
-			break
-		}
-		v.reset(Op386XORLmodifyidx4)
-		v.AuxInt = off
-		v.Aux = sym
-		v.AddArg4(ptr, idx, x, mem)
-		return true
-	}
-	// match: (MOVLstoreidx4 {sym} [off] ptr idx y:(ADDL l:(MOVLloadidx4 [off] {sym} ptr idx mem) x) mem)
-	// cond: y.Uses==1 && l.Uses==1 && clobber(y, l)
-	// result: (ADDLmodifyidx4 [off] {sym} ptr idx x mem)
-	for {
-		off := v.AuxInt
-		sym := v.Aux
-		ptr := v_0
-		idx := v_1
-		y := v_2
-		if y.Op != Op386ADDL {
-			break
-		}
-		_ = y.Args[1]
-		y_0 := y.Args[0]
-		y_1 := y.Args[1]
-		for _i0 := 0; _i0 <= 1; _i0, y_0, y_1 = _i0+1, y_1, y_0 {
-			l := y_0
-			if l.Op != Op386MOVLloadidx4 || l.AuxInt != off || l.Aux != sym {
-				continue
-			}
-			mem := l.Args[2]
-			if ptr != l.Args[0] || idx != l.Args[1] {
-				continue
-			}
-			x := y_1
-			if mem != v_3 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l)) {
-				continue
-			}
-			v.reset(Op386ADDLmodifyidx4)
-			v.AuxInt = off
-			v.Aux = sym
-			v.AddArg4(ptr, idx, x, mem)
-			return true
-		}
-		break
-	}
-	// match: (MOVLstoreidx4 {sym} [off] ptr idx y:(SUBL l:(MOVLloadidx4 [off] {sym} ptr idx mem) x) mem)
-	// cond: y.Uses==1 && l.Uses==1 && clobber(y, l)
-	// result: (SUBLmodifyidx4 [off] {sym} ptr idx x mem)
-	for {
-		off := v.AuxInt
-		sym := v.Aux
-		ptr := v_0
-		idx := v_1
-		y := v_2
-		if y.Op != Op386SUBL {
-			break
-		}
-		x := y.Args[1]
-		l := y.Args[0]
-		if l.Op != Op386MOVLloadidx4 || l.AuxInt != off || l.Aux != sym {
-			break
-		}
-		mem := l.Args[2]
-		if ptr != l.Args[0] || idx != l.Args[1] || mem != v_3 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l)) {
-			break
-		}
-		v.reset(Op386SUBLmodifyidx4)
-		v.AuxInt = off
-		v.Aux = sym
-		v.AddArg4(ptr, idx, x, mem)
-		return true
-	}
-	// match: (MOVLstoreidx4 {sym} [off] ptr idx y:(ANDL l:(MOVLloadidx4 [off] {sym} ptr idx mem) x) mem)
-	// cond: y.Uses==1 && l.Uses==1 && clobber(y, l)
-	// result: (ANDLmodifyidx4 [off] {sym} ptr idx x mem)
-	for {
-		off := v.AuxInt
-		sym := v.Aux
-		ptr := v_0
-		idx := v_1
-		y := v_2
-		if y.Op != Op386ANDL {
-			break
-		}
-		_ = y.Args[1]
-		y_0 := y.Args[0]
-		y_1 := y.Args[1]
-		for _i0 := 0; _i0 <= 1; _i0, y_0, y_1 = _i0+1, y_1, y_0 {
-			l := y_0
-			if l.Op != Op386MOVLloadidx4 || l.AuxInt != off || l.Aux != sym {
-				continue
-			}
-			mem := l.Args[2]
-			if ptr != l.Args[0] || idx != l.Args[1] {
-				continue
-			}
-			x := y_1
-			if mem != v_3 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l)) {
-				continue
-			}
-			v.reset(Op386ANDLmodifyidx4)
-			v.AuxInt = off
-			v.Aux = sym
-			v.AddArg4(ptr, idx, x, mem)
-			return true
-		}
-		break
-	}
-	// match: (MOVLstoreidx4 {sym} [off] ptr idx y:(ORL l:(MOVLloadidx4 [off] {sym} ptr idx mem) x) mem)
-	// cond: y.Uses==1 && l.Uses==1 && clobber(y, l)
-	// result: (ORLmodifyidx4 [off] {sym} ptr idx x mem)
-	for {
-		off := v.AuxInt
-		sym := v.Aux
-		ptr := v_0
-		idx := v_1
-		y := v_2
-		if y.Op != Op386ORL {
-			break
-		}
-		_ = y.Args[1]
-		y_0 := y.Args[0]
-		y_1 := y.Args[1]
-		for _i0 := 0; _i0 <= 1; _i0, y_0, y_1 = _i0+1, y_1, y_0 {
-			l := y_0
-			if l.Op != Op386MOVLloadidx4 || l.AuxInt != off || l.Aux != sym {
-				continue
-			}
-			mem := l.Args[2]
-			if ptr != l.Args[0] || idx != l.Args[1] {
-				continue
-			}
-			x := y_1
-			if mem != v_3 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l)) {
-				continue
-			}
-			v.reset(Op386ORLmodifyidx4)
-			v.AuxInt = off
-			v.Aux = sym
-			v.AddArg4(ptr, idx, x, mem)
-			return true
-		}
-		break
-	}
-	// match: (MOVLstoreidx4 {sym} [off] ptr idx y:(XORL l:(MOVLloadidx4 [off] {sym} ptr idx mem) x) mem)
-	// cond: y.Uses==1 && l.Uses==1 && clobber(y, l)
-	// result: (XORLmodifyidx4 [off] {sym} ptr idx x mem)
-	for {
-		off := v.AuxInt
-		sym := v.Aux
-		ptr := v_0
-		idx := v_1
-		y := v_2
-		if y.Op != Op386XORL {
-			break
-		}
-		_ = y.Args[1]
-		y_0 := y.Args[0]
-		y_1 := y.Args[1]
-		for _i0 := 0; _i0 <= 1; _i0, y_0, y_1 = _i0+1, y_1, y_0 {
-			l := y_0
-			if l.Op != Op386MOVLloadidx4 || l.AuxInt != off || l.Aux != sym {
-				continue
-			}
-			mem := l.Args[2]
-			if ptr != l.Args[0] || idx != l.Args[1] {
-				continue
-			}
-			x := y_1
-			if mem != v_3 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l)) {
-				continue
-			}
-			v.reset(Op386XORLmodifyidx4)
-			v.AuxInt = off
-			v.Aux = sym
-			v.AddArg4(ptr, idx, x, mem)
-			return true
-		}
-		break
-	}
-	// match: (MOVLstoreidx4 {sym} [off] ptr idx y:(ADDLconst [c] l:(MOVLloadidx4 [off] {sym} ptr idx mem)) mem)
-	// cond: y.Uses==1 && l.Uses==1 && clobber(y, l) && validValAndOff(c,off)
-	// result: (ADDLconstmodifyidx4 [makeValAndOff(c,off)] {sym} ptr idx mem)
-	for {
-		off := v.AuxInt
-		sym := v.Aux
-		ptr := v_0
-		idx := v_1
-		y := v_2
-		if y.Op != Op386ADDLconst {
-			break
-		}
-		c := y.AuxInt
-		l := y.Args[0]
-		if l.Op != Op386MOVLloadidx4 || l.AuxInt != off || l.Aux != sym {
-			break
-		}
-		mem := l.Args[2]
-		if ptr != l.Args[0] || idx != l.Args[1] || mem != v_3 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l) && validValAndOff(c, off)) {
-			break
-		}
-		v.reset(Op386ADDLconstmodifyidx4)
-		v.AuxInt = makeValAndOff(c, off)
-		v.Aux = sym
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
-	// match: (MOVLstoreidx4 {sym} [off] ptr idx y:(ANDLconst [c] l:(MOVLloadidx4 [off] {sym} ptr idx mem)) mem)
-	// cond: y.Uses==1 && l.Uses==1 && clobber(y, l) && validValAndOff(c,off)
-	// result: (ANDLconstmodifyidx4 [makeValAndOff(c,off)] {sym} ptr idx mem)
-	for {
-		off := v.AuxInt
-		sym := v.Aux
-		ptr := v_0
-		idx := v_1
-		y := v_2
-		if y.Op != Op386ANDLconst {
-			break
-		}
-		c := y.AuxInt
-		l := y.Args[0]
-		if l.Op != Op386MOVLloadidx4 || l.AuxInt != off || l.Aux != sym {
-			break
-		}
-		mem := l.Args[2]
-		if ptr != l.Args[0] || idx != l.Args[1] || mem != v_3 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l) && validValAndOff(c, off)) {
-			break
-		}
-		v.reset(Op386ANDLconstmodifyidx4)
-		v.AuxInt = makeValAndOff(c, off)
-		v.Aux = sym
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
-	// match: (MOVLstoreidx4 {sym} [off] ptr idx y:(ORLconst [c] l:(MOVLloadidx4 [off] {sym} ptr idx mem)) mem)
-	// cond: y.Uses==1 && l.Uses==1 && clobber(y, l) && validValAndOff(c,off)
-	// result: (ORLconstmodifyidx4 [makeValAndOff(c,off)] {sym} ptr idx mem)
-	for {
-		off := v.AuxInt
-		sym := v.Aux
-		ptr := v_0
-		idx := v_1
-		y := v_2
-		if y.Op != Op386ORLconst {
-			break
-		}
-		c := y.AuxInt
-		l := y.Args[0]
-		if l.Op != Op386MOVLloadidx4 || l.AuxInt != off || l.Aux != sym {
-			break
-		}
-		mem := l.Args[2]
-		if ptr != l.Args[0] || idx != l.Args[1] || mem != v_3 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l) && validValAndOff(c, off)) {
-			break
-		}
-		v.reset(Op386ORLconstmodifyidx4)
-		v.AuxInt = makeValAndOff(c, off)
-		v.Aux = sym
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
-	// match: (MOVLstoreidx4 {sym} [off] ptr idx y:(XORLconst [c] l:(MOVLloadidx4 [off] {sym} ptr idx mem)) mem)
-	// cond: y.Uses==1 && l.Uses==1 && clobber(y, l) && validValAndOff(c,off)
-	// result: (XORLconstmodifyidx4 [makeValAndOff(c,off)] {sym} ptr idx mem)
-	for {
-		off := v.AuxInt
-		sym := v.Aux
-		ptr := v_0
-		idx := v_1
-		y := v_2
-		if y.Op != Op386XORLconst {
-			break
-		}
-		c := y.AuxInt
-		l := y.Args[0]
-		if l.Op != Op386MOVLloadidx4 || l.AuxInt != off || l.Aux != sym {
-			break
-		}
-		mem := l.Args[2]
-		if ptr != l.Args[0] || idx != l.Args[1] || mem != v_3 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l) && validValAndOff(c, off)) {
-			break
-		}
-		v.reset(Op386XORLconstmodifyidx4)
-		v.AuxInt = makeValAndOff(c, off)
-		v.Aux = sym
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
 	return false
 }
 func rewriteValue386_Op386MOVSDconst(v *Value) bool {
@@ -6634,163 +4830,6 @@
 		v.AddArg2(base, mem)
 		return true
 	}
-	// match: (MOVSDload [off1] {sym1} (LEAL1 [off2] {sym2} ptr idx) mem)
-	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-	// result: (MOVSDloadidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
-	for {
-		off1 := v.AuxInt
-		sym1 := v.Aux
-		if v_0.Op != Op386LEAL1 {
-			break
-		}
-		off2 := v_0.AuxInt
-		sym2 := v_0.Aux
-		idx := v_0.Args[1]
-		ptr := v_0.Args[0]
-		mem := v_1
-		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
-			break
-		}
-		v.reset(Op386MOVSDloadidx1)
-		v.AuxInt = off1 + off2
-		v.Aux = mergeSym(sym1, sym2)
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
-	// match: (MOVSDload [off1] {sym1} (LEAL8 [off2] {sym2} ptr idx) mem)
-	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-	// result: (MOVSDloadidx8 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
-	for {
-		off1 := v.AuxInt
-		sym1 := v.Aux
-		if v_0.Op != Op386LEAL8 {
-			break
-		}
-		off2 := v_0.AuxInt
-		sym2 := v_0.Aux
-		idx := v_0.Args[1]
-		ptr := v_0.Args[0]
-		mem := v_1
-		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
-			break
-		}
-		v.reset(Op386MOVSDloadidx8)
-		v.AuxInt = off1 + off2
-		v.Aux = mergeSym(sym1, sym2)
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
-	// match: (MOVSDload [off] {sym} (ADDL ptr idx) mem)
-	// cond: ptr.Op != OpSB
-	// result: (MOVSDloadidx1 [off] {sym} ptr idx mem)
-	for {
-		off := v.AuxInt
-		sym := v.Aux
-		if v_0.Op != Op386ADDL {
-			break
-		}
-		_ = v_0.Args[1]
-		v_0_0 := v_0.Args[0]
-		v_0_1 := v_0.Args[1]
-		for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
-			ptr := v_0_0
-			idx := v_0_1
-			mem := v_1
-			if !(ptr.Op != OpSB) {
-				continue
-			}
-			v.reset(Op386MOVSDloadidx1)
-			v.AuxInt = off
-			v.Aux = sym
-			v.AddArg3(ptr, idx, mem)
-			return true
-		}
-		break
-	}
-	return false
-}
-func rewriteValue386_Op386MOVSDloadidx1(v *Value) bool {
-	v_2 := v.Args[2]
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	// match: (MOVSDloadidx1 [c] {sym} (ADDLconst [d] ptr) idx mem)
-	// result: (MOVSDloadidx1 [int64(int32(c+d))] {sym} ptr idx mem)
-	for {
-		c := v.AuxInt
-		sym := v.Aux
-		if v_0.Op != Op386ADDLconst {
-			break
-		}
-		d := v_0.AuxInt
-		ptr := v_0.Args[0]
-		idx := v_1
-		mem := v_2
-		v.reset(Op386MOVSDloadidx1)
-		v.AuxInt = int64(int32(c + d))
-		v.Aux = sym
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
-	// match: (MOVSDloadidx1 [c] {sym} ptr (ADDLconst [d] idx) mem)
-	// result: (MOVSDloadidx1 [int64(int32(c+d))] {sym} ptr idx mem)
-	for {
-		c := v.AuxInt
-		sym := v.Aux
-		ptr := v_0
-		if v_1.Op != Op386ADDLconst {
-			break
-		}
-		d := v_1.AuxInt
-		idx := v_1.Args[0]
-		mem := v_2
-		v.reset(Op386MOVSDloadidx1)
-		v.AuxInt = int64(int32(c + d))
-		v.Aux = sym
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
-	return false
-}
-func rewriteValue386_Op386MOVSDloadidx8(v *Value) bool {
-	v_2 := v.Args[2]
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	// match: (MOVSDloadidx8 [c] {sym} (ADDLconst [d] ptr) idx mem)
-	// result: (MOVSDloadidx8 [int64(int32(c+d))] {sym} ptr idx mem)
-	for {
-		c := v.AuxInt
-		sym := v.Aux
-		if v_0.Op != Op386ADDLconst {
-			break
-		}
-		d := v_0.AuxInt
-		ptr := v_0.Args[0]
-		idx := v_1
-		mem := v_2
-		v.reset(Op386MOVSDloadidx8)
-		v.AuxInt = int64(int32(c + d))
-		v.Aux = sym
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
-	// match: (MOVSDloadidx8 [c] {sym} ptr (ADDLconst [d] idx) mem)
-	// result: (MOVSDloadidx8 [int64(int32(c+8*d))] {sym} ptr idx mem)
-	for {
-		c := v.AuxInt
-		sym := v.Aux
-		ptr := v_0
-		if v_1.Op != Op386ADDLconst {
-			break
-		}
-		d := v_1.AuxInt
-		idx := v_1.Args[0]
-		mem := v_2
-		v.reset(Op386MOVSDloadidx8)
-		v.AuxInt = int64(int32(c + 8*d))
-		v.Aux = sym
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
 	return false
 }
 func rewriteValue386_Op386MOVSDstore(v *Value) bool {
@@ -6844,172 +4883,6 @@
 		v.AddArg3(base, val, mem)
 		return true
 	}
-	// match: (MOVSDstore [off1] {sym1} (LEAL1 [off2] {sym2} ptr idx) val mem)
-	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-	// result: (MOVSDstoreidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
-	for {
-		off1 := v.AuxInt
-		sym1 := v.Aux
-		if v_0.Op != Op386LEAL1 {
-			break
-		}
-		off2 := v_0.AuxInt
-		sym2 := v_0.Aux
-		idx := v_0.Args[1]
-		ptr := v_0.Args[0]
-		val := v_1
-		mem := v_2
-		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
-			break
-		}
-		v.reset(Op386MOVSDstoreidx1)
-		v.AuxInt = off1 + off2
-		v.Aux = mergeSym(sym1, sym2)
-		v.AddArg4(ptr, idx, val, mem)
-		return true
-	}
-	// match: (MOVSDstore [off1] {sym1} (LEAL8 [off2] {sym2} ptr idx) val mem)
-	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-	// result: (MOVSDstoreidx8 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
-	for {
-		off1 := v.AuxInt
-		sym1 := v.Aux
-		if v_0.Op != Op386LEAL8 {
-			break
-		}
-		off2 := v_0.AuxInt
-		sym2 := v_0.Aux
-		idx := v_0.Args[1]
-		ptr := v_0.Args[0]
-		val := v_1
-		mem := v_2
-		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
-			break
-		}
-		v.reset(Op386MOVSDstoreidx8)
-		v.AuxInt = off1 + off2
-		v.Aux = mergeSym(sym1, sym2)
-		v.AddArg4(ptr, idx, val, mem)
-		return true
-	}
-	// match: (MOVSDstore [off] {sym} (ADDL ptr idx) val mem)
-	// cond: ptr.Op != OpSB
-	// result: (MOVSDstoreidx1 [off] {sym} ptr idx val mem)
-	for {
-		off := v.AuxInt
-		sym := v.Aux
-		if v_0.Op != Op386ADDL {
-			break
-		}
-		_ = v_0.Args[1]
-		v_0_0 := v_0.Args[0]
-		v_0_1 := v_0.Args[1]
-		for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
-			ptr := v_0_0
-			idx := v_0_1
-			val := v_1
-			mem := v_2
-			if !(ptr.Op != OpSB) {
-				continue
-			}
-			v.reset(Op386MOVSDstoreidx1)
-			v.AuxInt = off
-			v.Aux = sym
-			v.AddArg4(ptr, idx, val, mem)
-			return true
-		}
-		break
-	}
-	return false
-}
-func rewriteValue386_Op386MOVSDstoreidx1(v *Value) bool {
-	v_3 := v.Args[3]
-	v_2 := v.Args[2]
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	// match: (MOVSDstoreidx1 [c] {sym} (ADDLconst [d] ptr) idx val mem)
-	// result: (MOVSDstoreidx1 [int64(int32(c+d))] {sym} ptr idx val mem)
-	for {
-		c := v.AuxInt
-		sym := v.Aux
-		if v_0.Op != Op386ADDLconst {
-			break
-		}
-		d := v_0.AuxInt
-		ptr := v_0.Args[0]
-		idx := v_1
-		val := v_2
-		mem := v_3
-		v.reset(Op386MOVSDstoreidx1)
-		v.AuxInt = int64(int32(c + d))
-		v.Aux = sym
-		v.AddArg4(ptr, idx, val, mem)
-		return true
-	}
-	// match: (MOVSDstoreidx1 [c] {sym} ptr (ADDLconst [d] idx) val mem)
-	// result: (MOVSDstoreidx1 [int64(int32(c+d))] {sym} ptr idx val mem)
-	for {
-		c := v.AuxInt
-		sym := v.Aux
-		ptr := v_0
-		if v_1.Op != Op386ADDLconst {
-			break
-		}
-		d := v_1.AuxInt
-		idx := v_1.Args[0]
-		val := v_2
-		mem := v_3
-		v.reset(Op386MOVSDstoreidx1)
-		v.AuxInt = int64(int32(c + d))
-		v.Aux = sym
-		v.AddArg4(ptr, idx, val, mem)
-		return true
-	}
-	return false
-}
-func rewriteValue386_Op386MOVSDstoreidx8(v *Value) bool {
-	v_3 := v.Args[3]
-	v_2 := v.Args[2]
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	// match: (MOVSDstoreidx8 [c] {sym} (ADDLconst [d] ptr) idx val mem)
-	// result: (MOVSDstoreidx8 [int64(int32(c+d))] {sym} ptr idx val mem)
-	for {
-		c := v.AuxInt
-		sym := v.Aux
-		if v_0.Op != Op386ADDLconst {
-			break
-		}
-		d := v_0.AuxInt
-		ptr := v_0.Args[0]
-		idx := v_1
-		val := v_2
-		mem := v_3
-		v.reset(Op386MOVSDstoreidx8)
-		v.AuxInt = int64(int32(c + d))
-		v.Aux = sym
-		v.AddArg4(ptr, idx, val, mem)
-		return true
-	}
-	// match: (MOVSDstoreidx8 [c] {sym} ptr (ADDLconst [d] idx) val mem)
-	// result: (MOVSDstoreidx8 [int64(int32(c+8*d))] {sym} ptr idx val mem)
-	for {
-		c := v.AuxInt
-		sym := v.Aux
-		ptr := v_0
-		if v_1.Op != Op386ADDLconst {
-			break
-		}
-		d := v_1.AuxInt
-		idx := v_1.Args[0]
-		val := v_2
-		mem := v_3
-		v.reset(Op386MOVSDstoreidx8)
-		v.AuxInt = int64(int32(c + 8*d))
-		v.Aux = sym
-		v.AddArg4(ptr, idx, val, mem)
-		return true
-	}
 	return false
 }
 func rewriteValue386_Op386MOVSSconst(v *Value) bool {
@@ -7080,163 +4953,6 @@
 		v.AddArg2(base, mem)
 		return true
 	}
-	// match: (MOVSSload [off1] {sym1} (LEAL1 [off2] {sym2} ptr idx) mem)
-	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-	// result: (MOVSSloadidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
-	for {
-		off1 := v.AuxInt
-		sym1 := v.Aux
-		if v_0.Op != Op386LEAL1 {
-			break
-		}
-		off2 := v_0.AuxInt
-		sym2 := v_0.Aux
-		idx := v_0.Args[1]
-		ptr := v_0.Args[0]
-		mem := v_1
-		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
-			break
-		}
-		v.reset(Op386MOVSSloadidx1)
-		v.AuxInt = off1 + off2
-		v.Aux = mergeSym(sym1, sym2)
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
-	// match: (MOVSSload [off1] {sym1} (LEAL4 [off2] {sym2} ptr idx) mem)
-	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-	// result: (MOVSSloadidx4 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
-	for {
-		off1 := v.AuxInt
-		sym1 := v.Aux
-		if v_0.Op != Op386LEAL4 {
-			break
-		}
-		off2 := v_0.AuxInt
-		sym2 := v_0.Aux
-		idx := v_0.Args[1]
-		ptr := v_0.Args[0]
-		mem := v_1
-		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
-			break
-		}
-		v.reset(Op386MOVSSloadidx4)
-		v.AuxInt = off1 + off2
-		v.Aux = mergeSym(sym1, sym2)
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
-	// match: (MOVSSload [off] {sym} (ADDL ptr idx) mem)
-	// cond: ptr.Op != OpSB
-	// result: (MOVSSloadidx1 [off] {sym} ptr idx mem)
-	for {
-		off := v.AuxInt
-		sym := v.Aux
-		if v_0.Op != Op386ADDL {
-			break
-		}
-		_ = v_0.Args[1]
-		v_0_0 := v_0.Args[0]
-		v_0_1 := v_0.Args[1]
-		for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
-			ptr := v_0_0
-			idx := v_0_1
-			mem := v_1
-			if !(ptr.Op != OpSB) {
-				continue
-			}
-			v.reset(Op386MOVSSloadidx1)
-			v.AuxInt = off
-			v.Aux = sym
-			v.AddArg3(ptr, idx, mem)
-			return true
-		}
-		break
-	}
-	return false
-}
-func rewriteValue386_Op386MOVSSloadidx1(v *Value) bool {
-	v_2 := v.Args[2]
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	// match: (MOVSSloadidx1 [c] {sym} (ADDLconst [d] ptr) idx mem)
-	// result: (MOVSSloadidx1 [int64(int32(c+d))] {sym} ptr idx mem)
-	for {
-		c := v.AuxInt
-		sym := v.Aux
-		if v_0.Op != Op386ADDLconst {
-			break
-		}
-		d := v_0.AuxInt
-		ptr := v_0.Args[0]
-		idx := v_1
-		mem := v_2
-		v.reset(Op386MOVSSloadidx1)
-		v.AuxInt = int64(int32(c + d))
-		v.Aux = sym
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
-	// match: (MOVSSloadidx1 [c] {sym} ptr (ADDLconst [d] idx) mem)
-	// result: (MOVSSloadidx1 [int64(int32(c+d))] {sym} ptr idx mem)
-	for {
-		c := v.AuxInt
-		sym := v.Aux
-		ptr := v_0
-		if v_1.Op != Op386ADDLconst {
-			break
-		}
-		d := v_1.AuxInt
-		idx := v_1.Args[0]
-		mem := v_2
-		v.reset(Op386MOVSSloadidx1)
-		v.AuxInt = int64(int32(c + d))
-		v.Aux = sym
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
-	return false
-}
-func rewriteValue386_Op386MOVSSloadidx4(v *Value) bool {
-	v_2 := v.Args[2]
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	// match: (MOVSSloadidx4 [c] {sym} (ADDLconst [d] ptr) idx mem)
-	// result: (MOVSSloadidx4 [int64(int32(c+d))] {sym} ptr idx mem)
-	for {
-		c := v.AuxInt
-		sym := v.Aux
-		if v_0.Op != Op386ADDLconst {
-			break
-		}
-		d := v_0.AuxInt
-		ptr := v_0.Args[0]
-		idx := v_1
-		mem := v_2
-		v.reset(Op386MOVSSloadidx4)
-		v.AuxInt = int64(int32(c + d))
-		v.Aux = sym
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
-	// match: (MOVSSloadidx4 [c] {sym} ptr (ADDLconst [d] idx) mem)
-	// result: (MOVSSloadidx4 [int64(int32(c+4*d))] {sym} ptr idx mem)
-	for {
-		c := v.AuxInt
-		sym := v.Aux
-		ptr := v_0
-		if v_1.Op != Op386ADDLconst {
-			break
-		}
-		d := v_1.AuxInt
-		idx := v_1.Args[0]
-		mem := v_2
-		v.reset(Op386MOVSSloadidx4)
-		v.AuxInt = int64(int32(c + 4*d))
-		v.Aux = sym
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
 	return false
 }
 func rewriteValue386_Op386MOVSSstore(v *Value) bool {
@@ -7290,172 +5006,6 @@
 		v.AddArg3(base, val, mem)
 		return true
 	}
-	// match: (MOVSSstore [off1] {sym1} (LEAL1 [off2] {sym2} ptr idx) val mem)
-	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-	// result: (MOVSSstoreidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
-	for {
-		off1 := v.AuxInt
-		sym1 := v.Aux
-		if v_0.Op != Op386LEAL1 {
-			break
-		}
-		off2 := v_0.AuxInt
-		sym2 := v_0.Aux
-		idx := v_0.Args[1]
-		ptr := v_0.Args[0]
-		val := v_1
-		mem := v_2
-		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
-			break
-		}
-		v.reset(Op386MOVSSstoreidx1)
-		v.AuxInt = off1 + off2
-		v.Aux = mergeSym(sym1, sym2)
-		v.AddArg4(ptr, idx, val, mem)
-		return true
-	}
-	// match: (MOVSSstore [off1] {sym1} (LEAL4 [off2] {sym2} ptr idx) val mem)
-	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-	// result: (MOVSSstoreidx4 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
-	for {
-		off1 := v.AuxInt
-		sym1 := v.Aux
-		if v_0.Op != Op386LEAL4 {
-			break
-		}
-		off2 := v_0.AuxInt
-		sym2 := v_0.Aux
-		idx := v_0.Args[1]
-		ptr := v_0.Args[0]
-		val := v_1
-		mem := v_2
-		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
-			break
-		}
-		v.reset(Op386MOVSSstoreidx4)
-		v.AuxInt = off1 + off2
-		v.Aux = mergeSym(sym1, sym2)
-		v.AddArg4(ptr, idx, val, mem)
-		return true
-	}
-	// match: (MOVSSstore [off] {sym} (ADDL ptr idx) val mem)
-	// cond: ptr.Op != OpSB
-	// result: (MOVSSstoreidx1 [off] {sym} ptr idx val mem)
-	for {
-		off := v.AuxInt
-		sym := v.Aux
-		if v_0.Op != Op386ADDL {
-			break
-		}
-		_ = v_0.Args[1]
-		v_0_0 := v_0.Args[0]
-		v_0_1 := v_0.Args[1]
-		for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
-			ptr := v_0_0
-			idx := v_0_1
-			val := v_1
-			mem := v_2
-			if !(ptr.Op != OpSB) {
-				continue
-			}
-			v.reset(Op386MOVSSstoreidx1)
-			v.AuxInt = off
-			v.Aux = sym
-			v.AddArg4(ptr, idx, val, mem)
-			return true
-		}
-		break
-	}
-	return false
-}
-func rewriteValue386_Op386MOVSSstoreidx1(v *Value) bool {
-	v_3 := v.Args[3]
-	v_2 := v.Args[2]
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	// match: (MOVSSstoreidx1 [c] {sym} (ADDLconst [d] ptr) idx val mem)
-	// result: (MOVSSstoreidx1 [int64(int32(c+d))] {sym} ptr idx val mem)
-	for {
-		c := v.AuxInt
-		sym := v.Aux
-		if v_0.Op != Op386ADDLconst {
-			break
-		}
-		d := v_0.AuxInt
-		ptr := v_0.Args[0]
-		idx := v_1
-		val := v_2
-		mem := v_3
-		v.reset(Op386MOVSSstoreidx1)
-		v.AuxInt = int64(int32(c + d))
-		v.Aux = sym
-		v.AddArg4(ptr, idx, val, mem)
-		return true
-	}
-	// match: (MOVSSstoreidx1 [c] {sym} ptr (ADDLconst [d] idx) val mem)
-	// result: (MOVSSstoreidx1 [int64(int32(c+d))] {sym} ptr idx val mem)
-	for {
-		c := v.AuxInt
-		sym := v.Aux
-		ptr := v_0
-		if v_1.Op != Op386ADDLconst {
-			break
-		}
-		d := v_1.AuxInt
-		idx := v_1.Args[0]
-		val := v_2
-		mem := v_3
-		v.reset(Op386MOVSSstoreidx1)
-		v.AuxInt = int64(int32(c + d))
-		v.Aux = sym
-		v.AddArg4(ptr, idx, val, mem)
-		return true
-	}
-	return false
-}
-func rewriteValue386_Op386MOVSSstoreidx4(v *Value) bool {
-	v_3 := v.Args[3]
-	v_2 := v.Args[2]
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	// match: (MOVSSstoreidx4 [c] {sym} (ADDLconst [d] ptr) idx val mem)
-	// result: (MOVSSstoreidx4 [int64(int32(c+d))] {sym} ptr idx val mem)
-	for {
-		c := v.AuxInt
-		sym := v.Aux
-		if v_0.Op != Op386ADDLconst {
-			break
-		}
-		d := v_0.AuxInt
-		ptr := v_0.Args[0]
-		idx := v_1
-		val := v_2
-		mem := v_3
-		v.reset(Op386MOVSSstoreidx4)
-		v.AuxInt = int64(int32(c + d))
-		v.Aux = sym
-		v.AddArg4(ptr, idx, val, mem)
-		return true
-	}
-	// match: (MOVSSstoreidx4 [c] {sym} ptr (ADDLconst [d] idx) val mem)
-	// result: (MOVSSstoreidx4 [int64(int32(c+4*d))] {sym} ptr idx val mem)
-	for {
-		c := v.AuxInt
-		sym := v.Aux
-		ptr := v_0
-		if v_1.Op != Op386ADDLconst {
-			break
-		}
-		d := v_1.AuxInt
-		idx := v_1.Args[0]
-		val := v_2
-		mem := v_3
-		v.reset(Op386MOVSSstoreidx4)
-		v.AuxInt = int64(int32(c + 4*d))
-		v.Aux = sym
-		v.AddArg4(ptr, idx, val, mem)
-		return true
-	}
 	return false
 }
 func rewriteValue386_Op386MOVWLSX(v *Value) bool {
@@ -7579,54 +5129,6 @@
 		v0.AddArg2(ptr, mem)
 		return true
 	}
-	// match: (MOVWLZX x:(MOVWloadidx1 [off] {sym} ptr idx mem))
-	// cond: x.Uses == 1 && clobber(x)
-	// result: @x.Block (MOVWloadidx1 <v.Type> [off] {sym} ptr idx mem)
-	for {
-		x := v_0
-		if x.Op != Op386MOVWloadidx1 {
-			break
-		}
-		off := x.AuxInt
-		sym := x.Aux
-		mem := x.Args[2]
-		ptr := x.Args[0]
-		idx := x.Args[1]
-		if !(x.Uses == 1 && clobber(x)) {
-			break
-		}
-		b = x.Block
-		v0 := b.NewValue0(v.Pos, Op386MOVWloadidx1, v.Type)
-		v.copyOf(v0)
-		v0.AuxInt = off
-		v0.Aux = sym
-		v0.AddArg3(ptr, idx, mem)
-		return true
-	}
-	// match: (MOVWLZX x:(MOVWloadidx2 [off] {sym} ptr idx mem))
-	// cond: x.Uses == 1 && clobber(x)
-	// result: @x.Block (MOVWloadidx2 <v.Type> [off] {sym} ptr idx mem)
-	for {
-		x := v_0
-		if x.Op != Op386MOVWloadidx2 {
-			break
-		}
-		off := x.AuxInt
-		sym := x.Aux
-		mem := x.Args[2]
-		ptr := x.Args[0]
-		idx := x.Args[1]
-		if !(x.Uses == 1 && clobber(x)) {
-			break
-		}
-		b = x.Block
-		v0 := b.NewValue0(v.Pos, Op386MOVWloadidx2, v.Type)
-		v.copyOf(v0)
-		v0.AuxInt = off
-		v0.Aux = sym
-		v0.AddArg3(ptr, idx, mem)
-		return true
-	}
 	// match: (MOVWLZX (ANDLconst [c] x))
 	// result: (ANDLconst [c & 0xffff] x)
 	for {
@@ -7711,79 +5213,6 @@
 		v.AddArg2(base, mem)
 		return true
 	}
-	// match: (MOVWload [off1] {sym1} (LEAL1 [off2] {sym2} ptr idx) mem)
-	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-	// result: (MOVWloadidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
-	for {
-		off1 := v.AuxInt
-		sym1 := v.Aux
-		if v_0.Op != Op386LEAL1 {
-			break
-		}
-		off2 := v_0.AuxInt
-		sym2 := v_0.Aux
-		idx := v_0.Args[1]
-		ptr := v_0.Args[0]
-		mem := v_1
-		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
-			break
-		}
-		v.reset(Op386MOVWloadidx1)
-		v.AuxInt = off1 + off2
-		v.Aux = mergeSym(sym1, sym2)
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
-	// match: (MOVWload [off1] {sym1} (LEAL2 [off2] {sym2} ptr idx) mem)
-	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-	// result: (MOVWloadidx2 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
-	for {
-		off1 := v.AuxInt
-		sym1 := v.Aux
-		if v_0.Op != Op386LEAL2 {
-			break
-		}
-		off2 := v_0.AuxInt
-		sym2 := v_0.Aux
-		idx := v_0.Args[1]
-		ptr := v_0.Args[0]
-		mem := v_1
-		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
-			break
-		}
-		v.reset(Op386MOVWloadidx2)
-		v.AuxInt = off1 + off2
-		v.Aux = mergeSym(sym1, sym2)
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
-	// match: (MOVWload [off] {sym} (ADDL ptr idx) mem)
-	// cond: ptr.Op != OpSB
-	// result: (MOVWloadidx1 [off] {sym} ptr idx mem)
-	for {
-		off := v.AuxInt
-		sym := v.Aux
-		if v_0.Op != Op386ADDL {
-			break
-		}
-		_ = v_0.Args[1]
-		v_0_0 := v_0.Args[0]
-		v_0_1 := v_0.Args[1]
-		for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
-			ptr := v_0_0
-			idx := v_0_1
-			mem := v_1
-			if !(ptr.Op != OpSB) {
-				continue
-			}
-			v.reset(Op386MOVWloadidx1)
-			v.AuxInt = off
-			v.Aux = sym
-			v.AddArg3(ptr, idx, mem)
-			return true
-		}
-		break
-	}
 	// match: (MOVWload [off] {sym} (SB) _)
 	// cond: symIsRO(sym)
 	// result: (MOVLconst [int64(read16(sym, off, config.ctxt.Arch.ByteOrder))])
@@ -7799,116 +5228,6 @@
 	}
 	return false
 }
-func rewriteValue386_Op386MOVWloadidx1(v *Value) bool {
-	v_2 := v.Args[2]
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	// match: (MOVWloadidx1 [c] {sym} ptr (SHLLconst [1] idx) mem)
-	// result: (MOVWloadidx2 [c] {sym} ptr idx mem)
-	for {
-		c := v.AuxInt
-		sym := v.Aux
-		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-			ptr := v_0
-			if v_1.Op != Op386SHLLconst || v_1.AuxInt != 1 {
-				continue
-			}
-			idx := v_1.Args[0]
-			mem := v_2
-			v.reset(Op386MOVWloadidx2)
-			v.AuxInt = c
-			v.Aux = sym
-			v.AddArg3(ptr, idx, mem)
-			return true
-		}
-		break
-	}
-	// match: (MOVWloadidx1 [c] {sym} (ADDLconst [d] ptr) idx mem)
-	// result: (MOVWloadidx1 [int64(int32(c+d))] {sym} ptr idx mem)
-	for {
-		c := v.AuxInt
-		sym := v.Aux
-		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-			if v_0.Op != Op386ADDLconst {
-				continue
-			}
-			d := v_0.AuxInt
-			ptr := v_0.Args[0]
-			idx := v_1
-			mem := v_2
-			v.reset(Op386MOVWloadidx1)
-			v.AuxInt = int64(int32(c + d))
-			v.Aux = sym
-			v.AddArg3(ptr, idx, mem)
-			return true
-		}
-		break
-	}
-	// match: (MOVWloadidx1 [c] {sym} ptr (ADDLconst [d] idx) mem)
-	// result: (MOVWloadidx1 [int64(int32(c+d))] {sym} ptr idx mem)
-	for {
-		c := v.AuxInt
-		sym := v.Aux
-		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-			ptr := v_0
-			if v_1.Op != Op386ADDLconst {
-				continue
-			}
-			d := v_1.AuxInt
-			idx := v_1.Args[0]
-			mem := v_2
-			v.reset(Op386MOVWloadidx1)
-			v.AuxInt = int64(int32(c + d))
-			v.Aux = sym
-			v.AddArg3(ptr, idx, mem)
-			return true
-		}
-		break
-	}
-	return false
-}
-func rewriteValue386_Op386MOVWloadidx2(v *Value) bool {
-	v_2 := v.Args[2]
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	// match: (MOVWloadidx2 [c] {sym} (ADDLconst [d] ptr) idx mem)
-	// result: (MOVWloadidx2 [int64(int32(c+d))] {sym} ptr idx mem)
-	for {
-		c := v.AuxInt
-		sym := v.Aux
-		if v_0.Op != Op386ADDLconst {
-			break
-		}
-		d := v_0.AuxInt
-		ptr := v_0.Args[0]
-		idx := v_1
-		mem := v_2
-		v.reset(Op386MOVWloadidx2)
-		v.AuxInt = int64(int32(c + d))
-		v.Aux = sym
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
-	// match: (MOVWloadidx2 [c] {sym} ptr (ADDLconst [d] idx) mem)
-	// result: (MOVWloadidx2 [int64(int32(c+2*d))] {sym} ptr idx mem)
-	for {
-		c := v.AuxInt
-		sym := v.Aux
-		ptr := v_0
-		if v_1.Op != Op386ADDLconst {
-			break
-		}
-		d := v_1.AuxInt
-		idx := v_1.Args[0]
-		mem := v_2
-		v.reset(Op386MOVWloadidx2)
-		v.AuxInt = int64(int32(c + 2*d))
-		v.Aux = sym
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
-	return false
-}
 func rewriteValue386_Op386MOVWstore(v *Value) bool {
 	v_2 := v.Args[2]
 	v_1 := v.Args[1]
@@ -8015,82 +5334,6 @@
 		v.AddArg3(base, val, mem)
 		return true
 	}
-	// match: (MOVWstore [off1] {sym1} (LEAL1 [off2] {sym2} ptr idx) val mem)
-	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-	// result: (MOVWstoreidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
-	for {
-		off1 := v.AuxInt
-		sym1 := v.Aux
-		if v_0.Op != Op386LEAL1 {
-			break
-		}
-		off2 := v_0.AuxInt
-		sym2 := v_0.Aux
-		idx := v_0.Args[1]
-		ptr := v_0.Args[0]
-		val := v_1
-		mem := v_2
-		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
-			break
-		}
-		v.reset(Op386MOVWstoreidx1)
-		v.AuxInt = off1 + off2
-		v.Aux = mergeSym(sym1, sym2)
-		v.AddArg4(ptr, idx, val, mem)
-		return true
-	}
-	// match: (MOVWstore [off1] {sym1} (LEAL2 [off2] {sym2} ptr idx) val mem)
-	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-	// result: (MOVWstoreidx2 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
-	for {
-		off1 := v.AuxInt
-		sym1 := v.Aux
-		if v_0.Op != Op386LEAL2 {
-			break
-		}
-		off2 := v_0.AuxInt
-		sym2 := v_0.Aux
-		idx := v_0.Args[1]
-		ptr := v_0.Args[0]
-		val := v_1
-		mem := v_2
-		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
-			break
-		}
-		v.reset(Op386MOVWstoreidx2)
-		v.AuxInt = off1 + off2
-		v.Aux = mergeSym(sym1, sym2)
-		v.AddArg4(ptr, idx, val, mem)
-		return true
-	}
-	// match: (MOVWstore [off] {sym} (ADDL ptr idx) val mem)
-	// cond: ptr.Op != OpSB
-	// result: (MOVWstoreidx1 [off] {sym} ptr idx val mem)
-	for {
-		off := v.AuxInt
-		sym := v.Aux
-		if v_0.Op != Op386ADDL {
-			break
-		}
-		_ = v_0.Args[1]
-		v_0_0 := v_0.Args[0]
-		v_0_1 := v_0.Args[1]
-		for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
-			ptr := v_0_0
-			idx := v_0_1
-			val := v_1
-			mem := v_2
-			if !(ptr.Op != OpSB) {
-				continue
-			}
-			v.reset(Op386MOVWstoreidx1)
-			v.AuxInt = off
-			v.Aux = sym
-			v.AddArg4(ptr, idx, val, mem)
-			return true
-		}
-		break
-	}
 	// match: (MOVWstore [i] {s} p (SHRLconst [16] w) x:(MOVWstore [i-2] {s} p w mem))
 	// cond: x.Uses == 1 && clobber(x)
 	// result: (MOVLstore [i-2] {s} p w mem)
@@ -8146,6 +5389,60 @@
 		v.AddArg3(p, w0, mem)
 		return true
 	}
+	// match: (MOVWstore [i] {s} p1 (SHRLconst [16] w) x:(MOVWstore [i] {s} p0 w mem))
+	// cond: x.Uses == 1 && sequentialAddresses(p0, p1, 2) && clobber(x)
+	// result: (MOVLstore [i] {s} p0 w mem)
+	for {
+		i := v.AuxInt
+		s := v.Aux
+		p1 := v_0
+		if v_1.Op != Op386SHRLconst || v_1.AuxInt != 16 {
+			break
+		}
+		w := v_1.Args[0]
+		x := v_2
+		if x.Op != Op386MOVWstore || x.AuxInt != i || x.Aux != s {
+			break
+		}
+		mem := x.Args[2]
+		p0 := x.Args[0]
+		if w != x.Args[1] || !(x.Uses == 1 && sequentialAddresses(p0, p1, 2) && clobber(x)) {
+			break
+		}
+		v.reset(Op386MOVLstore)
+		v.AuxInt = i
+		v.Aux = s
+		v.AddArg3(p0, w, mem)
+		return true
+	}
+	// match: (MOVWstore [i] {s} p1 (SHRLconst [j] w) x:(MOVWstore [i] {s} p0 w0:(SHRLconst [j-16] w) mem))
+	// cond: x.Uses == 1 && sequentialAddresses(p0, p1, 2) && clobber(x)
+	// result: (MOVLstore [i] {s} p0 w0 mem)
+	for {
+		i := v.AuxInt
+		s := v.Aux
+		p1 := v_0
+		if v_1.Op != Op386SHRLconst {
+			break
+		}
+		j := v_1.AuxInt
+		w := v_1.Args[0]
+		x := v_2
+		if x.Op != Op386MOVWstore || x.AuxInt != i || x.Aux != s {
+			break
+		}
+		mem := x.Args[2]
+		p0 := x.Args[0]
+		w0 := x.Args[1]
+		if w0.Op != Op386SHRLconst || w0.AuxInt != j-16 || w != w0.Args[0] || !(x.Uses == 1 && sequentialAddresses(p0, p1, 2) && clobber(x)) {
+			break
+		}
+		v.reset(Op386MOVLstore)
+		v.AuxInt = i
+		v.Aux = s
+		v.AddArg3(p0, w0, mem)
+		return true
+	}
 	return false
 }
 func rewriteValue386_Op386MOVWstoreconst(v *Value) bool {
@@ -8196,69 +5493,6 @@
 		v.AddArg2(ptr, mem)
 		return true
 	}
-	// match: (MOVWstoreconst [x] {sym1} (LEAL1 [off] {sym2} ptr idx) mem)
-	// cond: canMergeSym(sym1, sym2)
-	// result: (MOVWstoreconstidx1 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
-	for {
-		x := v.AuxInt
-		sym1 := v.Aux
-		if v_0.Op != Op386LEAL1 {
-			break
-		}
-		off := v_0.AuxInt
-		sym2 := v_0.Aux
-		idx := v_0.Args[1]
-		ptr := v_0.Args[0]
-		mem := v_1
-		if !(canMergeSym(sym1, sym2)) {
-			break
-		}
-		v.reset(Op386MOVWstoreconstidx1)
-		v.AuxInt = ValAndOff(x).add(off)
-		v.Aux = mergeSym(sym1, sym2)
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
-	// match: (MOVWstoreconst [x] {sym1} (LEAL2 [off] {sym2} ptr idx) mem)
-	// cond: canMergeSym(sym1, sym2)
-	// result: (MOVWstoreconstidx2 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
-	for {
-		x := v.AuxInt
-		sym1 := v.Aux
-		if v_0.Op != Op386LEAL2 {
-			break
-		}
-		off := v_0.AuxInt
-		sym2 := v_0.Aux
-		idx := v_0.Args[1]
-		ptr := v_0.Args[0]
-		mem := v_1
-		if !(canMergeSym(sym1, sym2)) {
-			break
-		}
-		v.reset(Op386MOVWstoreconstidx2)
-		v.AuxInt = ValAndOff(x).add(off)
-		v.Aux = mergeSym(sym1, sym2)
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
-	// match: (MOVWstoreconst [x] {sym} (ADDL ptr idx) mem)
-	// result: (MOVWstoreconstidx1 [x] {sym} ptr idx mem)
-	for {
-		x := v.AuxInt
-		sym := v.Aux
-		if v_0.Op != Op386ADDL {
-			break
-		}
-		idx := v_0.Args[1]
-		ptr := v_0.Args[0]
-		mem := v_1
-		v.reset(Op386MOVWstoreconstidx1)
-		v.AuxInt = x
-		v.Aux = sym
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
 	// match: (MOVWstoreconst [c] {s} p x:(MOVWstoreconst [a] {s} p mem))
 	// cond: x.Uses == 1 && ValAndOff(a).Off() + 2 == ValAndOff(c).Off() && clobber(x)
 	// result: (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p mem)
@@ -8309,413 +5543,56 @@
 		v.AddArg2(p, mem)
 		return true
 	}
-	return false
-}
-func rewriteValue386_Op386MOVWstoreconstidx1(v *Value) bool {
-	v_2 := v.Args[2]
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	// match: (MOVWstoreconstidx1 [c] {sym} ptr (SHLLconst [1] idx) mem)
-	// result: (MOVWstoreconstidx2 [c] {sym} ptr idx mem)
-	for {
-		c := v.AuxInt
-		sym := v.Aux
-		ptr := v_0
-		if v_1.Op != Op386SHLLconst || v_1.AuxInt != 1 {
-			break
-		}
-		idx := v_1.Args[0]
-		mem := v_2
-		v.reset(Op386MOVWstoreconstidx2)
-		v.AuxInt = c
-		v.Aux = sym
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
-	// match: (MOVWstoreconstidx1 [x] {sym} (ADDLconst [c] ptr) idx mem)
-	// result: (MOVWstoreconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
-	for {
-		x := v.AuxInt
-		sym := v.Aux
-		if v_0.Op != Op386ADDLconst {
-			break
-		}
-		c := v_0.AuxInt
-		ptr := v_0.Args[0]
-		idx := v_1
-		mem := v_2
-		v.reset(Op386MOVWstoreconstidx1)
-		v.AuxInt = ValAndOff(x).add(c)
-		v.Aux = sym
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
-	// match: (MOVWstoreconstidx1 [x] {sym} ptr (ADDLconst [c] idx) mem)
-	// result: (MOVWstoreconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
-	for {
-		x := v.AuxInt
-		sym := v.Aux
-		ptr := v_0
-		if v_1.Op != Op386ADDLconst {
-			break
-		}
-		c := v_1.AuxInt
-		idx := v_1.Args[0]
-		mem := v_2
-		v.reset(Op386MOVWstoreconstidx1)
-		v.AuxInt = ValAndOff(x).add(c)
-		v.Aux = sym
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
-	// match: (MOVWstoreconstidx1 [c] {s} p i x:(MOVWstoreconstidx1 [a] {s} p i mem))
-	// cond: x.Uses == 1 && ValAndOff(a).Off() + 2 == ValAndOff(c).Off() && clobber(x)
-	// result: (MOVLstoreconstidx1 [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p i mem)
+	// match: (MOVWstoreconst [c] {s} p1 x:(MOVWstoreconst [a] {s} p0 mem))
+	// cond: x.Uses == 1 && ValAndOff(a).Off() == ValAndOff(c).Off() && sequentialAddresses(p0, p1, 2) && clobber(x)
+	// result: (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p0 mem)
 	for {
 		c := v.AuxInt
 		s := v.Aux
-		p := v_0
-		i := v_1
-		x := v_2
-		if x.Op != Op386MOVWstoreconstidx1 {
+		p1 := v_0
+		x := v_1
+		if x.Op != Op386MOVWstoreconst {
 			break
 		}
 		a := x.AuxInt
 		if x.Aux != s {
 			break
 		}
-		mem := x.Args[2]
-		if p != x.Args[0] || i != x.Args[1] || !(x.Uses == 1 && ValAndOff(a).Off()+2 == ValAndOff(c).Off() && clobber(x)) {
+		mem := x.Args[1]
+		p0 := x.Args[0]
+		if !(x.Uses == 1 && ValAndOff(a).Off() == ValAndOff(c).Off() && sequentialAddresses(p0, p1, 2) && clobber(x)) {
 			break
 		}
-		v.reset(Op386MOVLstoreconstidx1)
+		v.reset(Op386MOVLstoreconst)
 		v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xffff|ValAndOff(c).Val()<<16, ValAndOff(a).Off())
 		v.Aux = s
-		v.AddArg3(p, i, mem)
+		v.AddArg2(p0, mem)
 		return true
 	}
-	return false
-}
-func rewriteValue386_Op386MOVWstoreconstidx2(v *Value) bool {
-	v_2 := v.Args[2]
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	b := v.Block
-	// match: (MOVWstoreconstidx2 [x] {sym} (ADDLconst [c] ptr) idx mem)
-	// result: (MOVWstoreconstidx2 [ValAndOff(x).add(c)] {sym} ptr idx mem)
+	// match: (MOVWstoreconst [a] {s} p0 x:(MOVWstoreconst [c] {s} p1 mem))
+	// cond: x.Uses == 1 && ValAndOff(a).Off() == ValAndOff(c).Off() && sequentialAddresses(p0, p1, 2) && clobber(x)
+	// result: (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p0 mem)
 	for {
-		x := v.AuxInt
-		sym := v.Aux
-		if v_0.Op != Op386ADDLconst {
-			break
-		}
-		c := v_0.AuxInt
-		ptr := v_0.Args[0]
-		idx := v_1
-		mem := v_2
-		v.reset(Op386MOVWstoreconstidx2)
-		v.AuxInt = ValAndOff(x).add(c)
-		v.Aux = sym
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
-	// match: (MOVWstoreconstidx2 [x] {sym} ptr (ADDLconst [c] idx) mem)
-	// result: (MOVWstoreconstidx2 [ValAndOff(x).add(2*c)] {sym} ptr idx mem)
-	for {
-		x := v.AuxInt
-		sym := v.Aux
-		ptr := v_0
-		if v_1.Op != Op386ADDLconst {
-			break
-		}
-		c := v_1.AuxInt
-		idx := v_1.Args[0]
-		mem := v_2
-		v.reset(Op386MOVWstoreconstidx2)
-		v.AuxInt = ValAndOff(x).add(2 * c)
-		v.Aux = sym
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
-	// match: (MOVWstoreconstidx2 [c] {s} p i x:(MOVWstoreconstidx2 [a] {s} p i mem))
-	// cond: x.Uses == 1 && ValAndOff(a).Off() + 2 == ValAndOff(c).Off() && clobber(x)
-	// result: (MOVLstoreconstidx1 [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p (SHLLconst <i.Type> [1] i) mem)
-	for {
-		c := v.AuxInt
+		a := v.AuxInt
 		s := v.Aux
-		p := v_0
-		i := v_1
-		x := v_2
-		if x.Op != Op386MOVWstoreconstidx2 {
+		p0 := v_0
+		x := v_1
+		if x.Op != Op386MOVWstoreconst {
 			break
 		}
-		a := x.AuxInt
+		c := x.AuxInt
 		if x.Aux != s {
 			break
 		}
-		mem := x.Args[2]
-		if p != x.Args[0] || i != x.Args[1] || !(x.Uses == 1 && ValAndOff(a).Off()+2 == ValAndOff(c).Off() && clobber(x)) {
+		mem := x.Args[1]
+		p1 := x.Args[0]
+		if !(x.Uses == 1 && ValAndOff(a).Off() == ValAndOff(c).Off() && sequentialAddresses(p0, p1, 2) && clobber(x)) {
 			break
 		}
-		v.reset(Op386MOVLstoreconstidx1)
+		v.reset(Op386MOVLstoreconst)
 		v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xffff|ValAndOff(c).Val()<<16, ValAndOff(a).Off())
 		v.Aux = s
-		v0 := b.NewValue0(v.Pos, Op386SHLLconst, i.Type)
-		v0.AuxInt = 1
-		v0.AddArg(i)
-		v.AddArg3(p, v0, mem)
-		return true
-	}
-	return false
-}
-func rewriteValue386_Op386MOVWstoreidx1(v *Value) bool {
-	v_3 := v.Args[3]
-	v_2 := v.Args[2]
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	// match: (MOVWstoreidx1 [c] {sym} ptr (SHLLconst [1] idx) val mem)
-	// result: (MOVWstoreidx2 [c] {sym} ptr idx val mem)
-	for {
-		c := v.AuxInt
-		sym := v.Aux
-		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-			ptr := v_0
-			if v_1.Op != Op386SHLLconst || v_1.AuxInt != 1 {
-				continue
-			}
-			idx := v_1.Args[0]
-			val := v_2
-			mem := v_3
-			v.reset(Op386MOVWstoreidx2)
-			v.AuxInt = c
-			v.Aux = sym
-			v.AddArg4(ptr, idx, val, mem)
-			return true
-		}
-		break
-	}
-	// match: (MOVWstoreidx1 [c] {sym} (ADDLconst [d] ptr) idx val mem)
-	// result: (MOVWstoreidx1 [int64(int32(c+d))] {sym} ptr idx val mem)
-	for {
-		c := v.AuxInt
-		sym := v.Aux
-		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-			if v_0.Op != Op386ADDLconst {
-				continue
-			}
-			d := v_0.AuxInt
-			ptr := v_0.Args[0]
-			idx := v_1
-			val := v_2
-			mem := v_3
-			v.reset(Op386MOVWstoreidx1)
-			v.AuxInt = int64(int32(c + d))
-			v.Aux = sym
-			v.AddArg4(ptr, idx, val, mem)
-			return true
-		}
-		break
-	}
-	// match: (MOVWstoreidx1 [c] {sym} ptr (ADDLconst [d] idx) val mem)
-	// result: (MOVWstoreidx1 [int64(int32(c+d))] {sym} ptr idx val mem)
-	for {
-		c := v.AuxInt
-		sym := v.Aux
-		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-			ptr := v_0
-			if v_1.Op != Op386ADDLconst {
-				continue
-			}
-			d := v_1.AuxInt
-			idx := v_1.Args[0]
-			val := v_2
-			mem := v_3
-			v.reset(Op386MOVWstoreidx1)
-			v.AuxInt = int64(int32(c + d))
-			v.Aux = sym
-			v.AddArg4(ptr, idx, val, mem)
-			return true
-		}
-		break
-	}
-	// match: (MOVWstoreidx1 [i] {s} p idx (SHRLconst [16] w) x:(MOVWstoreidx1 [i-2] {s} p idx w mem))
-	// cond: x.Uses == 1 && clobber(x)
-	// result: (MOVLstoreidx1 [i-2] {s} p idx w mem)
-	for {
-		i := v.AuxInt
-		s := v.Aux
-		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-			p := v_0
-			idx := v_1
-			if v_2.Op != Op386SHRLconst || v_2.AuxInt != 16 {
-				continue
-			}
-			w := v_2.Args[0]
-			x := v_3
-			if x.Op != Op386MOVWstoreidx1 || x.AuxInt != i-2 || x.Aux != s {
-				continue
-			}
-			mem := x.Args[3]
-			x_0 := x.Args[0]
-			x_1 := x.Args[1]
-			for _i1 := 0; _i1 <= 1; _i1, x_0, x_1 = _i1+1, x_1, x_0 {
-				if p != x_0 || idx != x_1 || w != x.Args[2] || !(x.Uses == 1 && clobber(x)) {
-					continue
-				}
-				v.reset(Op386MOVLstoreidx1)
-				v.AuxInt = i - 2
-				v.Aux = s
-				v.AddArg4(p, idx, w, mem)
-				return true
-			}
-		}
-		break
-	}
-	// match: (MOVWstoreidx1 [i] {s} p idx (SHRLconst [j] w) x:(MOVWstoreidx1 [i-2] {s} p idx w0:(SHRLconst [j-16] w) mem))
-	// cond: x.Uses == 1 && clobber(x)
-	// result: (MOVLstoreidx1 [i-2] {s} p idx w0 mem)
-	for {
-		i := v.AuxInt
-		s := v.Aux
-		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-			p := v_0
-			idx := v_1
-			if v_2.Op != Op386SHRLconst {
-				continue
-			}
-			j := v_2.AuxInt
-			w := v_2.Args[0]
-			x := v_3
-			if x.Op != Op386MOVWstoreidx1 || x.AuxInt != i-2 || x.Aux != s {
-				continue
-			}
-			mem := x.Args[3]
-			x_0 := x.Args[0]
-			x_1 := x.Args[1]
-			for _i1 := 0; _i1 <= 1; _i1, x_0, x_1 = _i1+1, x_1, x_0 {
-				if p != x_0 || idx != x_1 {
-					continue
-				}
-				w0 := x.Args[2]
-				if w0.Op != Op386SHRLconst || w0.AuxInt != j-16 || w != w0.Args[0] || !(x.Uses == 1 && clobber(x)) {
-					continue
-				}
-				v.reset(Op386MOVLstoreidx1)
-				v.AuxInt = i - 2
-				v.Aux = s
-				v.AddArg4(p, idx, w0, mem)
-				return true
-			}
-		}
-		break
-	}
-	return false
-}
-func rewriteValue386_Op386MOVWstoreidx2(v *Value) bool {
-	v_3 := v.Args[3]
-	v_2 := v.Args[2]
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	b := v.Block
-	// match: (MOVWstoreidx2 [c] {sym} (ADDLconst [d] ptr) idx val mem)
-	// result: (MOVWstoreidx2 [int64(int32(c+d))] {sym} ptr idx val mem)
-	for {
-		c := v.AuxInt
-		sym := v.Aux
-		if v_0.Op != Op386ADDLconst {
-			break
-		}
-		d := v_0.AuxInt
-		ptr := v_0.Args[0]
-		idx := v_1
-		val := v_2
-		mem := v_3
-		v.reset(Op386MOVWstoreidx2)
-		v.AuxInt = int64(int32(c + d))
-		v.Aux = sym
-		v.AddArg4(ptr, idx, val, mem)
-		return true
-	}
-	// match: (MOVWstoreidx2 [c] {sym} ptr (ADDLconst [d] idx) val mem)
-	// result: (MOVWstoreidx2 [int64(int32(c+2*d))] {sym} ptr idx val mem)
-	for {
-		c := v.AuxInt
-		sym := v.Aux
-		ptr := v_0
-		if v_1.Op != Op386ADDLconst {
-			break
-		}
-		d := v_1.AuxInt
-		idx := v_1.Args[0]
-		val := v_2
-		mem := v_3
-		v.reset(Op386MOVWstoreidx2)
-		v.AuxInt = int64(int32(c + 2*d))
-		v.Aux = sym
-		v.AddArg4(ptr, idx, val, mem)
-		return true
-	}
-	// match: (MOVWstoreidx2 [i] {s} p idx (SHRLconst [16] w) x:(MOVWstoreidx2 [i-2] {s} p idx w mem))
-	// cond: x.Uses == 1 && clobber(x)
-	// result: (MOVLstoreidx1 [i-2] {s} p (SHLLconst <idx.Type> [1] idx) w mem)
-	for {
-		i := v.AuxInt
-		s := v.Aux
-		p := v_0
-		idx := v_1
-		if v_2.Op != Op386SHRLconst || v_2.AuxInt != 16 {
-			break
-		}
-		w := v_2.Args[0]
-		x := v_3
-		if x.Op != Op386MOVWstoreidx2 || x.AuxInt != i-2 || x.Aux != s {
-			break
-		}
-		mem := x.Args[3]
-		if p != x.Args[0] || idx != x.Args[1] || w != x.Args[2] || !(x.Uses == 1 && clobber(x)) {
-			break
-		}
-		v.reset(Op386MOVLstoreidx1)
-		v.AuxInt = i - 2
-		v.Aux = s
-		v0 := b.NewValue0(v.Pos, Op386SHLLconst, idx.Type)
-		v0.AuxInt = 1
-		v0.AddArg(idx)
-		v.AddArg4(p, v0, w, mem)
-		return true
-	}
-	// match: (MOVWstoreidx2 [i] {s} p idx (SHRLconst [j] w) x:(MOVWstoreidx2 [i-2] {s} p idx w0:(SHRLconst [j-16] w) mem))
-	// cond: x.Uses == 1 && clobber(x)
-	// result: (MOVLstoreidx1 [i-2] {s} p (SHLLconst <idx.Type> [1] idx) w0 mem)
-	for {
-		i := v.AuxInt
-		s := v.Aux
-		p := v_0
-		idx := v_1
-		if v_2.Op != Op386SHRLconst {
-			break
-		}
-		j := v_2.AuxInt
-		w := v_2.Args[0]
-		x := v_3
-		if x.Op != Op386MOVWstoreidx2 || x.AuxInt != i-2 || x.Aux != s {
-			break
-		}
-		mem := x.Args[3]
-		if p != x.Args[0] || idx != x.Args[1] {
-			break
-		}
-		w0 := x.Args[2]
-		if w0.Op != Op386SHRLconst || w0.AuxInt != j-16 || w != w0.Args[0] || !(x.Uses == 1 && clobber(x)) {
-			break
-		}
-		v.reset(Op386MOVLstoreidx1)
-		v.AuxInt = i - 2
-		v.Aux = s
-		v0 := b.NewValue0(v.Pos, Op386SHLLconst, idx.Type)
-		v0.AuxInt = 1
-		v0.AddArg(idx)
-		v.AddArg4(p, v0, w0, mem)
+		v.AddArg2(p0, mem)
 		return true
 	}
 	return false
@@ -8764,32 +5641,6 @@
 		}
 		break
 	}
-	// match: (MULL x l:(MOVLloadidx4 [off] {sym} ptr idx mem))
-	// cond: canMergeLoadClobber(v, l, x) && clobber(l)
-	// result: (MULLloadidx4 x [off] {sym} ptr idx mem)
-	for {
-		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-			x := v_0
-			l := v_1
-			if l.Op != Op386MOVLloadidx4 {
-				continue
-			}
-			off := l.AuxInt
-			sym := l.Aux
-			mem := l.Args[2]
-			ptr := l.Args[0]
-			idx := l.Args[1]
-			if !(canMergeLoadClobber(v, l, x) && clobber(l)) {
-				continue
-			}
-			v.reset(Op386MULLloadidx4)
-			v.AuxInt = off
-			v.Aux = sym
-			v.AddArg4(x, ptr, idx, mem)
-			return true
-		}
-		break
-	}
 	return false
 }
 func rewriteValue386_Op386MULLconst(v *Value) bool {
@@ -9267,109 +6118,6 @@
 		v.AddArg3(val, base, mem)
 		return true
 	}
-	// match: (MULLload [off1] {sym1} val (LEAL4 [off2] {sym2} ptr idx) mem)
-	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-	// result: (MULLloadidx4 [off1+off2] {mergeSym(sym1,sym2)} val ptr idx mem)
-	for {
-		off1 := v.AuxInt
-		sym1 := v.Aux
-		val := v_0
-		if v_1.Op != Op386LEAL4 {
-			break
-		}
-		off2 := v_1.AuxInt
-		sym2 := v_1.Aux
-		idx := v_1.Args[1]
-		ptr := v_1.Args[0]
-		mem := v_2
-		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
-			break
-		}
-		v.reset(Op386MULLloadidx4)
-		v.AuxInt = off1 + off2
-		v.Aux = mergeSym(sym1, sym2)
-		v.AddArg4(val, ptr, idx, mem)
-		return true
-	}
-	return false
-}
-func rewriteValue386_Op386MULLloadidx4(v *Value) bool {
-	v_3 := v.Args[3]
-	v_2 := v.Args[2]
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	b := v.Block
-	config := b.Func.Config
-	// match: (MULLloadidx4 [off1] {sym} val (ADDLconst [off2] base) idx mem)
-	// cond: is32Bit(off1+off2)
-	// result: (MULLloadidx4 [off1+off2] {sym} val base idx mem)
-	for {
-		off1 := v.AuxInt
-		sym := v.Aux
-		val := v_0
-		if v_1.Op != Op386ADDLconst {
-			break
-		}
-		off2 := v_1.AuxInt
-		base := v_1.Args[0]
-		idx := v_2
-		mem := v_3
-		if !(is32Bit(off1 + off2)) {
-			break
-		}
-		v.reset(Op386MULLloadidx4)
-		v.AuxInt = off1 + off2
-		v.Aux = sym
-		v.AddArg4(val, base, idx, mem)
-		return true
-	}
-	// match: (MULLloadidx4 [off1] {sym} val base (ADDLconst [off2] idx) mem)
-	// cond: is32Bit(off1+off2*4)
-	// result: (MULLloadidx4 [off1+off2*4] {sym} val base idx mem)
-	for {
-		off1 := v.AuxInt
-		sym := v.Aux
-		val := v_0
-		base := v_1
-		if v_2.Op != Op386ADDLconst {
-			break
-		}
-		off2 := v_2.AuxInt
-		idx := v_2.Args[0]
-		mem := v_3
-		if !(is32Bit(off1 + off2*4)) {
-			break
-		}
-		v.reset(Op386MULLloadidx4)
-		v.AuxInt = off1 + off2*4
-		v.Aux = sym
-		v.AddArg4(val, base, idx, mem)
-		return true
-	}
-	// match: (MULLloadidx4 [off1] {sym1} val (LEAL [off2] {sym2} base) idx mem)
-	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)
-	// result: (MULLloadidx4 [off1+off2] {mergeSym(sym1,sym2)} val base idx mem)
-	for {
-		off1 := v.AuxInt
-		sym1 := v.Aux
-		val := v_0
-		if v_1.Op != Op386LEAL {
-			break
-		}
-		off2 := v_1.AuxInt
-		sym2 := v_1.Aux
-		base := v_1.Args[0]
-		idx := v_2
-		mem := v_3
-		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) {
-			break
-		}
-		v.reset(Op386MULLloadidx4)
-		v.AuxInt = off1 + off2
-		v.Aux = mergeSym(sym1, sym2)
-		v.AddArg4(val, base, idx, mem)
-		return true
-	}
 	return false
 }
 func rewriteValue386_Op386MULSD(v *Value) bool {
@@ -9692,32 +6440,6 @@
 		}
 		break
 	}
-	// match: (ORL x l:(MOVLloadidx4 [off] {sym} ptr idx mem))
-	// cond: canMergeLoadClobber(v, l, x) && clobber(l)
-	// result: (ORLloadidx4 x [off] {sym} ptr idx mem)
-	for {
-		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-			x := v_0
-			l := v_1
-			if l.Op != Op386MOVLloadidx4 {
-				continue
-			}
-			off := l.AuxInt
-			sym := l.Aux
-			mem := l.Args[2]
-			ptr := l.Args[0]
-			idx := l.Args[1]
-			if !(canMergeLoadClobber(v, l, x) && clobber(l)) {
-				continue
-			}
-			v.reset(Op386ORLloadidx4)
-			v.AuxInt = off
-			v.Aux = sym
-			v.AddArg4(x, ptr, idx, mem)
-			return true
-		}
-		break
-	}
 	// match: (ORL x x)
 	// result: x
 	for {
@@ -9767,6 +6489,42 @@
 		}
 		break
 	}
+	// match: (ORL x0:(MOVBload [i] {s} p0 mem) s0:(SHLLconst [8] x1:(MOVBload [i] {s} p1 mem)))
+	// cond: x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && sequentialAddresses(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, s0)
+	// result: @mergePoint(b,x0,x1) (MOVWload [i] {s} p0 mem)
+	for {
+		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+			x0 := v_0
+			if x0.Op != Op386MOVBload {
+				continue
+			}
+			i := x0.AuxInt
+			s := x0.Aux
+			mem := x0.Args[1]
+			p0 := x0.Args[0]
+			s0 := v_1
+			if s0.Op != Op386SHLLconst || s0.AuxInt != 8 {
+				continue
+			}
+			x1 := s0.Args[0]
+			if x1.Op != Op386MOVBload || x1.AuxInt != i || x1.Aux != s {
+				continue
+			}
+			_ = x1.Args[1]
+			p1 := x1.Args[0]
+			if mem != x1.Args[1] || !(x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && sequentialAddresses(p0, p1, 1) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, s0)) {
+				continue
+			}
+			b = mergePoint(b, x0, x1)
+			v0 := b.NewValue0(x1.Pos, Op386MOVWload, typ.UInt16)
+			v.copyOf(v0)
+			v0.AuxInt = i
+			v0.Aux = s
+			v0.AddArg2(p0, mem)
+			return true
+		}
+		break
+	}
 	// match: (ORL o0:(ORL x0:(MOVWload [i0] {s} p mem) s0:(SHLLconst [16] x1:(MOVBload [i2] {s} p mem))) s1:(SHLLconst [24] x2:(MOVBload [i3] {s} p mem)))
 	// cond: i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b,x0,x1,x2) != nil && clobber(x0, x1, x2, s0, s1, o0)
 	// result: @mergePoint(b,x0,x1,x2) (MOVLload [i0] {s} p mem)
@@ -9831,57 +6589,9 @@
 		}
 		break
 	}
-	// match: (ORL x0:(MOVBloadidx1 [i0] {s} p idx mem) s0:(SHLLconst [8] x1:(MOVBloadidx1 [i1] {s} p idx mem)))
-	// cond: i1==i0+1 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, s0)
-	// result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
-	for {
-		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-			x0 := v_0
-			if x0.Op != Op386MOVBloadidx1 {
-				continue
-			}
-			i0 := x0.AuxInt
-			s := x0.Aux
-			mem := x0.Args[2]
-			x0_0 := x0.Args[0]
-			x0_1 := x0.Args[1]
-			for _i1 := 0; _i1 <= 1; _i1, x0_0, x0_1 = _i1+1, x0_1, x0_0 {
-				p := x0_0
-				idx := x0_1
-				s0 := v_1
-				if s0.Op != Op386SHLLconst || s0.AuxInt != 8 {
-					continue
-				}
-				x1 := s0.Args[0]
-				if x1.Op != Op386MOVBloadidx1 {
-					continue
-				}
-				i1 := x1.AuxInt
-				if x1.Aux != s {
-					continue
-				}
-				_ = x1.Args[2]
-				x1_0 := x1.Args[0]
-				x1_1 := x1.Args[1]
-				for _i2 := 0; _i2 <= 1; _i2, x1_0, x1_1 = _i2+1, x1_1, x1_0 {
-					if p != x1_0 || idx != x1_1 || mem != x1.Args[2] || !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, s0)) {
-						continue
-					}
-					b = mergePoint(b, x0, x1)
-					v0 := b.NewValue0(v.Pos, Op386MOVWloadidx1, v.Type)
-					v.copyOf(v0)
-					v0.AuxInt = i0
-					v0.Aux = s
-					v0.AddArg3(p, idx, mem)
-					return true
-				}
-			}
-		}
-		break
-	}
-	// match: (ORL o0:(ORL x0:(MOVWloadidx1 [i0] {s} p idx mem) s0:(SHLLconst [16] x1:(MOVBloadidx1 [i2] {s} p idx mem))) s1:(SHLLconst [24] x2:(MOVBloadidx1 [i3] {s} p idx mem)))
-	// cond: i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b,x0,x1,x2) != nil && clobber(x0, x1, x2, s0, s1, o0)
-	// result: @mergePoint(b,x0,x1,x2) (MOVLloadidx1 <v.Type> [i0] {s} p idx mem)
+	// match: (ORL o0:(ORL x0:(MOVWload [i] {s} p0 mem) s0:(SHLLconst [16] x1:(MOVBload [i] {s} p1 mem))) s1:(SHLLconst [24] x2:(MOVBload [i] {s} p2 mem)))
+	// cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && sequentialAddresses(p0, p1, 2) && sequentialAddresses(p1, p2, 1) && mergePoint(b,x0,x1,x2) != nil && clobber(x0, x1, x2, s0, s1, o0)
+	// result: @mergePoint(b,x0,x1,x2) (MOVLload [i] {s} p0 mem)
 	for {
 		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
 			o0 := v_0
@@ -9893,65 +6603,46 @@
 			o0_1 := o0.Args[1]
 			for _i1 := 0; _i1 <= 1; _i1, o0_0, o0_1 = _i1+1, o0_1, o0_0 {
 				x0 := o0_0
-				if x0.Op != Op386MOVWloadidx1 {
+				if x0.Op != Op386MOVWload {
 					continue
 				}
-				i0 := x0.AuxInt
+				i := x0.AuxInt
 				s := x0.Aux
-				mem := x0.Args[2]
-				x0_0 := x0.Args[0]
-				x0_1 := x0.Args[1]
-				for _i2 := 0; _i2 <= 1; _i2, x0_0, x0_1 = _i2+1, x0_1, x0_0 {
-					p := x0_0
-					idx := x0_1
-					s0 := o0_1
-					if s0.Op != Op386SHLLconst || s0.AuxInt != 16 {
-						continue
-					}
-					x1 := s0.Args[0]
-					if x1.Op != Op386MOVBloadidx1 {
-						continue
-					}
-					i2 := x1.AuxInt
-					if x1.Aux != s {
-						continue
-					}
-					_ = x1.Args[2]
-					x1_0 := x1.Args[0]
-					x1_1 := x1.Args[1]
-					for _i3 := 0; _i3 <= 1; _i3, x1_0, x1_1 = _i3+1, x1_1, x1_0 {
-						if p != x1_0 || idx != x1_1 || mem != x1.Args[2] {
-							continue
-						}
-						s1 := v_1
-						if s1.Op != Op386SHLLconst || s1.AuxInt != 24 {
-							continue
-						}
-						x2 := s1.Args[0]
-						if x2.Op != Op386MOVBloadidx1 {
-							continue
-						}
-						i3 := x2.AuxInt
-						if x2.Aux != s {
-							continue
-						}
-						_ = x2.Args[2]
-						x2_0 := x2.Args[0]
-						x2_1 := x2.Args[1]
-						for _i4 := 0; _i4 <= 1; _i4, x2_0, x2_1 = _i4+1, x2_1, x2_0 {
-							if p != x2_0 || idx != x2_1 || mem != x2.Args[2] || !(i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0, x1, x2, s0, s1, o0)) {
-								continue
-							}
-							b = mergePoint(b, x0, x1, x2)
-							v0 := b.NewValue0(v.Pos, Op386MOVLloadidx1, v.Type)
-							v.copyOf(v0)
-							v0.AuxInt = i0
-							v0.Aux = s
-							v0.AddArg3(p, idx, mem)
-							return true
-						}
-					}
+				mem := x0.Args[1]
+				p0 := x0.Args[0]
+				s0 := o0_1
+				if s0.Op != Op386SHLLconst || s0.AuxInt != 16 {
+					continue
 				}
+				x1 := s0.Args[0]
+				if x1.Op != Op386MOVBload || x1.AuxInt != i || x1.Aux != s {
+					continue
+				}
+				_ = x1.Args[1]
+				p1 := x1.Args[0]
+				if mem != x1.Args[1] {
+					continue
+				}
+				s1 := v_1
+				if s1.Op != Op386SHLLconst || s1.AuxInt != 24 {
+					continue
+				}
+				x2 := s1.Args[0]
+				if x2.Op != Op386MOVBload || x2.AuxInt != i || x2.Aux != s {
+					continue
+				}
+				_ = x2.Args[1]
+				p2 := x2.Args[0]
+				if mem != x2.Args[1] || !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && sequentialAddresses(p0, p1, 2) && sequentialAddresses(p1, p2, 1) && mergePoint(b, x0, x1, x2) != nil && clobber(x0, x1, x2, s0, s1, o0)) {
+					continue
+				}
+				b = mergePoint(b, x0, x1, x2)
+				v0 := b.NewValue0(x2.Pos, Op386MOVLload, typ.UInt32)
+				v.copyOf(v0)
+				v0.AuxInt = i
+				v0.Aux = s
+				v0.AddArg2(p0, mem)
+				return true
 			}
 		}
 		break
@@ -10048,81 +6739,6 @@
 	}
 	return false
 }
-func rewriteValue386_Op386ORLconstmodifyidx4(v *Value) bool {
-	v_2 := v.Args[2]
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	b := v.Block
-	config := b.Func.Config
-	// match: (ORLconstmodifyidx4 [valoff1] {sym} (ADDLconst [off2] base) idx mem)
-	// cond: ValAndOff(valoff1).canAdd(off2)
-	// result: (ORLconstmodifyidx4 [ValAndOff(valoff1).add(off2)] {sym} base idx mem)
-	for {
-		valoff1 := v.AuxInt
-		sym := v.Aux
-		if v_0.Op != Op386ADDLconst {
-			break
-		}
-		off2 := v_0.AuxInt
-		base := v_0.Args[0]
-		idx := v_1
-		mem := v_2
-		if !(ValAndOff(valoff1).canAdd(off2)) {
-			break
-		}
-		v.reset(Op386ORLconstmodifyidx4)
-		v.AuxInt = ValAndOff(valoff1).add(off2)
-		v.Aux = sym
-		v.AddArg3(base, idx, mem)
-		return true
-	}
-	// match: (ORLconstmodifyidx4 [valoff1] {sym} base (ADDLconst [off2] idx) mem)
-	// cond: ValAndOff(valoff1).canAdd(off2*4)
-	// result: (ORLconstmodifyidx4 [ValAndOff(valoff1).add(off2*4)] {sym} base idx mem)
-	for {
-		valoff1 := v.AuxInt
-		sym := v.Aux
-		base := v_0
-		if v_1.Op != Op386ADDLconst {
-			break
-		}
-		off2 := v_1.AuxInt
-		idx := v_1.Args[0]
-		mem := v_2
-		if !(ValAndOff(valoff1).canAdd(off2 * 4)) {
-			break
-		}
-		v.reset(Op386ORLconstmodifyidx4)
-		v.AuxInt = ValAndOff(valoff1).add(off2 * 4)
-		v.Aux = sym
-		v.AddArg3(base, idx, mem)
-		return true
-	}
-	// match: (ORLconstmodifyidx4 [valoff1] {sym1} (LEAL [off2] {sym2} base) idx mem)
-	// cond: ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)
-	// result: (ORLconstmodifyidx4 [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base idx mem)
-	for {
-		valoff1 := v.AuxInt
-		sym1 := v.Aux
-		if v_0.Op != Op386LEAL {
-			break
-		}
-		off2 := v_0.AuxInt
-		sym2 := v_0.Aux
-		base := v_0.Args[0]
-		idx := v_1
-		mem := v_2
-		if !(ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) {
-			break
-		}
-		v.reset(Op386ORLconstmodifyidx4)
-		v.AuxInt = ValAndOff(valoff1).add(off2)
-		v.Aux = mergeSym(sym1, sym2)
-		v.AddArg3(base, idx, mem)
-		return true
-	}
-	return false
-}
 func rewriteValue386_Op386ORLload(v *Value) bool {
 	v_2 := v.Args[2]
 	v_1 := v.Args[1]
@@ -10174,109 +6790,6 @@
 		v.AddArg3(val, base, mem)
 		return true
 	}
-	// match: (ORLload [off1] {sym1} val (LEAL4 [off2] {sym2} ptr idx) mem)
-	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-	// result: (ORLloadidx4 [off1+off2] {mergeSym(sym1,sym2)} val ptr idx mem)
-	for {
-		off1 := v.AuxInt
-		sym1 := v.Aux
-		val := v_0
-		if v_1.Op != Op386LEAL4 {
-			break
-		}
-		off2 := v_1.AuxInt
-		sym2 := v_1.Aux
-		idx := v_1.Args[1]
-		ptr := v_1.Args[0]
-		mem := v_2
-		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
-			break
-		}
-		v.reset(Op386ORLloadidx4)
-		v.AuxInt = off1 + off2
-		v.Aux = mergeSym(sym1, sym2)
-		v.AddArg4(val, ptr, idx, mem)
-		return true
-	}
-	return false
-}
-func rewriteValue386_Op386ORLloadidx4(v *Value) bool {
-	v_3 := v.Args[3]
-	v_2 := v.Args[2]
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	b := v.Block
-	config := b.Func.Config
-	// match: (ORLloadidx4 [off1] {sym} val (ADDLconst [off2] base) idx mem)
-	// cond: is32Bit(off1+off2)
-	// result: (ORLloadidx4 [off1+off2] {sym} val base idx mem)
-	for {
-		off1 := v.AuxInt
-		sym := v.Aux
-		val := v_0
-		if v_1.Op != Op386ADDLconst {
-			break
-		}
-		off2 := v_1.AuxInt
-		base := v_1.Args[0]
-		idx := v_2
-		mem := v_3
-		if !(is32Bit(off1 + off2)) {
-			break
-		}
-		v.reset(Op386ORLloadidx4)
-		v.AuxInt = off1 + off2
-		v.Aux = sym
-		v.AddArg4(val, base, idx, mem)
-		return true
-	}
-	// match: (ORLloadidx4 [off1] {sym} val base (ADDLconst [off2] idx) mem)
-	// cond: is32Bit(off1+off2*4)
-	// result: (ORLloadidx4 [off1+off2*4] {sym} val base idx mem)
-	for {
-		off1 := v.AuxInt
-		sym := v.Aux
-		val := v_0
-		base := v_1
-		if v_2.Op != Op386ADDLconst {
-			break
-		}
-		off2 := v_2.AuxInt
-		idx := v_2.Args[0]
-		mem := v_3
-		if !(is32Bit(off1 + off2*4)) {
-			break
-		}
-		v.reset(Op386ORLloadidx4)
-		v.AuxInt = off1 + off2*4
-		v.Aux = sym
-		v.AddArg4(val, base, idx, mem)
-		return true
-	}
-	// match: (ORLloadidx4 [off1] {sym1} val (LEAL [off2] {sym2} base) idx mem)
-	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)
-	// result: (ORLloadidx4 [off1+off2] {mergeSym(sym1,sym2)} val base idx mem)
-	for {
-		off1 := v.AuxInt
-		sym1 := v.Aux
-		val := v_0
-		if v_1.Op != Op386LEAL {
-			break
-		}
-		off2 := v_1.AuxInt
-		sym2 := v_1.Aux
-		base := v_1.Args[0]
-		idx := v_2
-		mem := v_3
-		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) {
-			break
-		}
-		v.reset(Op386ORLloadidx4)
-		v.AuxInt = off1 + off2
-		v.Aux = mergeSym(sym1, sym2)
-		v.AddArg4(val, base, idx, mem)
-		return true
-	}
 	return false
 }
 func rewriteValue386_Op386ORLmodify(v *Value) bool {
@@ -10332,107 +6845,6 @@
 	}
 	return false
 }
-func rewriteValue386_Op386ORLmodifyidx4(v *Value) bool {
-	v_3 := v.Args[3]
-	v_2 := v.Args[2]
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	b := v.Block
-	config := b.Func.Config
-	// match: (ORLmodifyidx4 [off1] {sym} (ADDLconst [off2] base) idx val mem)
-	// cond: is32Bit(off1+off2)
-	// result: (ORLmodifyidx4 [off1+off2] {sym} base idx val mem)
-	for {
-		off1 := v.AuxInt
-		sym := v.Aux
-		if v_0.Op != Op386ADDLconst {
-			break
-		}
-		off2 := v_0.AuxInt
-		base := v_0.Args[0]
-		idx := v_1
-		val := v_2
-		mem := v_3
-		if !(is32Bit(off1 + off2)) {
-			break
-		}
-		v.reset(Op386ORLmodifyidx4)
-		v.AuxInt = off1 + off2
-		v.Aux = sym
-		v.AddArg4(base, idx, val, mem)
-		return true
-	}
-	// match: (ORLmodifyidx4 [off1] {sym} base (ADDLconst [off2] idx) val mem)
-	// cond: is32Bit(off1+off2*4)
-	// result: (ORLmodifyidx4 [off1+off2*4] {sym} base idx val mem)
-	for {
-		off1 := v.AuxInt
-		sym := v.Aux
-		base := v_0
-		if v_1.Op != Op386ADDLconst {
-			break
-		}
-		off2 := v_1.AuxInt
-		idx := v_1.Args[0]
-		val := v_2
-		mem := v_3
-		if !(is32Bit(off1 + off2*4)) {
-			break
-		}
-		v.reset(Op386ORLmodifyidx4)
-		v.AuxInt = off1 + off2*4
-		v.Aux = sym
-		v.AddArg4(base, idx, val, mem)
-		return true
-	}
-	// match: (ORLmodifyidx4 [off1] {sym1} (LEAL [off2] {sym2} base) idx val mem)
-	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)
-	// result: (ORLmodifyidx4 [off1+off2] {mergeSym(sym1,sym2)} base idx val mem)
-	for {
-		off1 := v.AuxInt
-		sym1 := v.Aux
-		if v_0.Op != Op386LEAL {
-			break
-		}
-		off2 := v_0.AuxInt
-		sym2 := v_0.Aux
-		base := v_0.Args[0]
-		idx := v_1
-		val := v_2
-		mem := v_3
-		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) {
-			break
-		}
-		v.reset(Op386ORLmodifyidx4)
-		v.AuxInt = off1 + off2
-		v.Aux = mergeSym(sym1, sym2)
-		v.AddArg4(base, idx, val, mem)
-		return true
-	}
-	// match: (ORLmodifyidx4 [off] {sym} ptr idx (MOVLconst [c]) mem)
-	// cond: validValAndOff(c,off)
-	// result: (ORLconstmodifyidx4 [makeValAndOff(c,off)] {sym} ptr idx mem)
-	for {
-		off := v.AuxInt
-		sym := v.Aux
-		ptr := v_0
-		idx := v_1
-		if v_2.Op != Op386MOVLconst {
-			break
-		}
-		c := v_2.AuxInt
-		mem := v_3
-		if !(validValAndOff(c, off)) {
-			break
-		}
-		v.reset(Op386ORLconstmodifyidx4)
-		v.AuxInt = makeValAndOff(c, off)
-		v.Aux = sym
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
-	return false
-}
 func rewriteValue386_Op386ROLBconst(v *Value) bool {
 	v_0 := v.Args[0]
 	// match: (ROLBconst [c] (ROLBconst [d] x))
@@ -11629,29 +8041,6 @@
 		v.AddArg3(x, ptr, mem)
 		return true
 	}
-	// match: (SUBL x l:(MOVLloadidx4 [off] {sym} ptr idx mem))
-	// cond: canMergeLoadClobber(v, l, x) && clobber(l)
-	// result: (SUBLloadidx4 x [off] {sym} ptr idx mem)
-	for {
-		x := v_0
-		l := v_1
-		if l.Op != Op386MOVLloadidx4 {
-			break
-		}
-		off := l.AuxInt
-		sym := l.Aux
-		mem := l.Args[2]
-		ptr := l.Args[0]
-		idx := l.Args[1]
-		if !(canMergeLoadClobber(v, l, x) && clobber(l)) {
-			break
-		}
-		v.reset(Op386SUBLloadidx4)
-		v.AuxInt = off
-		v.Aux = sym
-		v.AddArg4(x, ptr, idx, mem)
-		return true
-	}
 	// match: (SUBL x x)
 	// result: (MOVLconst [0])
 	for {
@@ -11759,109 +8148,6 @@
 		v.AddArg3(val, base, mem)
 		return true
 	}
-	// match: (SUBLload [off1] {sym1} val (LEAL4 [off2] {sym2} ptr idx) mem)
-	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-	// result: (SUBLloadidx4 [off1+off2] {mergeSym(sym1,sym2)} val ptr idx mem)
-	for {
-		off1 := v.AuxInt
-		sym1 := v.Aux
-		val := v_0
-		if v_1.Op != Op386LEAL4 {
-			break
-		}
-		off2 := v_1.AuxInt
-		sym2 := v_1.Aux
-		idx := v_1.Args[1]
-		ptr := v_1.Args[0]
-		mem := v_2
-		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
-			break
-		}
-		v.reset(Op386SUBLloadidx4)
-		v.AuxInt = off1 + off2
-		v.Aux = mergeSym(sym1, sym2)
-		v.AddArg4(val, ptr, idx, mem)
-		return true
-	}
-	return false
-}
-func rewriteValue386_Op386SUBLloadidx4(v *Value) bool {
-	v_3 := v.Args[3]
-	v_2 := v.Args[2]
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	b := v.Block
-	config := b.Func.Config
-	// match: (SUBLloadidx4 [off1] {sym} val (ADDLconst [off2] base) idx mem)
-	// cond: is32Bit(off1+off2)
-	// result: (SUBLloadidx4 [off1+off2] {sym} val base idx mem)
-	for {
-		off1 := v.AuxInt
-		sym := v.Aux
-		val := v_0
-		if v_1.Op != Op386ADDLconst {
-			break
-		}
-		off2 := v_1.AuxInt
-		base := v_1.Args[0]
-		idx := v_2
-		mem := v_3
-		if !(is32Bit(off1 + off2)) {
-			break
-		}
-		v.reset(Op386SUBLloadidx4)
-		v.AuxInt = off1 + off2
-		v.Aux = sym
-		v.AddArg4(val, base, idx, mem)
-		return true
-	}
-	// match: (SUBLloadidx4 [off1] {sym} val base (ADDLconst [off2] idx) mem)
-	// cond: is32Bit(off1+off2*4)
-	// result: (SUBLloadidx4 [off1+off2*4] {sym} val base idx mem)
-	for {
-		off1 := v.AuxInt
-		sym := v.Aux
-		val := v_0
-		base := v_1
-		if v_2.Op != Op386ADDLconst {
-			break
-		}
-		off2 := v_2.AuxInt
-		idx := v_2.Args[0]
-		mem := v_3
-		if !(is32Bit(off1 + off2*4)) {
-			break
-		}
-		v.reset(Op386SUBLloadidx4)
-		v.AuxInt = off1 + off2*4
-		v.Aux = sym
-		v.AddArg4(val, base, idx, mem)
-		return true
-	}
-	// match: (SUBLloadidx4 [off1] {sym1} val (LEAL [off2] {sym2} base) idx mem)
-	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)
-	// result: (SUBLloadidx4 [off1+off2] {mergeSym(sym1,sym2)} val base idx mem)
-	for {
-		off1 := v.AuxInt
-		sym1 := v.Aux
-		val := v_0
-		if v_1.Op != Op386LEAL {
-			break
-		}
-		off2 := v_1.AuxInt
-		sym2 := v_1.Aux
-		base := v_1.Args[0]
-		idx := v_2
-		mem := v_3
-		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) {
-			break
-		}
-		v.reset(Op386SUBLloadidx4)
-		v.AuxInt = off1 + off2
-		v.Aux = mergeSym(sym1, sym2)
-		v.AddArg4(val, base, idx, mem)
-		return true
-	}
 	return false
 }
 func rewriteValue386_Op386SUBLmodify(v *Value) bool {
@@ -11917,107 +8203,6 @@
 	}
 	return false
 }
-func rewriteValue386_Op386SUBLmodifyidx4(v *Value) bool {
-	v_3 := v.Args[3]
-	v_2 := v.Args[2]
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	b := v.Block
-	config := b.Func.Config
-	// match: (SUBLmodifyidx4 [off1] {sym} (ADDLconst [off2] base) idx val mem)
-	// cond: is32Bit(off1+off2)
-	// result: (SUBLmodifyidx4 [off1+off2] {sym} base idx val mem)
-	for {
-		off1 := v.AuxInt
-		sym := v.Aux
-		if v_0.Op != Op386ADDLconst {
-			break
-		}
-		off2 := v_0.AuxInt
-		base := v_0.Args[0]
-		idx := v_1
-		val := v_2
-		mem := v_3
-		if !(is32Bit(off1 + off2)) {
-			break
-		}
-		v.reset(Op386SUBLmodifyidx4)
-		v.AuxInt = off1 + off2
-		v.Aux = sym
-		v.AddArg4(base, idx, val, mem)
-		return true
-	}
-	// match: (SUBLmodifyidx4 [off1] {sym} base (ADDLconst [off2] idx) val mem)
-	// cond: is32Bit(off1+off2*4)
-	// result: (SUBLmodifyidx4 [off1+off2*4] {sym} base idx val mem)
-	for {
-		off1 := v.AuxInt
-		sym := v.Aux
-		base := v_0
-		if v_1.Op != Op386ADDLconst {
-			break
-		}
-		off2 := v_1.AuxInt
-		idx := v_1.Args[0]
-		val := v_2
-		mem := v_3
-		if !(is32Bit(off1 + off2*4)) {
-			break
-		}
-		v.reset(Op386SUBLmodifyidx4)
-		v.AuxInt = off1 + off2*4
-		v.Aux = sym
-		v.AddArg4(base, idx, val, mem)
-		return true
-	}
-	// match: (SUBLmodifyidx4 [off1] {sym1} (LEAL [off2] {sym2} base) idx val mem)
-	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)
-	// result: (SUBLmodifyidx4 [off1+off2] {mergeSym(sym1,sym2)} base idx val mem)
-	for {
-		off1 := v.AuxInt
-		sym1 := v.Aux
-		if v_0.Op != Op386LEAL {
-			break
-		}
-		off2 := v_0.AuxInt
-		sym2 := v_0.Aux
-		base := v_0.Args[0]
-		idx := v_1
-		val := v_2
-		mem := v_3
-		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) {
-			break
-		}
-		v.reset(Op386SUBLmodifyidx4)
-		v.AuxInt = off1 + off2
-		v.Aux = mergeSym(sym1, sym2)
-		v.AddArg4(base, idx, val, mem)
-		return true
-	}
-	// match: (SUBLmodifyidx4 [off] {sym} ptr idx (MOVLconst [c]) mem)
-	// cond: validValAndOff(-c,off)
-	// result: (ADDLconstmodifyidx4 [makeValAndOff(-c,off)] {sym} ptr idx mem)
-	for {
-		off := v.AuxInt
-		sym := v.Aux
-		ptr := v_0
-		idx := v_1
-		if v_2.Op != Op386MOVLconst {
-			break
-		}
-		c := v_2.AuxInt
-		mem := v_3
-		if !(validValAndOff(-c, off)) {
-			break
-		}
-		v.reset(Op386ADDLconstmodifyidx4)
-		v.AuxInt = makeValAndOff(-c, off)
-		v.Aux = sym
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
-	return false
-}
 func rewriteValue386_Op386SUBSD(v *Value) bool {
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
@@ -12300,32 +8485,6 @@
 		}
 		break
 	}
-	// match: (XORL x l:(MOVLloadidx4 [off] {sym} ptr idx mem))
-	// cond: canMergeLoadClobber(v, l, x) && clobber(l)
-	// result: (XORLloadidx4 x [off] {sym} ptr idx mem)
-	for {
-		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-			x := v_0
-			l := v_1
-			if l.Op != Op386MOVLloadidx4 {
-				continue
-			}
-			off := l.AuxInt
-			sym := l.Aux
-			mem := l.Args[2]
-			ptr := l.Args[0]
-			idx := l.Args[1]
-			if !(canMergeLoadClobber(v, l, x) && clobber(l)) {
-				continue
-			}
-			v.reset(Op386XORLloadidx4)
-			v.AuxInt = off
-			v.Aux = sym
-			v.AddArg4(x, ptr, idx, mem)
-			return true
-		}
-		break
-	}
 	// match: (XORL x x)
 	// result: (MOVLconst [0])
 	for {
@@ -12431,81 +8590,6 @@
 	}
 	return false
 }
-func rewriteValue386_Op386XORLconstmodifyidx4(v *Value) bool {
-	v_2 := v.Args[2]
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	b := v.Block
-	config := b.Func.Config
-	// match: (XORLconstmodifyidx4 [valoff1] {sym} (ADDLconst [off2] base) idx mem)
-	// cond: ValAndOff(valoff1).canAdd(off2)
-	// result: (XORLconstmodifyidx4 [ValAndOff(valoff1).add(off2)] {sym} base idx mem)
-	for {
-		valoff1 := v.AuxInt
-		sym := v.Aux
-		if v_0.Op != Op386ADDLconst {
-			break
-		}
-		off2 := v_0.AuxInt
-		base := v_0.Args[0]
-		idx := v_1
-		mem := v_2
-		if !(ValAndOff(valoff1).canAdd(off2)) {
-			break
-		}
-		v.reset(Op386XORLconstmodifyidx4)
-		v.AuxInt = ValAndOff(valoff1).add(off2)
-		v.Aux = sym
-		v.AddArg3(base, idx, mem)
-		return true
-	}
-	// match: (XORLconstmodifyidx4 [valoff1] {sym} base (ADDLconst [off2] idx) mem)
-	// cond: ValAndOff(valoff1).canAdd(off2*4)
-	// result: (XORLconstmodifyidx4 [ValAndOff(valoff1).add(off2*4)] {sym} base idx mem)
-	for {
-		valoff1 := v.AuxInt
-		sym := v.Aux
-		base := v_0
-		if v_1.Op != Op386ADDLconst {
-			break
-		}
-		off2 := v_1.AuxInt
-		idx := v_1.Args[0]
-		mem := v_2
-		if !(ValAndOff(valoff1).canAdd(off2 * 4)) {
-			break
-		}
-		v.reset(Op386XORLconstmodifyidx4)
-		v.AuxInt = ValAndOff(valoff1).add(off2 * 4)
-		v.Aux = sym
-		v.AddArg3(base, idx, mem)
-		return true
-	}
-	// match: (XORLconstmodifyidx4 [valoff1] {sym1} (LEAL [off2] {sym2} base) idx mem)
-	// cond: ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)
-	// result: (XORLconstmodifyidx4 [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base idx mem)
-	for {
-		valoff1 := v.AuxInt
-		sym1 := v.Aux
-		if v_0.Op != Op386LEAL {
-			break
-		}
-		off2 := v_0.AuxInt
-		sym2 := v_0.Aux
-		base := v_0.Args[0]
-		idx := v_1
-		mem := v_2
-		if !(ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) {
-			break
-		}
-		v.reset(Op386XORLconstmodifyidx4)
-		v.AuxInt = ValAndOff(valoff1).add(off2)
-		v.Aux = mergeSym(sym1, sym2)
-		v.AddArg3(base, idx, mem)
-		return true
-	}
-	return false
-}
 func rewriteValue386_Op386XORLload(v *Value) bool {
 	v_2 := v.Args[2]
 	v_1 := v.Args[1]
@@ -12557,109 +8641,6 @@
 		v.AddArg3(val, base, mem)
 		return true
 	}
-	// match: (XORLload [off1] {sym1} val (LEAL4 [off2] {sym2} ptr idx) mem)
-	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-	// result: (XORLloadidx4 [off1+off2] {mergeSym(sym1,sym2)} val ptr idx mem)
-	for {
-		off1 := v.AuxInt
-		sym1 := v.Aux
-		val := v_0
-		if v_1.Op != Op386LEAL4 {
-			break
-		}
-		off2 := v_1.AuxInt
-		sym2 := v_1.Aux
-		idx := v_1.Args[1]
-		ptr := v_1.Args[0]
-		mem := v_2
-		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
-			break
-		}
-		v.reset(Op386XORLloadidx4)
-		v.AuxInt = off1 + off2
-		v.Aux = mergeSym(sym1, sym2)
-		v.AddArg4(val, ptr, idx, mem)
-		return true
-	}
-	return false
-}
-func rewriteValue386_Op386XORLloadidx4(v *Value) bool {
-	v_3 := v.Args[3]
-	v_2 := v.Args[2]
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	b := v.Block
-	config := b.Func.Config
-	// match: (XORLloadidx4 [off1] {sym} val (ADDLconst [off2] base) idx mem)
-	// cond: is32Bit(off1+off2)
-	// result: (XORLloadidx4 [off1+off2] {sym} val base idx mem)
-	for {
-		off1 := v.AuxInt
-		sym := v.Aux
-		val := v_0
-		if v_1.Op != Op386ADDLconst {
-			break
-		}
-		off2 := v_1.AuxInt
-		base := v_1.Args[0]
-		idx := v_2
-		mem := v_3
-		if !(is32Bit(off1 + off2)) {
-			break
-		}
-		v.reset(Op386XORLloadidx4)
-		v.AuxInt = off1 + off2
-		v.Aux = sym
-		v.AddArg4(val, base, idx, mem)
-		return true
-	}
-	// match: (XORLloadidx4 [off1] {sym} val base (ADDLconst [off2] idx) mem)
-	// cond: is32Bit(off1+off2*4)
-	// result: (XORLloadidx4 [off1+off2*4] {sym} val base idx mem)
-	for {
-		off1 := v.AuxInt
-		sym := v.Aux
-		val := v_0
-		base := v_1
-		if v_2.Op != Op386ADDLconst {
-			break
-		}
-		off2 := v_2.AuxInt
-		idx := v_2.Args[0]
-		mem := v_3
-		if !(is32Bit(off1 + off2*4)) {
-			break
-		}
-		v.reset(Op386XORLloadidx4)
-		v.AuxInt = off1 + off2*4
-		v.Aux = sym
-		v.AddArg4(val, base, idx, mem)
-		return true
-	}
-	// match: (XORLloadidx4 [off1] {sym1} val (LEAL [off2] {sym2} base) idx mem)
-	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)
-	// result: (XORLloadidx4 [off1+off2] {mergeSym(sym1,sym2)} val base idx mem)
-	for {
-		off1 := v.AuxInt
-		sym1 := v.Aux
-		val := v_0
-		if v_1.Op != Op386LEAL {
-			break
-		}
-		off2 := v_1.AuxInt
-		sym2 := v_1.Aux
-		base := v_1.Args[0]
-		idx := v_2
-		mem := v_3
-		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) {
-			break
-		}
-		v.reset(Op386XORLloadidx4)
-		v.AuxInt = off1 + off2
-		v.Aux = mergeSym(sym1, sym2)
-		v.AddArg4(val, base, idx, mem)
-		return true
-	}
 	return false
 }
 func rewriteValue386_Op386XORLmodify(v *Value) bool {
@@ -12715,107 +8696,6 @@
 	}
 	return false
 }
-func rewriteValue386_Op386XORLmodifyidx4(v *Value) bool {
-	v_3 := v.Args[3]
-	v_2 := v.Args[2]
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	b := v.Block
-	config := b.Func.Config
-	// match: (XORLmodifyidx4 [off1] {sym} (ADDLconst [off2] base) idx val mem)
-	// cond: is32Bit(off1+off2)
-	// result: (XORLmodifyidx4 [off1+off2] {sym} base idx val mem)
-	for {
-		off1 := v.AuxInt
-		sym := v.Aux
-		if v_0.Op != Op386ADDLconst {
-			break
-		}
-		off2 := v_0.AuxInt
-		base := v_0.Args[0]
-		idx := v_1
-		val := v_2
-		mem := v_3
-		if !(is32Bit(off1 + off2)) {
-			break
-		}
-		v.reset(Op386XORLmodifyidx4)
-		v.AuxInt = off1 + off2
-		v.Aux = sym
-		v.AddArg4(base, idx, val, mem)
-		return true
-	}
-	// match: (XORLmodifyidx4 [off1] {sym} base (ADDLconst [off2] idx) val mem)
-	// cond: is32Bit(off1+off2*4)
-	// result: (XORLmodifyidx4 [off1+off2*4] {sym} base idx val mem)
-	for {
-		off1 := v.AuxInt
-		sym := v.Aux
-		base := v_0
-		if v_1.Op != Op386ADDLconst {
-			break
-		}
-		off2 := v_1.AuxInt
-		idx := v_1.Args[0]
-		val := v_2
-		mem := v_3
-		if !(is32Bit(off1 + off2*4)) {
-			break
-		}
-		v.reset(Op386XORLmodifyidx4)
-		v.AuxInt = off1 + off2*4
-		v.Aux = sym
-		v.AddArg4(base, idx, val, mem)
-		return true
-	}
-	// match: (XORLmodifyidx4 [off1] {sym1} (LEAL [off2] {sym2} base) idx val mem)
-	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)
-	// result: (XORLmodifyidx4 [off1+off2] {mergeSym(sym1,sym2)} base idx val mem)
-	for {
-		off1 := v.AuxInt
-		sym1 := v.Aux
-		if v_0.Op != Op386LEAL {
-			break
-		}
-		off2 := v_0.AuxInt
-		sym2 := v_0.Aux
-		base := v_0.Args[0]
-		idx := v_1
-		val := v_2
-		mem := v_3
-		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) {
-			break
-		}
-		v.reset(Op386XORLmodifyidx4)
-		v.AuxInt = off1 + off2
-		v.Aux = mergeSym(sym1, sym2)
-		v.AddArg4(base, idx, val, mem)
-		return true
-	}
-	// match: (XORLmodifyidx4 [off] {sym} ptr idx (MOVLconst [c]) mem)
-	// cond: validValAndOff(c,off)
-	// result: (XORLconstmodifyidx4 [makeValAndOff(c,off)] {sym} ptr idx mem)
-	for {
-		off := v.AuxInt
-		sym := v.Aux
-		ptr := v_0
-		idx := v_1
-		if v_2.Op != Op386MOVLconst {
-			break
-		}
-		c := v_2.AuxInt
-		mem := v_3
-		if !(validValAndOff(c, off)) {
-			break
-		}
-		v.reset(Op386XORLconstmodifyidx4)
-		v.AuxInt = makeValAndOff(c, off)
-		v.Aux = sym
-		v.AddArg3(ptr, idx, mem)
-		return true
-	}
-	return false
-}
 func rewriteValue386_OpConstNil(v *Value) bool {
 	// match: (ConstNil)
 	// result: (MOVLconst [0])
diff --git a/src/cmd/compile/internal/ssa/rewritegeneric.go b/src/cmd/compile/internal/ssa/rewritegeneric.go
index 13873b2..d6213e8 100644
--- a/src/cmd/compile/internal/ssa/rewritegeneric.go
+++ b/src/cmd/compile/internal/ssa/rewritegeneric.go
@@ -3983,7 +3983,7 @@
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	// match: (Convert (Add64 (Convert ptr mem) off) mem)
-	// result: (Add64 ptr off)
+	// result: (AddPtr ptr off)
 	for {
 		if v_0.Op != OpAdd64 {
 			break
@@ -4001,14 +4001,14 @@
 			if mem != v_1 {
 				continue
 			}
-			v.reset(OpAdd64)
+			v.reset(OpAddPtr)
 			v.AddArg2(ptr, off)
 			return true
 		}
 		break
 	}
 	// match: (Convert (Add32 (Convert ptr mem) off) mem)
-	// result: (Add32 ptr off)
+	// result: (AddPtr ptr off)
 	for {
 		if v_0.Op != OpAdd32 {
 			break
@@ -4026,7 +4026,7 @@
 			if mem != v_1 {
 				continue
 			}
-			v.reset(OpAdd32)
+			v.reset(OpAddPtr)
 			v.AddArg2(ptr, off)
 			return true
 		}
diff --git a/test/codegen/memops.go b/test/codegen/memops.go
index 9d18153..0df1914 100644
--- a/test/codegen/memops.go
+++ b/test/codegen/memops.go
@@ -99,46 +99,61 @@
 func idxInt8(x, y []int8, i int) {
 	var t int8
 	// amd64: `MOVBL[SZ]X\t1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\), [A-Z]+[0-9]*`
+	//   386: `MOVBL[SZ]X\t1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\), [A-Z]+[0-9]*`
 	t = x[i+1]
 	// amd64: `MOVB\t[A-Z]+[0-9]*, 1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\)`
+	//   386: `MOVB\t[A-Z]+[0-9]*, 1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\)`
 	y[i+1] = t
 	// amd64: `MOVB\t[$]77, 1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\)`
+	//   386: `MOVB\t[$]77, 1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\)`
 	x[i+1] = 77
 }
 
 func idxInt16(x, y []int16, i int) {
 	var t int16
 	// amd64: `MOVWL[SZ]X\t2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\), [A-Z]+[0-9]*`
+	//   386: `MOVWL[SZ]X\t2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\), [A-Z]+[0-9]*`
 	t = x[i+1]
 	// amd64: `MOVW\t[A-Z]+[0-9]*, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\)`
+	//   386: `MOVW\t[A-Z]+[0-9]*, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\)`
 	y[i+1] = t
 	// amd64: `MOVWL[SZ]X\t2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\), [A-Z]+[0-9]*`
+	//   386: `MOVWL[SZ]X\t2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\), [A-Z]+[0-9]*`
 	t = x[16*i+1]
 	// amd64: `MOVW\t[A-Z]+[0-9]*, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\)`
+	//   386: `MOVW\t[A-Z]+[0-9]*, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\)`
 	y[16*i+1] = t
 	// amd64: `MOVW\t[$]77, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\)`
+	//   386: `MOVW\t[$]77, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\)`
 	x[i+1] = 77
 	// amd64: `MOVW\t[$]77, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\)`
+	//   386: `MOVW\t[$]77, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\)`
 	x[16*i+1] = 77
 }
 
 func idxInt32(x, y []int32, i int) {
 	var t int32
 	// amd64: `MOVL\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), [A-Z]+[0-9]*`
+	//   386: `MOVL\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), [A-Z]+[0-9]*`
 	t = x[i+1]
 	// amd64: `MOVL\t[A-Z]+[0-9]*, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
+	//   386: `MOVL\t[A-Z]+[0-9]*, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
 	y[i+1] = t
 	// amd64: `MOVL\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), [A-Z]+[0-9]*`
 	t = x[2*i+1]
 	// amd64: `MOVL\t[A-Z]+[0-9]*, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)`
 	y[2*i+1] = t
 	// amd64: `MOVL\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\), [A-Z]+[0-9]*`
+	//   386: `MOVL\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\), [A-Z]+[0-9]*`
 	t = x[16*i+1]
 	// amd64: `MOVL\t[A-Z]+[0-9]*, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)`
+	//   386: `MOVL\t[A-Z]+[0-9]*, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)`
 	y[16*i+1] = t
 	// amd64: `MOVL\t[$]77, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
+	//   386: `MOVL\t[$]77, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
 	x[i+1] = 77
 	// amd64: `MOVL\t[$]77, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)`
+	//   386: `MOVL\t[$]77, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)`
 	x[16*i+1] = 77
 }
 
@@ -160,24 +175,71 @@
 
 func idxFloat32(x, y []float32, i int) {
 	var t float32
-	// amd64: `MOVSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), X[0-9]+`
+	//    amd64: `MOVSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), X[0-9]+`
+	// 386/sse2: `MOVSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), X[0-9]+`
 	t = x[i+1]
-	// amd64: `MOVSS\tX[0-9]+, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
+	//    amd64: `MOVSS\tX[0-9]+, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
+	// 386/sse2: `MOVSS\tX[0-9]+, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
 	y[i+1] = t
-	// amd64: `MOVSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\), X[0-9]+`
+	//    amd64: `MOVSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\), X[0-9]+`
+	// 386/sse2: `MOVSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\), X[0-9]+`
 	t = x[16*i+1]
-	// amd64: `MOVSS\tX[0-9]+, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)`
+	//    amd64: `MOVSS\tX[0-9]+, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)`
+	// 386/sse2: `MOVSS\tX[0-9]+, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)`
 	y[16*i+1] = t
 }
 
 func idxFloat64(x, y []float64, i int) {
 	var t float64
-	// amd64: `MOVSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), X[0-9]+`
+	//    amd64: `MOVSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), X[0-9]+`
+	// 386/sse2: `MOVSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), X[0-9]+`
 	t = x[i+1]
-	// amd64: `MOVSD\tX[0-9]+, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)`
+	//    amd64: `MOVSD\tX[0-9]+, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)`
+	// 386/sse2: `MOVSD\tX[0-9]+, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)`
 	y[i+1] = t
-	// amd64: `MOVSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\), X[0-9]+`
+	//    amd64: `MOVSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\), X[0-9]+`
+	// 386/sse2: `MOVSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\), X[0-9]+`
 	t = x[16*i+1]
-	// amd64: `MOVSD\tX[0-9]+, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\)`
+	//    amd64: `MOVSD\tX[0-9]+, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\)`
+	// 386/sse2: `MOVSD\tX[0-9]+, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\)`
 	y[16*i+1] = t
 }
+
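+// idxLoadPlusOp checks that an indexed load feeding a binary op is folded
+// into a single base+index*4 memory operand on 386.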
+func idxLoadPlusOp(x []int32, i int) int32 {
+	s := x[0]
+	// 386: `ADDL\t4\([A-Z]+\)\([A-Z]+\*4\), [A-Z]+`
+	s += x[i+1]
+	// 386: `SUBL\t8\([A-Z]+\)\([A-Z]+\*4\), [A-Z]+`
+	s -= x[i+2]
+	// 386: `IMULL\t12\([A-Z]+\)\([A-Z]+\*4\), [A-Z]+`
+	s *= x[i+3]
+	// 386: `ANDL\t16\([A-Z]+\)\([A-Z]+\*4\), [A-Z]+`
+	s &= x[i+4]
+	// 386: `ORL\t20\([A-Z]+\)\([A-Z]+\*4\), [A-Z]+`
+	s |= x[i+5]
+	// 386: `XORL\t24\([A-Z]+\)\([A-Z]+\*4\), [A-Z]+`
+	s ^= x[i+6]
+	return s
+}
+
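+// idxStorePlusOp checks that read-modify-write ops on slice elements use an
+// indexed memory destination on 386, for both register and constant operands.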
+func idxStorePlusOp(x []int32, i int, v int32) {
+	// 386: `ADDL\t[A-Z]+, 4\([A-Z]+\)\([A-Z]+\*4\)`
+	x[i+1] += v
+	// 386: `SUBL\t[A-Z]+, 8\([A-Z]+\)\([A-Z]+\*4\)`
+	x[i+2] -= v
+	// 386: `ANDL\t[A-Z]+, 12\([A-Z]+\)\([A-Z]+\*4\)`
+	x[i+3] &= v
+	// 386: `ORL\t[A-Z]+, 16\([A-Z]+\)\([A-Z]+\*4\)`
+	x[i+4] |= v
+	// 386: `XORL\t[A-Z]+, 20\([A-Z]+\)\([A-Z]+\*4\)`
+	x[i+5] ^= v
+
+	// 386: `ADDL\t[$]77, 24\([A-Z]+\)\([A-Z]+\*4\)`
+	x[i+6] += 77
+	// 386: `ANDL\t[$]77, 28\([A-Z]+\)\([A-Z]+\*4\)`
+	x[i+7] &= 77
+	// 386: `ORL\t[$]77, 32\([A-Z]+\)\([A-Z]+\*4\)`
+	x[i+8] |= 77
+	// 386: `XORL\t[$]77, 36\([A-Z]+\)\([A-Z]+\*4\)`
+	x[i+9] ^= 77
+}