[dev.types] all: merge master into dev.types

Change-Id: Ia6964cb4e09153c15cc9c5b441373d1b3cb8f757
diff --git a/api/next.txt b/api/next.txt
index fe7509b..076f39e 100644
--- a/api/next.txt
+++ b/api/next.txt
@@ -3,3 +3,17 @@
 pkg unicode, var Dives_Akuru *RangeTable
 pkg unicode, var Khitan_Small_Script *RangeTable
 pkg unicode, var Yezidi *RangeTable
+pkg text/template/parse, const NodeComment = 20
+pkg text/template/parse, const NodeComment NodeType
+pkg text/template/parse, const ParseComments = 1
+pkg text/template/parse, const ParseComments Mode
+pkg text/template/parse, method (*CommentNode) Copy() Node
+pkg text/template/parse, method (*CommentNode) String() string
+pkg text/template/parse, method (CommentNode) Position() Pos
+pkg text/template/parse, method (CommentNode) Type() NodeType
+pkg text/template/parse, type CommentNode struct
+pkg text/template/parse, type CommentNode struct, Text string
+pkg text/template/parse, type CommentNode struct, embedded NodeType
+pkg text/template/parse, type CommentNode struct, embedded Pos
+pkg text/template/parse, type Mode uint
+pkg text/template/parse, type Tree struct, Mode Mode
diff --git a/doc/asm.html b/doc/asm.html
index dbbe8f2..cc8598a 100644
--- a/doc/asm.html
+++ b/doc/asm.html
@@ -687,6 +687,13 @@
 MOVQ	g_m(AX), BX   // Move g.m into BX.
 </pre>
 
+<p>
+Register <code>BP</code> is callee-save.
+The assembler automatically inserts <code>BP</code> save/restore when frame size is larger than zero.
+Using <code>BP</code> as a general-purpose register is allowed;
+however, it can interfere with sampling-based profiling.
+</p>
+
 <h3 id="arm">ARM</h3>
 
 <p>
diff --git a/doc/go1.14.html b/doc/go1.14.html
index 35a9f3c..410e0cb 100644
--- a/doc/go1.14.html
+++ b/doc/go1.14.html
@@ -609,6 +609,12 @@
       If a program needs to accept invalid numbers like the empty string,
       consider wrapping the type with <a href="/pkg/encoding/json/#Unmarshaler"><code>Unmarshaler</code></a>.
     </p>
+
+    <p><!-- CL 200237 -->
+      <a href="/pkg/encoding/json/#Unmarshal"><code>Unmarshal</code></a>
+      now supports map keys whose underlying type is string and which implement
+      <a href="/pkg/encoding/#TextUnmarshaler"><code>encoding.TextUnmarshaler</code></a>.
+    </p>
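+
+    <p>
+      For illustration, a minimal sketch of the new behavior (the
+      <code>Celsius</code> type and key values are hypothetical; the usual
+      <code>encoding/json</code>, <code>fmt</code>, <code>log</code>,
+      and <code>strings</code> imports are assumed):
+    </p>
+
+<pre>
+// Celsius has a string underlying type and implements
+// encoding.TextUnmarshaler, so json.Unmarshal now invokes
+// UnmarshalText for each map key of this type.
+type Celsius string
+
+func (c *Celsius) UnmarshalText(b []byte) error {
+	*c = Celsius(strings.TrimSuffix(string(b), "C"))
+	return nil
+}
+
+func main() {
+	var m map[Celsius]int
+	if err := json.Unmarshal([]byte(`{"21C": 1}`), &amp;m); err != nil {
+		log.Fatal(err)
+	}
+	fmt.Println(m) // map[21:1]
+}
+</pre>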
   </dd>
 </dl><!-- encoding/json -->
 
diff --git a/doc/go1.16.html b/doc/go1.16.html
index 4753cf9..95e63d0 100644
--- a/doc/go1.16.html
+++ b/doc/go1.16.html
@@ -43,6 +43,37 @@
 
 <h3 id="go-command">Go command</h3>
 
+<p><!-- golang.org/issue/24031 -->
+  <code>retract</code> directives may now be used in a <code>go.mod</code> file
+  to indicate that certain published versions of the module should not be used
+  by other modules. A module author may retract a version after a severe problem
+  is discovered or if the version was published unintentionally.<br>
+  TODO: write and link to section in golang.org/ref/mod<br>
+  TODO: write and link to tutorial or blog post
+</p>
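+
+<p>
+  For reference while those sections are being written, a sketch of the
+  syntax (module path and versions are hypothetical):
+</p>
+
+<pre>
+module example.com/m
+
+go 1.16
+
+retract (
+	v1.0.1 // published accidentally
+	[v1.1.0, v1.1.5] // contains a severe bug
+)
+</pre>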
+
+<p><!-- golang.org/issue/29062 -->
+  When using <code>go test</code>, a test that
+  calls <code>os.Exit(0)</code> during execution of a test function
+  will now be considered to fail.
+  This helps catch cases in which a test calls code that calls
+  <code>os.Exit(0)</code> and thereby stops all future tests from running.
+  If a <code>TestMain</code> function calls <code>os.Exit(0)</code>,
+  that is still considered a passing test.
+</p>
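+
+<p>
+  A sketch of the distinction (test names are hypothetical; the usual
+  <code>os</code> and <code>testing</code> imports are assumed):
+</p>
+
+<pre>
+func TestMain(m *testing.M) {
+	os.Exit(m.Run()) // exiting from TestMain still passes
+}
+
+func TestExitsEarly(t *testing.T) {
+	os.Exit(0) // now reported as a failure instead of
+	           // silently skipping the remaining tests
+}
+</pre>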
+
+<h4 id="all-pattern">The <code>all</code> pattern</h4>
+
+<p><!-- golang.org/cl/240623 -->
+  When the main module's <code>go.mod</code> file
+  declares <code>go</code> <code>1.16</code> or higher, the <code>all</code>
+  package pattern now matches only those packages that are transitively imported
+  by a package or test found in the main module. (Packages imported by <em>tests
+  of</em> packages imported by the main module are no longer included.) This is
+  the same set of packages retained
+  by <code>go</code> <code>mod</code> <code>vendor</code> since Go 1.11.
+</p>
+
 <p>
   TODO
 </p>
@@ -90,6 +121,28 @@
   TODO
 </p>
 
+<h3 id="net"><a href="/pkg/net/">net</a></h3>
+
+<p><!-- CL 250357 -->
+  The case of I/O on a closed network connection, or I/O on a network
+  connection that is closed before any of the I/O completes, can now
+  be detected using the new <a href="/pkg/net/#ErrClosed"><code>ErrClosed</code></a> error.
+  A typical use would be <code>errors.Is(err, net.ErrClosed)</code>.
+  In earlier releases the only way to reliably detect this case was to
+  match the string returned by the <code>Error</code> method
+  with <code>"use of closed network connection"</code>.
+</p>
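+
+<p>
+  A minimal sketch of the new check, replacing the old string match
+  (the read loop is hypothetical; assumes the <code>errors</code> and
+  <code>net</code> imports):
+</p>
+
+<pre>
+// drain treats I/O on a connection closed by another goroutine
+// as a normal shutdown rather than a failure.
+func drain(conn net.Conn) error {
+	buf := make([]byte, 4096)
+	for {
+		if _, err := conn.Read(buf); err != nil {
+			if errors.Is(err, net.ErrClosed) {
+				return nil // closed elsewhere; not an error
+			}
+			return err
+		}
+	}
+}
+</pre>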
+
+<h3 id="text/template/parse"><a href="/pkg/text/template/parse/">text/template/parse</a></h3>
+
+<p><!-- CL 229398, golang.org/issue/34652 -->
+  A new <a href="/pkg/text/template/parse/#CommentNode"><code>CommentNode</code></a>
+  was added to the parse tree. The new <a href="/pkg/text/template/parse/#Mode"><code>Mode</code></a>
+  field in <code>parse.Tree</code> enables access to it.
+</p>
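+
+<p>
+  A sketch of reading comments with the new API (the template text and
+  error handling are illustrative):
+</p>
+
+<pre>
+tr := parse.New("demo")
+tr.Mode = parse.ParseComments // retain comments in the tree
+tree, err := tr.Parse(`{{/* a comment */}}hello`, "", "",
+	make(map[string]*parse.Tree))
+if err != nil {
+	log.Fatal(err)
+}
+for _, n := range tree.Root.Nodes {
+	if c, ok := n.(*parse.CommentNode); ok {
+		fmt.Println(c.Text) // the comment text
+	}
+}
+</pre>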
+<!-- text/template/parse -->
+
 <h3 id="unicode"><a href="/pkg/unicode/">unicode</a></h3>
 
 <p><!-- CL 248765 -->
@@ -112,3 +165,27 @@
 <p>
   TODO
 </p>
+
+<dl id="net/http"><dt><a href="/pkg/net/http/">net/http</a></dt>
+  <dd>
+    <p><!-- CL 233637 -->
+      In the <a href="/pkg/net/http/"><code>net/http</code></a> package, the
+      behavior of <a href="/pkg/net/http/#StripPrefix"><code>StripPrefix</code></a>
+      has been changed to strip the prefix from the request URL's
+      <code>RawPath</code> field in addition to its <code>Path</code> field.
+      In past releases, only the <code>Path</code> field was trimmed, and so if the
+      request URL contained any escaped characters the URL would be modified to
+      have mismatched <code>Path</code> and <code>RawPath</code> fields.
+      In Go 1.16, <code>StripPrefix</code> trims both fields.
+      If there are escaped characters in the prefix part of the request URL, the
+      handler now serves a 404 response instead of invoking the
+      underlying handler with a mismatched <code>Path</code>/<code>RawPath</code> pair.
+    </p>
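+
+    <p>
+      A sketch of the difference with an escaped path (the prefix,
+      path, and <code>fileHandler</code> are hypothetical):
+    </p>
+
+<pre>
+h := http.StripPrefix("/files", fileHandler)
+// For a request to /files/a%2Fb, Go 1.16 strips the prefix from both
+// fields, leaving Path="/a/b" and RawPath="/a%2Fb". Previously only
+// Path was trimmed, so the two fields disagreed.
+</pre>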
+
+    <p><!-- CL 252497 -->
+     The <a href="/pkg/net/http/"><code>net/http</code></a> package now rejects HTTP range requests
+     of the form <code>"Range": "bytes=--N"</code>, where <code>"-N"</code> is a negative suffix length
+     (for example, <code>"Range": "bytes=--2"</code>), and replies with a <code>416 "Range Not Satisfiable"</code> response.
+    </p>
+  </dd>
+</dl><!-- net/http -->
diff --git a/doc/install-source.html b/doc/install-source.html
index f8cda1d..cbf4eac 100644
--- a/doc/install-source.html
+++ b/doc/install-source.html
@@ -600,6 +600,9 @@
 <td></td><td><code>linux</code></td> <td><code>mips64le</code></td>
 </tr>
 <tr>
+<td></td><td><code>linux</code></td> <td><code>riscv64</code></td>
+</tr>
+<tr>
 <td></td><td><code>linux</code></td> <td><code>s390x</code></td>
 </tr>
 <tr>
diff --git a/misc/cgo/test/test.go b/misc/cgo/test/test.go
index 35bc3a1..05fa52b 100644
--- a/misc/cgo/test/test.go
+++ b/misc/cgo/test/test.go
@@ -319,6 +319,7 @@
 
 // issue 4339
 // We've historically permitted #include <>, so test it here.  Issue 29333.
+// Also see issue 41059.
 #include <issue4339.h>
 
 // issue 4417
diff --git a/misc/wasm/wasm_exec.js b/misc/wasm/wasm_exec.js
index 8501ae7..ef97c4e 100644
--- a/misc/wasm/wasm_exec.js
+++ b/misc/wasm/wasm_exec.js
@@ -11,6 +11,7 @@
 	// - Node.js
 	// - Electron
 	// - Parcel
+	// - Webpack
 
 	if (typeof global !== "undefined") {
 		// global already exists
@@ -28,7 +29,7 @@
 
 	if (!global.fs && global.require) {
 		const fs = require("fs");
-		if (Object.keys(fs) !== 0) {
+		if (typeof fs === "object" && fs !== null && Object.keys(fs).length !== 0) {
 			global.fs = fs;
 		}
 	}
@@ -556,6 +557,7 @@
 	}
 
 	if (
+		typeof module !== "undefined" &&
 		global.require &&
 		global.require.main === module &&
 		global.process &&
diff --git a/src/cmd/asm/internal/asm/testdata/arm64.s b/src/cmd/asm/internal/asm/testdata/arm64.s
index 5a6db05..e106ff2 100644
--- a/src/cmd/asm/internal/asm/testdata/arm64.s
+++ b/src/cmd/asm/internal/asm/testdata/arm64.s
@@ -145,6 +145,37 @@
 	VZIP2	V10.D2, V13.D2, V3.D2           // a379ca4e
 	VZIP1	V17.S2, V4.S2, V26.S2           // 9a38910e
 	VZIP2	V25.S2, V14.S2, V25.S2          // d979990e
+	VUXTL	V30.B8, V30.H8                  // dea7082f
+	VUXTL	V30.H4, V29.S4                  // dda7102f
+	VUXTL	V29.S2, V2.D2                   // a2a7202f
+	VUXTL2	V30.H8, V30.S4                  // dea7106f
+	VUXTL2	V29.S4, V2.D2                   // a2a7206f
+	VUXTL2	V30.B16, V2.H8                  // c2a7086f
+	VBIT	V21.B16, V25.B16, V4.B16        // 241fb56e
+	VBSL	V23.B16, V3.B16, V7.B16         // 671c776e
+	VCMTST	V2.B8, V29.B8, V2.B8            // a28f220e
+	VCMTST	V2.D2, V23.D2, V3.D2            // e38ee24e
+	VSUB	V2.B8, V30.B8, V30.B8           // de87222e
+	VUZP1	V0.B8, V30.B8, V1.B8            // c11b000e
+	VUZP1	V1.B16, V29.B16, V2.B16         // a21b014e
+	VUZP1	V2.H4, V28.H4, V3.H4            // 831b420e
+	VUZP1	V3.H8, V27.H8, V4.H8            // 641b434e
+	VUZP1	V28.S2, V2.S2, V5.S2            // 45189c0e
+	VUZP1	V29.S4, V1.S4, V6.S4            // 26189d4e
+	VUZP1	V30.D2, V0.D2, V7.D2            // 0718de4e
+	VUZP2	V0.D2, V30.D2, V1.D2            // c15bc04e
+	VUZP2	V30.D2, V0.D2, V29.D2           // 1d58de4e
+	VUSHLL	$0, V30.B8, V30.H8              // dea7082f
+	VUSHLL	$0, V30.H4, V29.S4              // dda7102f
+	VUSHLL	$0, V29.S2, V2.D2               // a2a7202f
+	VUSHLL2	$0, V30.B16, V2.H8              // c2a7086f
+	VUSHLL2	$0, V30.H8, V30.S4              // dea7106f
+	VUSHLL2	$0, V29.S4, V2.D2               // a2a7206f
+	VUSHLL	$7, V30.B8, V30.H8              // dea70f2f
+	VUSHLL	$15, V30.H4, V29.S4             // dda71f2f
+	VUSHLL2	$31, V30.S4, V2.D2              // c2a73f6f
+	VBIF	V0.B8, V30.B8, V1.B8            // c11fe02e
+	VBIF	V30.B16, V0.B16, V2.B16         // 021cfe6e
 	MOVD	(R2)(R6.SXTW), R4               // 44c866f8
 	MOVD	(R3)(R6), R5                    // MOVD	(R3)(R6*1), R5                  // 656866f8
 	MOVD	(R2)(R6), R4                    // MOVD	(R2)(R6*1), R4                  // 446866f8
@@ -186,6 +217,10 @@
 	FMOVS	$(0.96875), F3                  // 03f02d1e
 	FMOVD	$(28.0), F4                     // 0490671e
 
+// Move a large constant into a vector register (Vd).
+	FMOVD	$0x8040201008040201, V20         // FMOVD	$-9205322385119247871, V20
+	FMOVQ	$0x8040201008040202, V29         // FMOVQ	$-9205322385119247870, V29
+
 	FMOVS	(R2)(R6), F4       // FMOVS (R2)(R6*1), F4    // 446866bc
 	FMOVS	(R2)(R6<<2), F4                               // 447866bc
 	FMOVD	(R2)(R6), F4       // FMOVD (R2)(R6*1), F4    // 446866fc
@@ -359,18 +394,22 @@
 	VLD4	(R15), [V10.H4, V11.H4, V12.H4, V13.H4]         // ea05400c
 	VLD4.P	32(R24), [V31.B8, V0.B8, V1.B8, V2.B8]          // 1f03df0c
 	VLD4.P	(R13)(R9), [V14.S2, V15.S2, V16.S2, V17.S2]     // VLD4.P	(R13)(R9*1), [V14.S2,V15.S2,V16.S2,V17.S2] // ae09c90c
-	VLD1R	(R0), [V0.B16]					// 00c0404d
-	VLD1R.P	16(R0), [V0.B16]				// 00c0df4d
-	VLD1R.P	(R15)(R1), [V15.H4]				// VLD1R.P	(R15)(R1*1), [V15.H4] // efc5c10d
-	VLD2R	(R15), [V15.H4, V16.H4]				// efc5600d
-	VLD2R.P	32(R0), [V0.D2, V1.D2]				// 00ccff4d
-	VLD2R.P	(R0)(R5), [V31.D1, V0.D1]			// VLD2R.P	(R0)(R5*1), [V31.D1, V0.D1] // 1fcce50d
-	VLD3R	(RSP), [V31.S2, V0.S2, V1.S2]			// ffeb400d
-	VLD3R.P	24(R15), [V15.H4, V16.H4, V17.H4]		// efe5df0d
-	VLD3R.P	(R15)(R6), [V15.H8, V16.H8, V17.H8]		// VLD3R.P	(R15)(R6*1), [V15.H8, V16.H8, V17.H8] // efe5c64d
-	VLD4R	(R0), [V0.B8, V1.B8, V2.B8, V3.B8]		// 00e0600d
-	VLD4R.P	64(RSP), [V31.S4, V0.S4, V1.S4, V2.S4]		// ffebff4d
-	VLD4R.P	(R15)(R9), [V15.H4, V16.H4, V17.H4, V18.H4]	// VLD4R.P	(R15)(R9*1), [V15.H4, V16.H4, V17.H4, V18.H4] // efe5e90d
+	VLD1R	(R1), [V9.B8]                                   // 29c0400d
+	VLD1R.P	(R1), [V9.B8]                                   // 29c0df0d
+	VLD1R.P	1(R1), [V2.B8]                                  // 22c0df0d
+	VLD1R.P	2(R1), [V2.H4]                                  // 22c4df0d
+	VLD1R	(R0), [V0.B16]                                  // 00c0404d
+	VLD1R.P	(R0), [V0.B16]                                  // 00c0df4d
+	VLD1R.P	(R15)(R1), [V15.H4]                             // VLD1R.P	(R15)(R1*1), [V15.H4] // efc5c10d
+	VLD2R	(R15), [V15.H4, V16.H4]                         // efc5600d
+	VLD2R.P	16(R0), [V0.D2, V1.D2]                          // 00ccff4d
+	VLD2R.P	(R0)(R5), [V31.D1, V0.D1]                       // VLD2R.P	(R0)(R5*1), [V31.D1, V0.D1] // 1fcce50d
+	VLD3R	(RSP), [V31.S2, V0.S2, V1.S2]                   // ffeb400d
+	VLD3R.P	6(R15), [V15.H4, V16.H4, V17.H4]                // efe5df0d
+	VLD3R.P	(R15)(R6), [V15.H8, V16.H8, V17.H8]             // VLD3R.P	(R15)(R6*1), [V15.H8, V16.H8, V17.H8] // efe5c64d
+	VLD4R	(R0), [V0.B8, V1.B8, V2.B8, V3.B8]              // 00e0600d
+	VLD4R.P	16(RSP), [V31.S4, V0.S4, V1.S4, V2.S4]          // ffebff4d
+	VLD4R.P	(R15)(R9), [V15.H4, V16.H4, V17.H4, V18.H4]     // VLD4R.P	(R15)(R9*1), [V15.H4, V16.H4, V17.H4, V18.H4] // efe5e90d
 	VST1.P	[V24.S2], 8(R2)                                 // 58789f0c
 	VST1	[V29.S2, V30.S2], (R29)                         // bdab000c
 	VST1	[V14.H4, V15.H4, V16.H4], (R27)                 // 6e67000c
diff --git a/src/cmd/asm/internal/asm/testdata/arm64enc.s b/src/cmd/asm/internal/asm/testdata/arm64enc.s
index 56cf51c..e802ee76 100644
--- a/src/cmd/asm/internal/asm/testdata/arm64enc.s
+++ b/src/cmd/asm/internal/asm/testdata/arm64enc.s
@@ -591,7 +591,7 @@
    FMOVS R8, F15                              // 0f01271e
    FMOVD F2, F9                               // 4940601e
    FMOVS F4, F27                              // 9b40201e
-   //TODO VFMOV $3.125, V8.2D                 // 28f5006f
+   //TODO VFMOV $3.125, V8.D2                 // 28f5006f
    FMSUBS F13, F21, F13, F19                  // b3d50d1f
    FMSUBD F11, F7, F15, F31                   // ff9d4b1f
    //TODO VFMUL V9.S[2], F21, F19             // b39a895f
@@ -648,7 +648,7 @@
    FSUBS F25, F23, F0                         // e03a391e
    FSUBD F11, F13, F24                        // b8396b1e
    //TODO SCVTFSS F30, F20                    // d4db215e
-   //TODO VSCVTF V7.2S, V17.2S                // f1d8210e
+   //TODO VSCVTF V7.S2, V17.S2                // f1d8210e
    SCVTFWS R3, F16                            // 7000221e
    SCVTFWD R20, F4                            // 8402621e
    SCVTFS R16, F12                            // 0c02229e
diff --git a/src/cmd/asm/internal/asm/testdata/arm64error.s b/src/cmd/asm/internal/asm/testdata/arm64error.s
index 0661a47..20b1f3e 100644
--- a/src/cmd/asm/internal/asm/testdata/arm64error.s
+++ b/src/cmd/asm/internal/asm/testdata/arm64error.s
@@ -339,4 +339,18 @@
 	MRS	ICV_EOIR1_EL1, R3                                // ERROR "system register is not readable"
 	MRS	PMSWINC_EL0, R3                                  // ERROR "system register is not readable"
 	MRS	OSLAR_EL1, R3                                    // ERROR "system register is not readable"
+	VLD3R.P	24(R15), [V15.H4,V16.H4,V17.H4]                  // ERROR "invalid post-increment offset"
+	VBIT	V1.H4, V12.H4, V3.H4                             // ERROR "invalid arrangement"
+	VBSL	V1.D2, V12.D2, V3.D2                             // ERROR "invalid arrangement"
+	VUXTL	V30.D2, V30.H8                                   // ERROR "operand mismatch"
+	VUXTL2	V20.B8, V21.H8                                   // ERROR "operand mismatch"
+	VUXTL	V3.D2, V4.B8                                     // ERROR "operand mismatch"
+	VUZP1	V0.B8, V30.B8, V1.B16                            // ERROR "operand mismatch"
+	VUZP2	V0.Q1, V30.Q1, V1.Q1                             // ERROR "invalid arrangement"
+	VUSHLL	$0, V30.D2, V30.H8                               // ERROR "operand mismatch"
+	VUSHLL2	$0, V20.B8, V21.H8                               // ERROR "operand mismatch"
+	VUSHLL	$8, V30.B8, V30.H8                               // ERROR "shift amount out of range"
+	VUSHLL2	$32, V30.S4, V2.D2                               // ERROR "shift amount out of range"
+	VBIF	V0.B8, V1.B8, V2.B16                             // ERROR "operand mismatch"
+	VBIF	V0.D2, V1.D2, V2.D2                              // ERROR "invalid arrangement"
 	RET
diff --git a/src/cmd/cgo/doc.go b/src/cmd/cgo/doc.go
index ca18c45..b3f371b 100644
--- a/src/cmd/cgo/doc.go
+++ b/src/cmd/cgo/doc.go
@@ -112,6 +112,13 @@
 environment variables, respectively; those environment variables
 may include command line options.
 
+The cgo tool will always invoke the C compiler with the source file's
+directory in the include path; i.e. -I${SRCDIR} is always implied. This
+means that if a header file foo/bar.h exists both in the source
+directory and in the system include directory (or some other place
+specified by a -I flag), then "#include <foo/bar.h>" will always find the
+local version in preference to any other version.
+
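+For example, with a (hypothetical) layout of
+
+	pkg.go
+	foo/bar.h
+
+the preamble line
+
+	// #include <foo/bar.h>
+
+in pkg.go finds the local foo/bar.h, even if a system header of the
+same name exists.
+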
 The cgo tool is enabled by default for native builds on systems where
 it is expected to work. It is disabled by default when
 cross-compiling. You can control this by setting the CGO_ENABLED
diff --git a/src/cmd/cgo/gcc.go b/src/cmd/cgo/gcc.go
index a59534e..9179b54 100644
--- a/src/cmd/cgo/gcc.go
+++ b/src/cmd/cgo/gcc.go
@@ -369,7 +369,18 @@
 	fmt.Fprintf(&b, "#line 1 \"completed\"\n"+
 		"int __cgo__1 = __cgo__2;\n")
 
-	stderr := p.gccErrors(b.Bytes())
+	// We need to parse the output from this gcc command, so ensure that it
+	// doesn't have any ANSI escape sequences in it. (TERM=dumb is
+	// insufficient; if the user specifies CGO_CFLAGS=-fdiagnostics-color,
+	// GCC will ignore TERM, and GCC can also be configured at compile-time
+	// to ignore TERM.)
+	stderr := p.gccErrors(b.Bytes(), "-fdiagnostics-color=never")
+	if strings.Contains(stderr, "unrecognized command line option") {
+		// We're using an old version of GCC that doesn't understand
+		// -fdiagnostics-color. Those versions can't print color anyway,
+		// so just rerun without that option.
+		stderr = p.gccErrors(b.Bytes())
+	}
 	if stderr == "" {
 		fatalf("%s produced no output\non input:\n%s", p.gccBaseCmd()[0], b.Bytes())
 	}
@@ -1970,22 +1981,25 @@
 // gccErrors runs gcc over the C program stdin and returns
 // the errors that gcc prints. That is, this function expects
 // gcc to fail.
-func (p *Package) gccErrors(stdin []byte) string {
+func (p *Package) gccErrors(stdin []byte, extraArgs ...string) string {
 	// TODO(rsc): require failure
 	args := p.gccCmd()
 
 	// Optimization options can confuse the error messages; remove them.
-	nargs := make([]string, 0, len(args))
+	nargs := make([]string, 0, len(args)+len(extraArgs))
 	for _, arg := range args {
 		if !strings.HasPrefix(arg, "-O") {
 			nargs = append(nargs, arg)
 		}
 	}
 
-	// Force -O0 optimization but keep the trailing "-" at the end.
-	nargs = append(nargs, "-O0")
-	nl := len(nargs)
-	nargs[nl-2], nargs[nl-1] = nargs[nl-1], nargs[nl-2]
+	// Force -O0 optimization and append extra arguments, but keep the
+	// trailing "-" at the end.
+	li := len(nargs) - 1
+	last := nargs[li]
+	nargs[li] = "-O0"
+	nargs = append(nargs, extraArgs...)
+	nargs = append(nargs, last)
 
 	if *debugGcc {
 		fmt.Fprintf(os.Stderr, "$ %s <<EOF\n", strings.Join(nargs, " "))
diff --git a/src/cmd/compile/internal/amd64/ssa.go b/src/cmd/compile/internal/amd64/ssa.go
index 9d8a092..4ac8779 100644
--- a/src/cmd/compile/internal/amd64/ssa.go
+++ b/src/cmd/compile/internal/amd64/ssa.go
@@ -319,8 +319,8 @@
 			// TODO(khr): issue only the -1 fixup code we need.
 			// For instance, if only the quotient is used, no point in zeroing the remainder.
 
-			j1.To.Val = n1
-			j2.To.Val = s.Pc()
+			j1.To.SetTarget(n1)
+			j2.To.SetTarget(s.Pc())
 		}
 
 	case ssa.OpAMD64HMULQ, ssa.OpAMD64HMULL, ssa.OpAMD64HMULQU, ssa.OpAMD64HMULLU:
diff --git a/src/cmd/compile/internal/arm64/ssa.go b/src/cmd/compile/internal/arm64/ssa.go
index b6bb81a..1d6ea6b 100644
--- a/src/cmd/compile/internal/arm64/ssa.go
+++ b/src/cmd/compile/internal/arm64/ssa.go
@@ -816,7 +816,7 @@
 		}
 		p := s.Prog(v.Op.Asm())
 		p.From.Type = obj.TYPE_REG // assembler encodes conditional bits in Reg
-		p.From.Reg = condBits[v.Aux.(ssa.Op)]
+		p.From.Reg = condBits[ssa.Op(v.AuxInt)]
 		p.Reg = v.Args[0].Reg()
 		p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: r1})
 		p.To.Type = obj.TYPE_REG
diff --git a/src/cmd/compile/internal/gc/alg.go b/src/cmd/compile/internal/gc/alg.go
index e2e2374..c9d71ea 100644
--- a/src/cmd/compile/internal/gc/alg.go
+++ b/src/cmd/compile/internal/gc/alg.go
@@ -429,8 +429,7 @@
 	}
 
 	n := newname(sym)
-	n.SetClass(PFUNC)
-	n.Sym.SetFunc(true)
+	setNodeNameFunc(n)
 	n.Type = functype(nil, []*Node{
 		anonfield(types.NewPtr(t)),
 		anonfield(types.Types[TUINTPTR]),
@@ -646,17 +645,11 @@
 		// Build a list of conditions to satisfy.
 		// The conditions are a list-of-lists. Conditions are reorderable
 		// within each inner list. The outer lists must be evaluated in order.
-		// Even within each inner list, track their order so that we can preserve
-		// aspects of that order. (TODO: latter part needed?)
-		type nodeIdx struct {
-			n   *Node
-			idx int
-		}
-		var conds [][]nodeIdx
-		conds = append(conds, []nodeIdx{})
+		var conds [][]*Node
+		conds = append(conds, []*Node{})
 		and := func(n *Node) {
 			i := len(conds) - 1
-			conds[i] = append(conds[i], nodeIdx{n: n, idx: len(conds[i])})
+			conds[i] = append(conds[i], n)
 		}
 
 		// Walk the struct using memequal for runs of AMEM
@@ -674,7 +667,7 @@
 			if !IsRegularMemory(f.Type) {
 				if EqCanPanic(f.Type) {
 					// Enforce ordering by starting a new set of reorderable conditions.
-					conds = append(conds, []nodeIdx{})
+					conds = append(conds, []*Node{})
 				}
 				p := nodSym(OXDOT, np, f.Sym)
 				q := nodSym(OXDOT, nq, f.Sym)
@@ -688,7 +681,7 @@
 				}
 				if EqCanPanic(f.Type) {
 					// Also enforce ordering after something that can panic.
-					conds = append(conds, []nodeIdx{})
+					conds = append(conds, []*Node{})
 				}
 				i++
 				continue
@@ -713,14 +706,13 @@
 
 		// Sort conditions to put runtime calls last.
 		// Preserve the rest of the ordering.
-		var flatConds []nodeIdx
+		var flatConds []*Node
 		for _, c := range conds {
+			isCall := func(n *Node) bool {
+				return n.Op == OCALL || n.Op == OCALLFUNC
+			}
 			sort.SliceStable(c, func(i, j int) bool {
-				x, y := c[i], c[j]
-				if (x.n.Op != OCALL) == (y.n.Op != OCALL) {
-					return x.idx < y.idx
-				}
-				return x.n.Op != OCALL
+				return !isCall(c[i]) && isCall(c[j])
 			})
 			flatConds = append(flatConds, c...)
 		}
@@ -729,9 +721,9 @@
 		if len(flatConds) == 0 {
 			cond = nodbool(true)
 		} else {
-			cond = flatConds[0].n
+			cond = flatConds[0]
 			for _, c := range flatConds[1:] {
-				cond = nod(OANDAND, cond, c.n)
+				cond = nod(OANDAND, cond, c)
 			}
 		}
 
diff --git a/src/cmd/compile/internal/gc/closure.go b/src/cmd/compile/internal/gc/closure.go
index 23e48939..250be38e 100644
--- a/src/cmd/compile/internal/gc/closure.go
+++ b/src/cmd/compile/internal/gc/closure.go
@@ -107,18 +107,7 @@
 	}
 
 	xfunc.Func.Nname.Sym = closurename(Curfn)
-	disableExport(xfunc.Func.Nname.Sym)
-	if xfunc.Func.Nname.Sym.Def != nil {
-		// The only case we can reach here is when the outer function was redeclared.
-		// In that case, don't bother to redeclare the closure. Otherwise, we will get
-		// a spurious error message, see #17758. While we are here, double check that
-		// we already reported other error.
-		if nsavederrors+nerrors == 0 {
-			Fatalf("unexpected symbol collision %v", xfunc.Func.Nname.Sym)
-		}
-	} else {
-		declare(xfunc.Func.Nname, PFUNC)
-	}
+	setNodeNameFunc(xfunc.Func.Nname)
 	xfunc = typecheck(xfunc, ctxStmt)
 
 	// Type check the body now, but only if we're inside a function.
@@ -473,7 +462,6 @@
 	tfn.List.Set(structargs(t0.Params(), true))
 	tfn.Rlist.Set(structargs(t0.Results(), false))
 
-	disableExport(sym)
 	xfunc := dclfunc(sym, tfn)
 	xfunc.Func.SetDupok(true)
 	xfunc.Func.SetNeedctxt(true)
diff --git a/src/cmd/compile/internal/gc/dcl.go b/src/cmd/compile/internal/gc/dcl.go
index cd64d9a..69eb13f 100644
--- a/src/cmd/compile/internal/gc/dcl.go
+++ b/src/cmd/compile/internal/gc/dcl.go
@@ -90,7 +90,7 @@
 			lineno = n.Pos
 			Fatalf("automatic outside function")
 		}
-		if Curfn != nil {
+		if Curfn != nil && ctxt != PFUNC {
 			Curfn.Func.Dcl = append(Curfn.Func.Dcl, n)
 		}
 		if n.Op == OTYPE {
@@ -297,6 +297,16 @@
 	return n
 }
 
+// importName is like oldname, but it reports an error if sym is from another package and not exported.
+func importName(sym *types.Sym) *Node {
+	n := oldname(sym)
+	if !types.IsExported(sym.Name) && sym.Pkg != localpkg {
+		n.SetDiag(true)
+		yyerror("cannot refer to unexported name %s.%s", sym.Pkg.Name, sym.Name)
+	}
+	return n
+}
+
 // := declarations
 func colasname(n *Node) bool {
 	switch n.Op {
@@ -975,10 +985,14 @@
 	}
 }
 
-// disableExport prevents sym from being included in package export
-// data. To be effectual, it must be called before declare.
-func disableExport(sym *types.Sym) {
-	sym.SetOnExportList(true)
+// setNodeNameFunc marks a node as a function.
+func setNodeNameFunc(n *Node) {
+	if n.Op != ONAME || n.Class() != Pxxx {
+		Fatalf("expected ONAME/Pxxx node, got %v", n)
+	}
+
+	n.SetClass(PFUNC)
+	n.Sym.SetFunc(true)
 }
 
 func dclfunc(sym *types.Sym, tfn *Node) *Node {
@@ -990,7 +1004,7 @@
 	fn.Func.Nname = newfuncnamel(lineno, sym)
 	fn.Func.Nname.Name.Defn = fn
 	fn.Func.Nname.Name.Param.Ntype = tfn
-	declare(fn.Func.Nname, PFUNC)
+	setNodeNameFunc(fn.Func.Nname)
 	funchdr(fn)
 	fn.Func.Nname.Name.Param.Ntype = typecheck(fn.Func.Nname.Name.Param.Ntype, ctxType)
 	return fn
diff --git a/src/cmd/compile/internal/gc/esc.go b/src/cmd/compile/internal/gc/esc.go
index 4b843ab..375331d 100644
--- a/src/cmd/compile/internal/gc/esc.go
+++ b/src/cmd/compile/internal/gc/esc.go
@@ -377,7 +377,7 @@
 		// This really doesn't have much to do with escape analysis per se,
 		// but we are reusing the ability to annotate an individual function
 		// argument and pass those annotations along to importing code.
-		if f.Type.Etype == TUINTPTR {
+		if f.Type.IsUintptr() {
 			if Debug['m'] != 0 {
 				Warnl(f.Pos, "assuming %v is unsafe uintptr", name())
 			}
@@ -407,13 +407,13 @@
 	}
 
 	if fn.Func.Pragma&UintptrEscapes != 0 {
-		if f.Type.Etype == TUINTPTR {
+		if f.Type.IsUintptr() {
 			if Debug['m'] != 0 {
 				Warnl(f.Pos, "marking %v as escaping uintptr", name())
 			}
 			return uintptrEscapesTag
 		}
-		if f.IsDDD() && f.Type.Elem().Etype == TUINTPTR {
+		if f.IsDDD() && f.Type.Elem().IsUintptr() {
 			// final argument is ...uintptr.
 			if Debug['m'] != 0 {
 				Warnl(f.Pos, "marking %v as escaping ...uintptr", name())
diff --git a/src/cmd/compile/internal/gc/escape.go b/src/cmd/compile/internal/gc/escape.go
index ddf89f6..75da439 100644
--- a/src/cmd/compile/internal/gc/escape.go
+++ b/src/cmd/compile/internal/gc/escape.go
@@ -485,7 +485,7 @@
 		e.discard(max)
 
 	case OCONV, OCONVNOP:
-		if checkPtr(e.curfn, 2) && n.Type.Etype == TUNSAFEPTR && n.Left.Type.IsPtr() {
+		if checkPtr(e.curfn, 2) && n.Type.IsUnsafePtr() && n.Left.Type.IsPtr() {
 			// When -d=checkptr=2 is enabled, treat
 			// conversions to unsafe.Pointer as an
 			// escaping operation. This allows better
@@ -493,7 +493,7 @@
 			// easily detect object boundaries on the heap
 			// than the stack.
 			e.assignHeap(n.Left, "conversion to unsafe.Pointer", n)
-		} else if n.Type.Etype == TUNSAFEPTR && n.Left.Type.Etype == TUINTPTR {
+		} else if n.Type.IsUnsafePtr() && n.Left.Type.IsUintptr() {
 			e.unsafeValue(k, n.Left)
 		} else {
 			e.expr(k, n.Left)
@@ -625,7 +625,7 @@
 
 	switch n.Op {
 	case OCONV, OCONVNOP:
-		if n.Left.Type.Etype == TUNSAFEPTR {
+		if n.Left.Type.IsUnsafePtr() {
 			e.expr(k, n.Left)
 		} else {
 			e.discard(n.Left)
@@ -1029,6 +1029,9 @@
 	if e.curfn == nil {
 		Fatalf("e.curfn isn't set")
 	}
+	if n != nil && n.Type != nil && n.Type.NotInHeap() {
+		yyerrorl(n.Pos, "%v is go:notinheap; stack allocation disallowed", n.Type)
+	}
 
 	n = canonicalNode(n)
 	loc := &EscLocation{
diff --git a/src/cmd/compile/internal/gc/fmt.go b/src/cmd/compile/internal/gc/fmt.go
index d6cc9fa..866cd0a 100644
--- a/src/cmd/compile/internal/gc/fmt.go
+++ b/src/cmd/compile/internal/gc/fmt.go
@@ -1616,7 +1616,8 @@
 			}
 			n1.exprfmt(s, nprec, mode)
 		}
-
+	case ODDD:
+		mode.Fprintf(s, "...")
 	default:
 		mode.Fprintf(s, "<node %v>", n.Op)
 	}
diff --git a/src/cmd/compile/internal/gc/gsubr.go b/src/cmd/compile/internal/gc/gsubr.go
index 15a84a8..480d411 100644
--- a/src/cmd/compile/internal/gc/gsubr.go
+++ b/src/cmd/compile/internal/gc/gsubr.go
@@ -342,6 +342,6 @@
 	if p.To.Type != obj.TYPE_BRANCH {
 		Fatalf("patch: not a branch")
 	}
-	p.To.Val = to
+	p.To.SetTarget(to)
 	p.To.Offset = to.Pc
 }
diff --git a/src/cmd/compile/internal/gc/init.go b/src/cmd/compile/internal/gc/init.go
index 03e475e..94cbcf9 100644
--- a/src/cmd/compile/internal/gc/init.go
+++ b/src/cmd/compile/internal/gc/init.go
@@ -45,7 +45,6 @@
 	if len(nf) > 0 {
 		lineno = nf[0].Pos // prolog/epilog gets line number of first init stmt
 		initializers := lookup("init")
-		disableExport(initializers)
 		fn := dclfunc(initializers, nod(OTFUNC, nil, nil))
 		for _, dcl := range dummyInitFn.Func.Dcl {
 			dcl.Name.Curfn = fn
diff --git a/src/cmd/compile/internal/gc/noder.go b/src/cmd/compile/internal/gc/noder.go
index 802aab2..5dce533 100644
--- a/src/cmd/compile/internal/gc/noder.go
+++ b/src/cmd/compile/internal/gc/noder.go
@@ -653,7 +653,7 @@
 		obj := p.expr(expr.X)
 		if obj.Op == OPACK {
 			obj.Name.SetUsed(true)
-			return oldname(restrictlookup(expr.Sel.Value, obj.Name.Pkg))
+			return importName(obj.Name.Pkg.Lookup(expr.Sel.Value))
 		}
 		n := nodSym(OXDOT, obj, p.name(expr.Sel))
 		n.Pos = p.pos(expr) // lineno may have been changed by p.expr(expr.X)
@@ -857,7 +857,7 @@
 		p.setlineno(method)
 		var n *Node
 		if method.Name == nil {
-			n = p.nodSym(method, ODCLFIELD, oldname(p.packname(method.Type)), nil)
+			n = p.nodSym(method, ODCLFIELD, importName(p.packname(method.Type)), nil)
 		} else {
 			mname := p.name(method.Name)
 			sig := p.typeExpr(method.Type)
@@ -896,7 +896,7 @@
 			def.Name.SetUsed(true)
 			pkg = def.Name.Pkg
 		}
-		return restrictlookup(expr.Sel.Value, pkg)
+		return pkg.Lookup(expr.Sel.Value)
 	}
 	panic(fmt.Sprintf("unexpected packname: %#v", expr))
 }
@@ -911,7 +911,7 @@
 	}
 
 	sym := p.packname(typ)
-	n := p.nodSym(typ, ODCLFIELD, oldname(sym), lookup(sym.Name))
+	n := p.nodSym(typ, ODCLFIELD, importName(sym), lookup(sym.Name))
 	n.SetEmbedded(true)
 
 	if isStar {
@@ -1641,10 +1641,3 @@
 	}
 	return n
 }
-
-func unparen(x *Node) *Node {
-	for x.Op == OPAREN {
-		x = x.Left
-	}
-	return x
-}
diff --git a/src/cmd/compile/internal/gc/order.go b/src/cmd/compile/internal/gc/order.go
index aa91160..412f073 100644
--- a/src/cmd/compile/internal/gc/order.go
+++ b/src/cmd/compile/internal/gc/order.go
@@ -502,6 +502,7 @@
 			x := o.copyExpr(arg.Left, arg.Left.Type, false)
 			x.Name.SetKeepalive(true)
 			arg.Left = x
+			n.SetNeedsWrapper(true)
 		}
 	}
 
diff --git a/src/cmd/compile/internal/gc/pgen.go b/src/cmd/compile/internal/gc/pgen.go
index ca8cccf..7426259 100644
--- a/src/cmd/compile/internal/gc/pgen.go
+++ b/src/cmd/compile/internal/gc/pgen.go
@@ -507,7 +507,7 @@
 		if Ctxt.FixedFrameSize() == 0 {
 			offs -= int64(Widthptr)
 		}
-		if objabi.Framepointer_enabled(objabi.GOOS, objabi.GOARCH) || objabi.GOARCH == "arm64" {
+		if objabi.Framepointer_enabled || objabi.GOARCH == "arm64" {
 			// There is a word space for FP on ARM64 even if the frame pointer is disabled
 			offs -= int64(Widthptr)
 		}
@@ -703,7 +703,7 @@
 		if Ctxt.FixedFrameSize() == 0 {
 			base -= int64(Widthptr)
 		}
-		if objabi.Framepointer_enabled(objabi.GOOS, objabi.GOARCH) || objabi.GOARCH == "arm64" {
+		if objabi.Framepointer_enabled || objabi.GOARCH == "arm64" {
 			// There is a word space for FP on ARM64 even if the frame pointer is disabled
 			base -= int64(Widthptr)
 		}
diff --git a/src/cmd/compile/internal/gc/pgen_test.go b/src/cmd/compile/internal/gc/pgen_test.go
index 41f0808a..b1db298 100644
--- a/src/cmd/compile/internal/gc/pgen_test.go
+++ b/src/cmd/compile/internal/gc/pgen_test.go
@@ -20,7 +20,7 @@
 
 func typeWithPointers() *types.Type {
 	t := types.New(TSTRUCT)
-	f := &types.Field{Type: types.New(TPTR)}
+	f := &types.Field{Type: types.NewPtr(types.New(TINT))}
 	t.SetFields([]*types.Field{f})
 	return t
 }
@@ -181,14 +181,6 @@
 		nodeWithClass(Node{Type: &types.Type{}, Sym: &types.Sym{Name: "xyz"}}, PAUTO),
 		nodeWithClass(Node{Type: typeWithoutPointers(), Sym: &types.Sym{}}, PAUTO),
 	}
-	// haspointers updates Type.Haspointers as a side effect, so
-	// exercise this function on all inputs so that reflect.DeepEqual
-	// doesn't produce false positives.
-	for i := range want {
-		want[i].Type.HasPointers()
-		inp[i].Type.HasPointers()
-	}
-
 	sort.Sort(byStackVar(inp))
 	if !reflect.DeepEqual(want, inp) {
 		t.Error("sort failed")
diff --git a/src/cmd/compile/internal/gc/plive.go b/src/cmd/compile/internal/gc/plive.go
index 398bfe5..8976ed6 100644
--- a/src/cmd/compile/internal/gc/plive.go
+++ b/src/cmd/compile/internal/gc/plive.go
@@ -436,7 +436,7 @@
 		case ssa.LocalSlot:
 			return mask
 		case *ssa.Register:
-			if ptrOnly && !v.Type.HasHeapPointer() {
+			if ptrOnly && !v.Type.HasPointers() {
 				return mask
 			}
 			regs[0] = loc
@@ -451,7 +451,7 @@
 				if loc1 == nil {
 					continue
 				}
-				if ptrOnly && !v.Type.FieldType(i).HasHeapPointer() {
+				if ptrOnly && !v.Type.FieldType(i).HasPointers() {
 					continue
 				}
 				regs[nreg] = loc1.(*ssa.Register)
@@ -568,13 +568,13 @@
 	if t.Align > 0 && off&int64(t.Align-1) != 0 {
 		Fatalf("onebitwalktype1: invalid initial alignment: type %v has alignment %d, but offset is %v", t, t.Align, off)
 	}
+	if !t.HasPointers() {
+		// Note: this case ensures that pointers to go:notinheap types
+		// are not considered pointers by garbage collection and stack copying.
+		return
+	}
 
 	switch t.Etype {
-	case TINT8, TUINT8, TINT16, TUINT16,
-		TINT32, TUINT32, TINT64, TUINT64,
-		TINT, TUINT, TUINTPTR, TBOOL,
-		TFLOAT32, TFLOAT64, TCOMPLEX64, TCOMPLEX128:
-
 	case TPTR, TUNSAFEPTR, TFUNC, TCHAN, TMAP:
 		if off&int64(Widthptr-1) != 0 {
 			Fatalf("onebitwalktype1: invalid alignment, %v", t)
diff --git a/src/cmd/compile/internal/gc/range.go b/src/cmd/compile/internal/gc/range.go
index d78a5f0..5434b01 100644
--- a/src/cmd/compile/internal/gc/range.go
+++ b/src/cmd/compile/internal/gc/range.go
@@ -586,7 +586,7 @@
 	n.Nbody.Append(nod(OAS, hn, tmp))
 
 	var fn *Node
-	if a.Type.Elem().HasHeapPointer() {
+	if a.Type.Elem().HasPointers() {
 		// memclrHasPointers(hp, hn)
 		Curfn.Func.setWBPos(stmt.Pos)
 		fn = mkcall("memclrHasPointers", nil, nil, hp, hn)
diff --git a/src/cmd/compile/internal/gc/select.go b/src/cmd/compile/internal/gc/select.go
index 3812a0e..97e0424 100644
--- a/src/cmd/compile/internal/gc/select.go
+++ b/src/cmd/compile/internal/gc/select.go
@@ -251,10 +251,8 @@
 	r = typecheck(r, ctxStmt)
 	init = append(init, r)
 
+	// No initialization for order; runtime.selectgo is responsible for that.
 	order := temp(types.NewArray(types.Types[TUINT16], 2*int64(ncas)))
-	r = nod(OAS, order, nil)
-	r = typecheck(r, ctxStmt)
-	init = append(init, r)
 
 	var pc0, pcs *Node
 	if flag_race {
diff --git a/src/cmd/compile/internal/gc/ssa.go b/src/cmd/compile/internal/gc/ssa.go
index c8fb013..89644cd 100644
--- a/src/cmd/compile/internal/gc/ssa.go
+++ b/src/cmd/compile/internal/gc/ssa.go
@@ -295,7 +295,10 @@
 // worker indicates which of the backend workers is doing the processing.
 func buildssa(fn *Node, worker int) *ssa.Func {
 	name := fn.funcname()
-	printssa := name == ssaDump
+	printssa := false
+	if ssaDump != "" { // match either a simple name e.g. "(*Reader).Reset", or a package.name e.g. "compress/gzip.(*Reader).Reset"
+		printssa = name == ssaDump || myimportpath+"."+name == ssaDump
+	}
 	var astBuf *bytes.Buffer
 	if printssa {
 		astBuf = &bytes.Buffer{}
@@ -2110,7 +2113,7 @@
 		}
 
 		// unsafe.Pointer <--> *T
-		if to.Etype == TUNSAFEPTR && from.IsPtrShaped() || from.Etype == TUNSAFEPTR && to.IsPtrShaped() {
+		if to.IsUnsafePtr() && from.IsPtrShaped() || from.IsUnsafePtr() && to.IsPtrShaped() {
 			return v
 		}
 
@@ -6179,7 +6182,7 @@
 
 	// Resolve branches, and relax DefaultStmt into NotStmt
 	for _, br := range s.Branches {
-		br.P.To.Val = s.bstart[br.B.ID]
+		br.P.To.SetTarget(s.bstart[br.B.ID])
 		if br.P.Pos.IsStmt() != src.PosIsStmt {
 			br.P.Pos = br.P.Pos.WithNotStmt()
 		} else if v0 := br.B.FirstPossibleStmtValue(); v0 != nil && v0.Pos.Line() == br.P.Pos.Line() && v0.Pos.IsStmt() == src.PosIsStmt {
diff --git a/src/cmd/compile/internal/gc/subr.go b/src/cmd/compile/internal/gc/subr.go
index 9362c74..d3ba53f 100644
--- a/src/cmd/compile/internal/gc/subr.go
+++ b/src/cmd/compile/internal/gc/subr.go
@@ -271,13 +271,6 @@
 	return lookupN(prefix, int(n))
 }
 
-func restrictlookup(name string, pkg *types.Pkg) *types.Sym {
-	if !types.IsExported(name) && pkg != localpkg {
-		yyerror("cannot refer to unexported name %s.%s", pkg.Name, name)
-	}
-	return pkg.Lookup(name)
-}
-
 // find all the exported symbols in package opkg
 // and make them available in the current package
 func importdot(opkg *types.Pkg, pack *Node) {
@@ -788,12 +781,12 @@
 	}
 
 	// 8. src is a pointer or uintptr and dst is unsafe.Pointer.
-	if (src.IsPtr() || src.Etype == TUINTPTR) && dst.Etype == TUNSAFEPTR {
+	if (src.IsPtr() || src.IsUintptr()) && dst.IsUnsafePtr() {
 		return OCONVNOP
 	}
 
 	// 9. src is unsafe.Pointer and dst is a pointer or uintptr.
-	if src.Etype == TUNSAFEPTR && (dst.IsPtr() || dst.Etype == TUINTPTR) {
+	if src.IsUnsafePtr() && (dst.IsPtr() || dst.IsUintptr()) {
 		return OCONVNOP
 	}
 
@@ -1550,7 +1543,6 @@
 	tfn.List.Set(structargs(method.Type.Params(), true))
 	tfn.Rlist.Set(structargs(method.Type.Results(), false))
 
-	disableExport(newnam)
 	fn := dclfunc(newnam, tfn)
 	fn.Func.SetDupok(true)
 
@@ -1638,8 +1630,7 @@
 	sym := Runtimepkg.Lookup("memhash")
 
 	n := newname(sym)
-	n.SetClass(PFUNC)
-	n.Sym.SetFunc(true)
+	setNodeNameFunc(n)
 	n.Type = functype(nil, []*Node{
 		anonfield(types.NewPtr(t)),
 		anonfield(types.Types[TUINTPTR]),
diff --git a/src/cmd/compile/internal/gc/syntax.go b/src/cmd/compile/internal/gc/syntax.go
index 47e5e59..5580f78 100644
--- a/src/cmd/compile/internal/gc/syntax.go
+++ b/src/cmd/compile/internal/gc/syntax.go
@@ -141,19 +141,20 @@
 	nodeInitorder, _                   // tracks state during init1; two bits
 	_, _                               // second nodeInitorder bit
 	_, nodeHasBreak
-	_, nodeNoInline  // used internally by inliner to indicate that a function call should not be inlined; set for OCALLFUNC and OCALLMETH only
-	_, nodeImplicit  // implicit OADDR or ODEREF; ++/-- statement represented as OASOP; or ANDNOT lowered to OAND
-	_, nodeIsDDD     // is the argument variadic
-	_, nodeDiag      // already printed error about this
-	_, nodeColas     // OAS resulting from :=
-	_, nodeNonNil    // guaranteed to be non-nil
-	_, nodeTransient // storage can be reused immediately after this statement
-	_, nodeBounded   // bounds check unnecessary
-	_, nodeHasCall   // expression contains a function call
-	_, nodeLikely    // if statement condition likely
-	_, nodeHasVal    // node.E contains a Val
-	_, nodeHasOpt    // node.E contains an Opt
-	_, nodeEmbedded  // ODCLFIELD embedded type
+	_, nodeNoInline     // used internally by inliner to indicate that a function call should not be inlined; set for OCALLFUNC and OCALLMETH only
+	_, nodeImplicit     // implicit OADDR or ODEREF; ++/-- statement represented as OASOP; or ANDNOT lowered to OAND
+	_, nodeIsDDD        // is the argument variadic
+	_, nodeDiag         // already printed error about this
+	_, nodeColas        // OAS resulting from :=
+	_, nodeNonNil       // guaranteed to be non-nil
+	_, nodeTransient    // storage can be reused immediately after this statement
+	_, nodeBounded      // bounds check unnecessary
+	_, nodeHasCall      // expression contains a function call
+	_, nodeLikely       // if statement condition likely
+	_, nodeHasVal       // node.E contains a Val
+	_, nodeHasOpt       // node.E contains an Opt
+	_, nodeEmbedded     // ODCLFIELD embedded type
+	_, nodeNeedsWrapper // OCALLxxx node that needs to be wrapped
 )
 
 func (n *Node) Class() Class     { return Class(n.flags.get3(nodeClass)) }
@@ -286,6 +287,20 @@
 	n.Xoffset = x
 }
 
+func (n *Node) NeedsWrapper() bool {
+	return n.flags&nodeNeedsWrapper != 0
+}
+
+// SetNeedsWrapper indicates that an OCALLxxx node needs to be wrapped by a closure.
+func (n *Node) SetNeedsWrapper(b bool) {
+	switch n.Op {
+	case OCALLFUNC, OCALLMETH, OCALLINTER:
+	default:
+		Fatalf("Node.SetNeedsWrapper %v", n.Op)
+	}
+	n.flags.set(nodeNeedsWrapper, b)
+}
+
 // mayBeShared reports whether n may occur in multiple places in the AST.
 // Extra care must be taken when mutating such a node.
 func (n *Node) mayBeShared() bool {
diff --git a/src/cmd/compile/internal/gc/walk.go b/src/cmd/compile/internal/gc/walk.go
index 77f88d8..361de7e 100644
--- a/src/cmd/compile/internal/gc/walk.go
+++ b/src/cmd/compile/internal/gc/walk.go
@@ -232,7 +232,11 @@
 			n.Left = copyany(n.Left, &n.Ninit, true)
 
 		default:
-			n.Left = walkexpr(n.Left, &n.Ninit)
+			if n.Left.NeedsWrapper() {
+				n.Left = wrapCall(n.Left, &n.Ninit)
+			} else {
+				n.Left = walkexpr(n.Left, &n.Ninit)
+			}
 		}
 
 	case OFOR, OFORUNTIL:
@@ -954,11 +958,11 @@
 	case OCONV, OCONVNOP:
 		n.Left = walkexpr(n.Left, init)
 		if n.Op == OCONVNOP && checkPtr(Curfn, 1) {
-			if n.Type.IsPtr() && n.Left.Type.Etype == TUNSAFEPTR { // unsafe.Pointer to *T
+			if n.Type.IsPtr() && n.Left.Type.IsUnsafePtr() { // unsafe.Pointer to *T
 				n = walkCheckPtrAlignment(n, init, nil)
 				break
 			}
-			if n.Type.Etype == TUNSAFEPTR && n.Left.Type.Etype == TUINTPTR { // uintptr to unsafe.Pointer
+			if n.Type.IsUnsafePtr() && n.Left.Type.IsUintptr() { // uintptr to unsafe.Pointer
 				n = walkCheckPtrArithmetic(n, init)
 				break
 			}
@@ -1123,7 +1127,7 @@
 		n.List.SetSecond(walkexpr(n.List.Second(), init))
 
 	case OSLICE, OSLICEARR, OSLICESTR, OSLICE3, OSLICE3ARR:
-		checkSlice := checkPtr(Curfn, 1) && n.Op == OSLICE3ARR && n.Left.Op == OCONVNOP && n.Left.Left.Type.Etype == TUNSAFEPTR
+		checkSlice := checkPtr(Curfn, 1) && n.Op == OSLICE3ARR && n.Left.Op == OCONVNOP && n.Left.Left.Type.IsUnsafePtr()
 		if checkSlice {
 			n.Left.Left = walkexpr(n.Left.Left, init)
 		} else {
@@ -1156,6 +1160,9 @@
 		}
 
 	case ONEW:
+		if n.Type.Elem().NotInHeap() {
+			yyerror("%v is go:notinheap; heap allocation disallowed", n.Type.Elem())
+		}
 		if n.Esc == EscNone {
 			if n.Type.Elem().Width >= maxImplicitStackVarSize {
 				Fatalf("large ONEW with EscNone: %v", n)
@@ -1324,6 +1331,9 @@
 			l = r
 		}
 		t := n.Type
+		if t.Elem().NotInHeap() {
+			yyerror("%v is go:notinheap; heap allocation disallowed", t.Elem())
+		}
 		if n.Esc == EscNone {
 			if !isSmallMakeSlice(n) {
 				Fatalf("non-small OMAKESLICE with EscNone: %v", n)
@@ -1365,10 +1375,6 @@
 			// When len and cap can fit into int, use makeslice instead of
 			// makeslice64, which is faster and shorter on 32 bit platforms.
 
-			if t.Elem().NotInHeap() {
-				yyerror("%v is go:notinheap; heap allocation disallowed", t.Elem())
-			}
-
 			len, cap := l, r
 
 			fnname := "makeslice64"
@@ -1403,7 +1409,7 @@
 
 		t := n.Type
 		if t.Elem().NotInHeap() {
-			Fatalf("%v is go:notinheap; heap allocation disallowed", t.Elem())
+			yyerror("%v is go:notinheap; heap allocation disallowed", t.Elem())
 		}
 
 		length := conv(n.Left, types.Types[TINT])
@@ -2012,9 +2018,6 @@
 }
 
 func callnew(t *types.Type) *Node {
-	if t.NotInHeap() {
-		yyerror("%v is go:notinheap; heap allocation disallowed", t)
-	}
 	dowidth(t)
 	n := nod(ONEWOBJ, typename(t), nil)
 	n.Type = types.NewPtr(t)
@@ -2589,7 +2592,7 @@
 	}
 	switch algtype(t.Key()) {
 	case AMEM32:
-		if !t.Key().HasHeapPointer() {
+		if !t.Key().HasPointers() {
 			return mapfast32
 		}
 		if Widthptr == 4 {
@@ -2597,7 +2600,7 @@
 		}
 		Fatalf("small pointer %v", t.Key())
 	case AMEM64:
-		if !t.Key().HasHeapPointer() {
+		if !t.Key().HasPointers() {
 			return mapfast64
 		}
 		if Widthptr == 8 {
@@ -2744,7 +2747,7 @@
 	nodes.Append(nod(OAS, s, nt))
 
 	var ncopy *Node
-	if elemtype.HasHeapPointer() {
+	if elemtype.HasPointers() {
 		// copy(s[len(l1):], l2)
 		nptr1 := nod(OSLICE, s, nil)
 		nptr1.Type = s.Type
@@ -3082,7 +3085,7 @@
 // Also works if b is a string.
 //
 func copyany(n *Node, init *Nodes, runtimecall bool) *Node {
-	if n.Left.Type.Elem().HasHeapPointer() {
+	if n.Left.Type.Elem().HasPointers() {
 		Curfn.Func.setWBPos(n.Pos)
 		fn := writebarrierfn("typedslicecopy", n.Left.Type.Elem(), n.Right.Type.Elem())
 		n.Left = cheapexpr(n.Left, init)
@@ -3167,8 +3170,7 @@
 	case ASPECIAL:
 		sym := typesymprefix(".eq", t)
 		n := newname(sym)
-		n.SetClass(PFUNC)
-		n.Sym.SetFunc(true)
+		setNodeNameFunc(n)
 		n.Type = functype(nil, []*Node{
 			anonfield(types.NewPtr(t)),
 			anonfield(types.NewPtr(t)),
@@ -3859,6 +3861,14 @@
 //		builtin(a1, a2, a3)
 //	}(x, y, z)
 // for print, println, and delete.
+//
+// Rewrite
+//	go f(x, y, uintptr(unsafe.Pointer(z)))
+// into
+//	go func(a1, a2, a3) {
+//		f(a1, a2, uintptr(a3))
+//	}(x, y, unsafe.Pointer(z))
+// for calls whose arguments include unsafe-uintptr conversions.
 
 var wrapCall_prgen int
 
@@ -3870,9 +3880,17 @@
 		init.AppendNodes(&n.Ninit)
 	}
 
+	isBuiltinCall := n.Op != OCALLFUNC && n.Op != OCALLMETH && n.Op != OCALLINTER
+	// origArgs records which arguments are uintptr(unsafe.Pointer)
+	// conversions, so they can be reapplied inside the wrapper.
+	origArgs := make([]*Node, n.List.Len())
 	t := nod(OTFUNC, nil, nil)
 	for i, arg := range n.List.Slice() {
 		s := lookupN("a", i)
+		if !isBuiltinCall && arg.Op == OCONVNOP && arg.Type.IsUintptr() && arg.Left.Type.IsUnsafePtr() {
+			origArgs[i] = arg
+			arg = arg.Left
+			n.List.SetIndex(i, arg)
+		}
 		t.List.Append(symfield(s, arg.Type))
 	}
 
@@ -3880,10 +3898,22 @@
 	sym := lookupN("wrap·", wrapCall_prgen)
 	fn := dclfunc(sym, t)
 
-	a := nod(n.Op, nil, nil)
-	a.List.Set(paramNnames(t.Type))
-	a = typecheck(a, ctxStmt)
-	fn.Nbody.Set1(a)
+	args := paramNnames(t.Type)
+	for i, origArg := range origArgs {
+		if origArg == nil {
+			continue
+		}
+		arg := nod(origArg.Op, args[i], nil)
+		arg.Type = origArg.Type
+		args[i] = arg
+	}
+	call := nod(n.Op, nil, nil)
+	if !isBuiltinCall {
+		call.Op = OCALL
+		call.Left = n.Left
+	}
+	call.List.Set(args)
+	fn.Nbody.Set1(call)
 
 	funcbody()
 
@@ -3891,12 +3921,12 @@
 	typecheckslice(fn.Nbody.Slice(), ctxStmt)
 	xtop = append(xtop, fn)
 
-	a = nod(OCALL, nil, nil)
-	a.Left = fn.Func.Nname
-	a.List.Set(n.List.Slice())
-	a = typecheck(a, ctxStmt)
-	a = walkexpr(a, init)
-	return a
+	call = nod(OCALL, nil, nil)
+	call.Left = fn.Func.Nname
+	call.List.Set(n.List.Slice())
+	call = typecheck(call, ctxStmt)
+	call = walkexpr(call, init)
+	return call
 }
 
 // substArgTypes substitutes the given list of types for
@@ -4011,7 +4041,7 @@
 				walk(n.Left)
 			}
 		case OCONVNOP:
-			if n.Left.Type.Etype == TUNSAFEPTR {
+			if n.Left.Type.IsUnsafePtr() {
 				n.Left = cheapexpr(n.Left, init)
 				originals = append(originals, convnop(n.Left, types.Types[TUNSAFEPTR]))
 			}
diff --git a/src/cmd/compile/internal/ppc64/ssa.go b/src/cmd/compile/internal/ppc64/ssa.go
index 4d2ad48..f8d9ac2 100644
--- a/src/cmd/compile/internal/ppc64/ssa.go
+++ b/src/cmd/compile/internal/ppc64/ssa.go
@@ -629,23 +629,6 @@
 		p.To.Type = obj.TYPE_REG
 		p.To.Reg = r
 
-	case ssa.OpPPC64MaskIfNotCarry:
-		r := v.Reg()
-		p := s.Prog(v.Op.Asm())
-		p.From.Type = obj.TYPE_REG
-		p.From.Reg = ppc64.REGZERO
-		p.To.Type = obj.TYPE_REG
-		p.To.Reg = r
-
-	case ssa.OpPPC64ADDconstForCarry:
-		r1 := v.Args[0].Reg()
-		p := s.Prog(v.Op.Asm())
-		p.Reg = r1
-		p.From.Type = obj.TYPE_CONST
-		p.From.Offset = v.AuxInt
-		p.To.Type = obj.TYPE_REG
-		p.To.Reg = ppc64.REGTMP // Ignored; this is for the carry effect.
-
 	case ssa.OpPPC64NEG, ssa.OpPPC64FNEG, ssa.OpPPC64FSQRT, ssa.OpPPC64FSQRTS, ssa.OpPPC64FFLOOR, ssa.OpPPC64FTRUNC, ssa.OpPPC64FCEIL,
 		ssa.OpPPC64FCTIDZ, ssa.OpPPC64FCTIWZ, ssa.OpPPC64FCFID, ssa.OpPPC64FCFIDS, ssa.OpPPC64FRSP, ssa.OpPPC64CNTLZD, ssa.OpPPC64CNTLZW,
 		ssa.OpPPC64POPCNTD, ssa.OpPPC64POPCNTW, ssa.OpPPC64POPCNTB, ssa.OpPPC64MFVSRD, ssa.OpPPC64MTVSRD, ssa.OpPPC64FABS, ssa.OpPPC64FNABS,
@@ -666,6 +649,14 @@
 		p.To.Type = obj.TYPE_REG
 		p.To.Reg = v.Reg()
 
+	case ssa.OpPPC64SUBFCconst:
+		p := s.Prog(v.Op.Asm())
+		p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: v.AuxInt})
+		p.From.Type = obj.TYPE_REG
+		p.From.Reg = v.Args[0].Reg()
+		p.To.Type = obj.TYPE_REG
+		p.To.Reg = v.Reg()
+
 	case ssa.OpPPC64ANDCCconst:
 		p := s.Prog(v.Op.Asm())
 		p.Reg = v.Args[0].Reg()
@@ -1802,7 +1793,7 @@
 		v.Fatalf("Pseudo-op should not make it to codegen: %s ###\n", v.LongString())
 	case ssa.OpPPC64InvertFlags:
 		v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
-	case ssa.OpPPC64FlagEQ, ssa.OpPPC64FlagLT, ssa.OpPPC64FlagGT, ssa.OpPPC64FlagCarrySet, ssa.OpPPC64FlagCarryClear:
+	case ssa.OpPPC64FlagEQ, ssa.OpPPC64FlagLT, ssa.OpPPC64FlagGT:
 		v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
 	case ssa.OpClobber:
 		// TODO: implement for clobberdead experiment. Nop is ok for now.
diff --git a/src/cmd/compile/internal/s390x/ssa.go b/src/cmd/compile/internal/s390x/ssa.go
index 4cf4b70..00d253c 100644
--- a/src/cmd/compile/internal/s390x/ssa.go
+++ b/src/cmd/compile/internal/s390x/ssa.go
@@ -338,8 +338,8 @@
 				n.To.Reg = dividend
 			}
 
-			j.To.Val = n
-			j2.To.Val = s.Pc()
+			j.To.SetTarget(n)
+			j2.To.SetTarget(s.Pc())
 		}
 	case ssa.OpS390XADDconst, ssa.OpS390XADDWconst:
 		opregregimm(s, v.Op.Asm(), v.Reg(), v.Args[0].Reg(), v.AuxInt)
diff --git a/src/cmd/compile/internal/ssa/addressingmodes.go b/src/cmd/compile/internal/ssa/addressingmodes.go
index 97a5ab4..aae0def 100644
--- a/src/cmd/compile/internal/ssa/addressingmodes.go
+++ b/src/cmd/compile/internal/ssa/addressingmodes.go
@@ -7,12 +7,14 @@
 // addressingModes combines address calculations into memory operations
 // that can perform complicated addressing modes.
 func addressingModes(f *Func) {
+	isInImmediateRange := is32Bit
 	switch f.Config.arch {
 	default:
 		// Most architectures can't do this.
 		return
 	case "amd64", "386":
-		// TODO: s390x?
+	case "s390x":
+		isInImmediateRange = is20Bit
 	}
 
 	var tmp []*Value
@@ -40,7 +42,7 @@
 			switch [2]auxType{opcodeTable[v.Op].auxType, opcodeTable[p.Op].auxType} {
 			case [2]auxType{auxSymOff, auxInt32}:
 				// TODO: introduce auxSymOff32
-				if !is32Bit(v.AuxInt + p.AuxInt) {
+				if !isInImmediateRange(v.AuxInt + p.AuxInt) {
 					continue
 				}
 				v.AuxInt += p.AuxInt
@@ -48,7 +50,7 @@
 				if v.Aux != nil && p.Aux != nil {
 					continue
 				}
-				if !is32Bit(v.AuxInt + p.AuxInt) {
+				if !isInImmediateRange(v.AuxInt + p.AuxInt) {
 					continue
 				}
 				if p.Aux != nil {
@@ -398,4 +400,61 @@
 	[2]Op{Op386ANDLconstmodify, Op386LEAL4}: Op386ANDLconstmodifyidx4,
 	[2]Op{Op386ORLconstmodify, Op386LEAL4}:  Op386ORLconstmodifyidx4,
 	[2]Op{Op386XORLconstmodify, Op386LEAL4}: Op386XORLconstmodifyidx4,
+
+	// s390x
+	[2]Op{OpS390XMOVDload, OpS390XADD}: OpS390XMOVDloadidx,
+	[2]Op{OpS390XMOVWload, OpS390XADD}: OpS390XMOVWloadidx,
+	[2]Op{OpS390XMOVHload, OpS390XADD}: OpS390XMOVHloadidx,
+	[2]Op{OpS390XMOVBload, OpS390XADD}: OpS390XMOVBloadidx,
+
+	[2]Op{OpS390XMOVWZload, OpS390XADD}: OpS390XMOVWZloadidx,
+	[2]Op{OpS390XMOVHZload, OpS390XADD}: OpS390XMOVHZloadidx,
+	[2]Op{OpS390XMOVBZload, OpS390XADD}: OpS390XMOVBZloadidx,
+
+	[2]Op{OpS390XMOVDBRload, OpS390XADD}: OpS390XMOVDBRloadidx,
+	[2]Op{OpS390XMOVWBRload, OpS390XADD}: OpS390XMOVWBRloadidx,
+	[2]Op{OpS390XMOVHBRload, OpS390XADD}: OpS390XMOVHBRloadidx,
+
+	[2]Op{OpS390XFMOVDload, OpS390XADD}: OpS390XFMOVDloadidx,
+	[2]Op{OpS390XFMOVSload, OpS390XADD}: OpS390XFMOVSloadidx,
+
+	[2]Op{OpS390XMOVDstore, OpS390XADD}: OpS390XMOVDstoreidx,
+	[2]Op{OpS390XMOVWstore, OpS390XADD}: OpS390XMOVWstoreidx,
+	[2]Op{OpS390XMOVHstore, OpS390XADD}: OpS390XMOVHstoreidx,
+	[2]Op{OpS390XMOVBstore, OpS390XADD}: OpS390XMOVBstoreidx,
+
+	[2]Op{OpS390XMOVDBRstore, OpS390XADD}: OpS390XMOVDBRstoreidx,
+	[2]Op{OpS390XMOVWBRstore, OpS390XADD}: OpS390XMOVWBRstoreidx,
+	[2]Op{OpS390XMOVHBRstore, OpS390XADD}: OpS390XMOVHBRstoreidx,
+
+	[2]Op{OpS390XFMOVDstore, OpS390XADD}: OpS390XFMOVDstoreidx,
+	[2]Op{OpS390XFMOVSstore, OpS390XADD}: OpS390XFMOVSstoreidx,
+
+	[2]Op{OpS390XMOVDload, OpS390XMOVDaddridx}: OpS390XMOVDloadidx,
+	[2]Op{OpS390XMOVWload, OpS390XMOVDaddridx}: OpS390XMOVWloadidx,
+	[2]Op{OpS390XMOVHload, OpS390XMOVDaddridx}: OpS390XMOVHloadidx,
+	[2]Op{OpS390XMOVBload, OpS390XMOVDaddridx}: OpS390XMOVBloadidx,
+
+	[2]Op{OpS390XMOVWZload, OpS390XMOVDaddridx}: OpS390XMOVWZloadidx,
+	[2]Op{OpS390XMOVHZload, OpS390XMOVDaddridx}: OpS390XMOVHZloadidx,
+	[2]Op{OpS390XMOVBZload, OpS390XMOVDaddridx}: OpS390XMOVBZloadidx,
+
+	[2]Op{OpS390XMOVDBRload, OpS390XMOVDaddridx}: OpS390XMOVDBRloadidx,
+	[2]Op{OpS390XMOVWBRload, OpS390XMOVDaddridx}: OpS390XMOVWBRloadidx,
+	[2]Op{OpS390XMOVHBRload, OpS390XMOVDaddridx}: OpS390XMOVHBRloadidx,
+
+	[2]Op{OpS390XFMOVDload, OpS390XMOVDaddridx}: OpS390XFMOVDloadidx,
+	[2]Op{OpS390XFMOVSload, OpS390XMOVDaddridx}: OpS390XFMOVSloadidx,
+
+	[2]Op{OpS390XMOVDstore, OpS390XMOVDaddridx}: OpS390XMOVDstoreidx,
+	[2]Op{OpS390XMOVWstore, OpS390XMOVDaddridx}: OpS390XMOVWstoreidx,
+	[2]Op{OpS390XMOVHstore, OpS390XMOVDaddridx}: OpS390XMOVHstoreidx,
+	[2]Op{OpS390XMOVBstore, OpS390XMOVDaddridx}: OpS390XMOVBstoreidx,
+
+	[2]Op{OpS390XMOVDBRstore, OpS390XMOVDaddridx}: OpS390XMOVDBRstoreidx,
+	[2]Op{OpS390XMOVWBRstore, OpS390XMOVDaddridx}: OpS390XMOVWBRstoreidx,
+	[2]Op{OpS390XMOVHBRstore, OpS390XMOVDaddridx}: OpS390XMOVHBRstoreidx,
+
+	[2]Op{OpS390XFMOVDstore, OpS390XMOVDaddridx}: OpS390XFMOVDstoreidx,
+	[2]Op{OpS390XFMOVSstore, OpS390XMOVDaddridx}: OpS390XFMOVSstoreidx,
 }
diff --git a/src/cmd/compile/internal/ssa/check.go b/src/cmd/compile/internal/ssa/check.go
index 98e1b79..828f645 100644
--- a/src/cmd/compile/internal/ssa/check.go
+++ b/src/cmd/compile/internal/ssa/check.go
@@ -171,10 +171,10 @@
 				canHaveAuxInt = true
 				canHaveAux = true
 			case auxCCop:
-				if _, ok := v.Aux.(Op); !ok {
-					f.Fatalf("bad type %T for CCop in %v", v.Aux, v)
+				if opcodeTable[Op(v.AuxInt)].name == "OpInvalid" {
+					f.Fatalf("value %v has an AuxInt value that is a valid opcode", v)
 				}
-				canHaveAux = true
+				canHaveAuxInt = true
 			case auxS390XCCMask:
 				if _, ok := v.Aux.(s390x.CCMask); !ok {
 					f.Fatalf("bad type %T for S390XCCMask in %v", v.Aux, v)
diff --git a/src/cmd/compile/internal/ssa/decompose.go b/src/cmd/compile/internal/ssa/decompose.go
index c59ec4c..ab27ba8 100644
--- a/src/cmd/compile/internal/ssa/decompose.go
+++ b/src/cmd/compile/internal/ssa/decompose.go
@@ -23,9 +23,11 @@
 	}
 
 	// Decompose other values
-	applyRewrite(f, rewriteBlockdec, rewriteValuedec)
+	// Note: deadcode is false because we need to keep the original
+	// values around so the name component resolution below can still work.
+	applyRewrite(f, rewriteBlockdec, rewriteValuedec, leaveDeadValues)
 	if f.Config.RegSize == 4 {
-		applyRewrite(f, rewriteBlockdec64, rewriteValuedec64)
+		applyRewrite(f, rewriteBlockdec64, rewriteValuedec64, leaveDeadValues)
 	}
 
 	// Split up named values into their components.
@@ -139,7 +141,7 @@
 
 func decomposeSlicePhi(v *Value) {
 	types := &v.Block.Func.Config.Types
-	ptrType := types.BytePtr
+	ptrType := v.Type.Elem().PtrTo()
 	lenType := types.Int
 
 	ptr := v.Block.NewValue0(v.Pos, OpPhi, ptrType)
@@ -215,7 +217,7 @@
 }
 
 func decomposeArgs(f *Func) {
-	applyRewrite(f, rewriteBlockdecArgs, rewriteValuedecArgs)
+	applyRewrite(f, rewriteBlockdecArgs, rewriteValuedecArgs, removeDeadValues)
 }
 
 func decomposeUser(f *Func) {
diff --git a/src/cmd/compile/internal/ssa/func.go b/src/cmd/compile/internal/ssa/func.go
index 6718b77..32df0c0 100644
--- a/src/cmd/compile/internal/ssa/func.go
+++ b/src/cmd/compile/internal/ssa/func.go
@@ -257,6 +257,49 @@
 	f.Warnl(f.Entry.Pos, "\t%s\t%s%s\t%s", n, key, value, f.Name)
 }
 
+// unCacheLine removes v from f's constant cache "line" for aux,
+// resets v.InCache when it is found (and removed),
+// and returns whether v was found in that line.
+func (f *Func) unCacheLine(v *Value, aux int64) bool {
+	vv := f.constants[aux]
+	for i, cv := range vv {
+		if v == cv {
+			vv[i] = vv[len(vv)-1]
+			vv[len(vv)-1] = nil
+			f.constants[aux] = vv[0 : len(vv)-1]
+			v.InCache = false
+			return true
+		}
+	}
+	return false
+}
+
+// unCache removes v from f's constant cache.
+func (f *Func) unCache(v *Value) {
+	if v.InCache {
+		aux := v.AuxInt
+		if f.unCacheLine(v, aux) {
+			return
+		}
+		if aux == 0 {
+			switch v.Op {
+			case OpConstNil:
+				aux = constNilMagic
+			case OpConstSlice:
+				aux = constSliceMagic
+			case OpConstString:
+				aux = constEmptyStringMagic
+			case OpConstInterface:
+				aux = constInterfaceMagic
+			}
+			if aux != 0 && f.unCacheLine(v, aux) {
+				return
+			}
+		}
+		f.Fatalf("unCached value %s not found in cache, auxInt=0x%x, adjusted aux=0x%x", v.LongString(), v.AuxInt, aux)
+	}
+}
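+
+// Note: OpConstNil, OpConstSlice, OpConstString and OpConstInterface all
+// carry AuxInt == 0, so they would collide on the cache line
+// f.constants[0]; they are cached under the distinct magic keys instead.
+// That is why unCache retries the lookup with the adjusted key when the
+// plain AuxInt line does not contain v.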
+
 // freeValue frees a value. It must no longer be referenced or have any args.
 func (f *Func) freeValue(v *Value) {
 	if v.Block == nil {
@@ -270,19 +313,8 @@
 	}
 	// Clear everything but ID (which we reuse).
 	id := v.ID
-
-	// Values with zero arguments and OpOffPtr values might be cached, so remove them there.
-	nArgs := opcodeTable[v.Op].argLen
-	if nArgs == 0 || v.Op == OpOffPtr {
-		vv := f.constants[v.AuxInt]
-		for i, cv := range vv {
-			if v == cv {
-				vv[i] = vv[len(vv)-1]
-				vv[len(vv)-1] = nil
-				f.constants[v.AuxInt] = vv[0 : len(vv)-1]
-				break
-			}
-		}
+	if v.InCache {
+		f.unCache(v)
 	}
 	*v = Value{}
 	v.ID = id
@@ -548,6 +580,7 @@
 		v = f.Entry.NewValue0(src.NoXPos, op, t)
 	}
 	f.constants[c] = append(vv, v)
+	v.InCache = true
 	return v
 }
 
diff --git a/src/cmd/compile/internal/ssa/gen/AMD64.rules b/src/cmd/compile/internal/ssa/gen/AMD64.rules
index 5111ef79..8898fe5 100644
--- a/src/cmd/compile/internal/ssa/gen/AMD64.rules
+++ b/src/cmd/compile/internal/ssa/gen/AMD64.rules
@@ -436,69 +436,69 @@
 
 // Absorb InvertFlags
 (CMOVQ(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS) x y (InvertFlags cond))
-    -> (CMOVQ(EQ|NE|GT|LT|GE|LE|CS|HI|LS|CC) x y cond)
+    => (CMOVQ(EQ|NE|GT|LT|GE|LE|CS|HI|LS|CC) x y cond)
 (CMOVL(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS) x y (InvertFlags cond))
-    -> (CMOVL(EQ|NE|GT|LT|GE|LE|CS|HI|LS|CC) x y cond)
+    => (CMOVL(EQ|NE|GT|LT|GE|LE|CS|HI|LS|CC) x y cond)
 (CMOVW(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS) x y (InvertFlags cond))
-    -> (CMOVW(EQ|NE|GT|LT|GE|LE|CS|HI|LS|CC) x y cond)
+    => (CMOVW(EQ|NE|GT|LT|GE|LE|CS|HI|LS|CC) x y cond)
 
 // Absorb constants generated during lower
-(CMOV(QEQ|QLE|QGE|QCC|QLS|LEQ|LLE|LGE|LCC|LLS|WEQ|WLE|WGE|WCC|WLS) _ x (FlagEQ)) -> x
-(CMOV(QNE|QLT|QGT|QCS|QHI|LNE|LLT|LGT|LCS|LHI|WNE|WLT|WGT|WCS|WHI) y _ (FlagEQ)) -> y
-(CMOV(QNE|QGT|QGE|QHI|QCC|LNE|LGT|LGE|LHI|LCC|WNE|WGT|WGE|WHI|WCC) _ x (FlagGT_UGT)) -> x
-(CMOV(QEQ|QLE|QLT|QLS|QCS|LEQ|LLE|LLT|LLS|LCS|WEQ|WLE|WLT|WLS|WCS) y _ (FlagGT_UGT)) -> y
-(CMOV(QNE|QGT|QGE|QLS|QCS|LNE|LGT|LGE|LLS|LCS|WNE|WGT|WGE|WLS|WCS) _ x (FlagGT_ULT)) -> x
-(CMOV(QEQ|QLE|QLT|QHI|QCC|LEQ|LLE|LLT|LHI|LCC|WEQ|WLE|WLT|WHI|WCC) y _ (FlagGT_ULT)) -> y
-(CMOV(QNE|QLT|QLE|QCS|QLS|LNE|LLT|LLE|LCS|LLS|WNE|WLT|WLE|WCS|WLS) _ x (FlagLT_ULT)) -> x
-(CMOV(QEQ|QGT|QGE|QHI|QCC|LEQ|LGT|LGE|LHI|LCC|WEQ|WGT|WGE|WHI|WCC) y _ (FlagLT_ULT)) -> y
-(CMOV(QNE|QLT|QLE|QHI|QCC|LNE|LLT|LLE|LHI|LCC|WNE|WLT|WLE|WHI|WCC) _ x (FlagLT_UGT)) -> x
-(CMOV(QEQ|QGT|QGE|QCS|QLS|LEQ|LGT|LGE|LCS|LLS|WEQ|WGT|WGE|WCS|WLS) y _ (FlagLT_UGT)) -> y
+(CMOV(QEQ|QLE|QGE|QCC|QLS|LEQ|LLE|LGE|LCC|LLS|WEQ|WLE|WGE|WCC|WLS) _ x (FlagEQ)) => x
+(CMOV(QNE|QLT|QGT|QCS|QHI|LNE|LLT|LGT|LCS|LHI|WNE|WLT|WGT|WCS|WHI) y _ (FlagEQ)) => y
+(CMOV(QNE|QGT|QGE|QHI|QCC|LNE|LGT|LGE|LHI|LCC|WNE|WGT|WGE|WHI|WCC) _ x (FlagGT_UGT)) => x
+(CMOV(QEQ|QLE|QLT|QLS|QCS|LEQ|LLE|LLT|LLS|LCS|WEQ|WLE|WLT|WLS|WCS) y _ (FlagGT_UGT)) => y
+(CMOV(QNE|QGT|QGE|QLS|QCS|LNE|LGT|LGE|LLS|LCS|WNE|WGT|WGE|WLS|WCS) _ x (FlagGT_ULT)) => x
+(CMOV(QEQ|QLE|QLT|QHI|QCC|LEQ|LLE|LLT|LHI|LCC|WEQ|WLE|WLT|WHI|WCC) y _ (FlagGT_ULT)) => y
+(CMOV(QNE|QLT|QLE|QCS|QLS|LNE|LLT|LLE|LCS|LLS|WNE|WLT|WLE|WCS|WLS) _ x (FlagLT_ULT)) => x
+(CMOV(QEQ|QGT|QGE|QHI|QCC|LEQ|LGT|LGE|LHI|LCC|WEQ|WGT|WGE|WHI|WCC) y _ (FlagLT_ULT)) => y
+(CMOV(QNE|QLT|QLE|QHI|QCC|LNE|LLT|LLE|LHI|LCC|WNE|WLT|WLE|WHI|WCC) _ x (FlagLT_UGT)) => x
+(CMOV(QEQ|QGT|QGE|QCS|QLS|LEQ|LGT|LGE|LCS|LLS|WEQ|WGT|WGE|WCS|WLS) y _ (FlagLT_UGT)) => y
 
 // Miscellaneous
-(IsNonNil p) -> (SETNE (TESTQ p p))
-(IsInBounds idx len) -> (SETB (CMPQ idx len))
-(IsSliceInBounds idx len) -> (SETBE (CMPQ idx len))
-(NilCheck ...) -> (LoweredNilCheck ...)
-(GetG ...) -> (LoweredGetG ...)
-(GetClosurePtr ...) -> (LoweredGetClosurePtr ...)
-(GetCallerPC ...) -> (LoweredGetCallerPC ...)
-(GetCallerSP ...) -> (LoweredGetCallerSP ...)
+(IsNonNil p) => (SETNE (TESTQ p p))
+(IsInBounds idx len) => (SETB (CMPQ idx len))
+(IsSliceInBounds idx len) => (SETBE (CMPQ idx len))
+(NilCheck ...) => (LoweredNilCheck ...)
+(GetG ...) => (LoweredGetG ...)
+(GetClosurePtr ...) => (LoweredGetClosurePtr ...)
+(GetCallerPC ...) => (LoweredGetCallerPC ...)
+(GetCallerSP ...) => (LoweredGetCallerSP ...)
 
-(HasCPUFeature {s}) -> (SETNE (CMPQconst [0] (LoweredHasCPUFeature {s})))
+(HasCPUFeature {s}) => (SETNE (CMPQconst [0] (LoweredHasCPUFeature {s})))
 (Addr ...) -> (LEAQ ...)
-(LocalAddr {sym} base _) -> (LEAQ {sym} base)
+(LocalAddr {sym} base _) => (LEAQ {sym} base)
 
-(MOVBstore [off] {sym} ptr y:(SETL x) mem) && y.Uses == 1 -> (SETLstore [off] {sym} ptr x mem)
-(MOVBstore [off] {sym} ptr y:(SETLE x) mem) && y.Uses == 1 -> (SETLEstore [off] {sym} ptr x mem)
-(MOVBstore [off] {sym} ptr y:(SETG x) mem) && y.Uses == 1 -> (SETGstore [off] {sym} ptr x mem)
-(MOVBstore [off] {sym} ptr y:(SETGE x) mem) && y.Uses == 1 -> (SETGEstore [off] {sym} ptr x mem)
-(MOVBstore [off] {sym} ptr y:(SETEQ x) mem) && y.Uses == 1 -> (SETEQstore [off] {sym} ptr x mem)
-(MOVBstore [off] {sym} ptr y:(SETNE x) mem) && y.Uses == 1 -> (SETNEstore [off] {sym} ptr x mem)
-(MOVBstore [off] {sym} ptr y:(SETB x) mem) && y.Uses == 1 -> (SETBstore [off] {sym} ptr x mem)
-(MOVBstore [off] {sym} ptr y:(SETBE x) mem) && y.Uses == 1 -> (SETBEstore [off] {sym} ptr x mem)
-(MOVBstore [off] {sym} ptr y:(SETA x) mem) && y.Uses == 1 -> (SETAstore [off] {sym} ptr x mem)
-(MOVBstore [off] {sym} ptr y:(SETAE x) mem) && y.Uses == 1 -> (SETAEstore [off] {sym} ptr x mem)
+(MOVBstore [off] {sym} ptr y:(SETL x) mem) && y.Uses == 1 => (SETLstore [off] {sym} ptr x mem)
+(MOVBstore [off] {sym} ptr y:(SETLE x) mem) && y.Uses == 1 => (SETLEstore [off] {sym} ptr x mem)
+(MOVBstore [off] {sym} ptr y:(SETG x) mem) && y.Uses == 1 => (SETGstore [off] {sym} ptr x mem)
+(MOVBstore [off] {sym} ptr y:(SETGE x) mem) && y.Uses == 1 => (SETGEstore [off] {sym} ptr x mem)
+(MOVBstore [off] {sym} ptr y:(SETEQ x) mem) && y.Uses == 1 => (SETEQstore [off] {sym} ptr x mem)
+(MOVBstore [off] {sym} ptr y:(SETNE x) mem) && y.Uses == 1 => (SETNEstore [off] {sym} ptr x mem)
+(MOVBstore [off] {sym} ptr y:(SETB x) mem) && y.Uses == 1 => (SETBstore [off] {sym} ptr x mem)
+(MOVBstore [off] {sym} ptr y:(SETBE x) mem) && y.Uses == 1 => (SETBEstore [off] {sym} ptr x mem)
+(MOVBstore [off] {sym} ptr y:(SETA x) mem) && y.Uses == 1 => (SETAstore [off] {sym} ptr x mem)
+(MOVBstore [off] {sym} ptr y:(SETAE x) mem) && y.Uses == 1 => (SETAEstore [off] {sym} ptr x mem)
 
 // block rewrites
-(If (SETL  cmp) yes no) -> (LT  cmp yes no)
-(If (SETLE cmp) yes no) -> (LE  cmp yes no)
-(If (SETG  cmp) yes no) -> (GT  cmp yes no)
-(If (SETGE cmp) yes no) -> (GE  cmp yes no)
-(If (SETEQ cmp) yes no) -> (EQ  cmp yes no)
-(If (SETNE cmp) yes no) -> (NE  cmp yes no)
-(If (SETB  cmp) yes no) -> (ULT cmp yes no)
-(If (SETBE cmp) yes no) -> (ULE cmp yes no)
-(If (SETA  cmp) yes no) -> (UGT cmp yes no)
-(If (SETAE cmp) yes no) -> (UGE cmp yes no)
-(If (SETO cmp) yes no) -> (OS cmp yes no)
+(If (SETL  cmp) yes no) => (LT  cmp yes no)
+(If (SETLE cmp) yes no) => (LE  cmp yes no)
+(If (SETG  cmp) yes no) => (GT  cmp yes no)
+(If (SETGE cmp) yes no) => (GE  cmp yes no)
+(If (SETEQ cmp) yes no) => (EQ  cmp yes no)
+(If (SETNE cmp) yes no) => (NE  cmp yes no)
+(If (SETB  cmp) yes no) => (ULT cmp yes no)
+(If (SETBE cmp) yes no) => (ULE cmp yes no)
+(If (SETA  cmp) yes no) => (UGT cmp yes no)
+(If (SETAE cmp) yes no) => (UGE cmp yes no)
+(If (SETO cmp) yes no) => (OS cmp yes no)
 
 // Special case for floating point - LF/LEF not generated
-(If (SETGF  cmp) yes no) -> (UGT  cmp yes no)
-(If (SETGEF cmp) yes no) -> (UGE  cmp yes no)
-(If (SETEQF cmp) yes no) -> (EQF  cmp yes no)
-(If (SETNEF cmp) yes no) -> (NEF  cmp yes no)
+(If (SETGF  cmp) yes no) => (UGT  cmp yes no)
+(If (SETGEF cmp) yes no) => (UGE  cmp yes no)
+(If (SETEQF cmp) yes no) => (EQF  cmp yes no)
+(If (SETNEF cmp) yes no) => (NEF  cmp yes no)
 
-(If cond yes no) -> (NE (TESTB cond cond) yes no)
+(If cond yes no) => (NE (TESTB cond cond) yes no)
 
 // Atomic loads.  Other than preserving their ordering with respect to other loads, nothing special here.
 (AtomicLoad8 ...) -> (MOVBatomicload ...)
@@ -508,22 +508,22 @@
 
 // Atomic stores.  We use XCHG to prevent the hardware reordering a subsequent load.
 // TODO: most runtime uses of atomic stores don't need that property.  Use normal stores for those?
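 // (XCHG with a memory operand carries an implicit LOCK prefix and acts as a
 // full barrier, whereas a plain MOV store may be reordered after a later
 // load under the x86-TSO memory model.)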
-(AtomicStore8 ptr val mem) -> (Select1 (XCHGB <types.NewTuple(typ.UInt8,types.TypeMem)> val ptr mem))
-(AtomicStore32 ptr val mem) -> (Select1 (XCHGL <types.NewTuple(typ.UInt32,types.TypeMem)> val ptr mem))
-(AtomicStore64 ptr val mem) -> (Select1 (XCHGQ <types.NewTuple(typ.UInt64,types.TypeMem)> val ptr mem))
-(AtomicStorePtrNoWB ptr val mem) -> (Select1 (XCHGQ <types.NewTuple(typ.BytePtr,types.TypeMem)> val ptr mem))
+(AtomicStore8 ptr val mem) => (Select1 (XCHGB <types.NewTuple(typ.UInt8,types.TypeMem)> val ptr mem))
+(AtomicStore32 ptr val mem) => (Select1 (XCHGL <types.NewTuple(typ.UInt32,types.TypeMem)> val ptr mem))
+(AtomicStore64 ptr val mem) => (Select1 (XCHGQ <types.NewTuple(typ.UInt64,types.TypeMem)> val ptr mem))
+(AtomicStorePtrNoWB ptr val mem) => (Select1 (XCHGQ <types.NewTuple(typ.BytePtr,types.TypeMem)> val ptr mem))
 
 // Atomic exchanges.
-(AtomicExchange32 ptr val mem) -> (XCHGL val ptr mem)
-(AtomicExchange64 ptr val mem) -> (XCHGQ val ptr mem)
+(AtomicExchange32 ptr val mem) => (XCHGL val ptr mem)
+(AtomicExchange64 ptr val mem) => (XCHGQ val ptr mem)
 
 // Atomic adds.
-(AtomicAdd32 ptr val mem) -> (AddTupleFirst32 val (XADDLlock val ptr mem))
-(AtomicAdd64 ptr val mem) -> (AddTupleFirst64 val (XADDQlock val ptr mem))
-(Select0 <t> (AddTupleFirst32 val tuple)) -> (ADDL val (Select0 <t> tuple))
-(Select1     (AddTupleFirst32   _ tuple)) -> (Select1 tuple)
-(Select0 <t> (AddTupleFirst64 val tuple)) -> (ADDQ val (Select0 <t> tuple))
-(Select1     (AddTupleFirst64   _ tuple)) -> (Select1 tuple)
+(AtomicAdd32 ptr val mem) => (AddTupleFirst32 val (XADDLlock val ptr mem))
+(AtomicAdd64 ptr val mem) => (AddTupleFirst64 val (XADDQlock val ptr mem))
+(Select0 <t> (AddTupleFirst32 val tuple)) => (ADDL val (Select0 <t> tuple))
+(Select1     (AddTupleFirst32   _ tuple)) => (Select1 tuple)
+(Select0 <t> (AddTupleFirst64 val tuple)) => (ADDQ val (Select0 <t> tuple))
+(Select1     (AddTupleFirst64   _ tuple)) => (Select1 tuple)
 
 // Atomic compare and swap.
 (AtomicCompareAndSwap32 ...) -> (CMPXCHGLlock ...)
@@ -536,9 +536,9 @@
 // Write barrier.
 (WB ...) -> (LoweredWB ...)
 
-(PanicBounds [kind] x y mem) && boundsABI(kind) == 0 -> (LoweredPanicBoundsA [kind] x y mem)
-(PanicBounds [kind] x y mem) && boundsABI(kind) == 1 -> (LoweredPanicBoundsB [kind] x y mem)
-(PanicBounds [kind] x y mem) && boundsABI(kind) == 2 -> (LoweredPanicBoundsC [kind] x y mem)
+(PanicBounds [kind] x y mem) && boundsABI(kind) == 0 => (LoweredPanicBoundsA [kind] x y mem)
+(PanicBounds [kind] x y mem) && boundsABI(kind) == 1 => (LoweredPanicBoundsB [kind] x y mem)
+(PanicBounds [kind] x y mem) && boundsABI(kind) == 2 => (LoweredPanicBoundsC [kind] x y mem)
 
 // ***************************
 // Above: lowering rules
@@ -547,23 +547,23 @@
 // TODO: Should the optimizations be a separate pass?
 
 // Fold boolean tests into blocks
-(NE (TESTB (SETL  cmp) (SETL  cmp)) yes no) -> (LT  cmp yes no)
-(NE (TESTB (SETLE cmp) (SETLE cmp)) yes no) -> (LE  cmp yes no)
-(NE (TESTB (SETG  cmp) (SETG  cmp)) yes no) -> (GT  cmp yes no)
-(NE (TESTB (SETGE cmp) (SETGE cmp)) yes no) -> (GE  cmp yes no)
-(NE (TESTB (SETEQ cmp) (SETEQ cmp)) yes no) -> (EQ  cmp yes no)
-(NE (TESTB (SETNE cmp) (SETNE cmp)) yes no) -> (NE  cmp yes no)
-(NE (TESTB (SETB  cmp) (SETB  cmp)) yes no) -> (ULT cmp yes no)
-(NE (TESTB (SETBE cmp) (SETBE cmp)) yes no) -> (ULE cmp yes no)
-(NE (TESTB (SETA  cmp) (SETA  cmp)) yes no) -> (UGT cmp yes no)
-(NE (TESTB (SETAE cmp) (SETAE cmp)) yes no) -> (UGE cmp yes no)
-(NE (TESTB (SETO cmp) (SETO cmp)) yes no) -> (OS cmp yes no)
+(NE (TESTB (SETL  cmp) (SETL  cmp)) yes no) => (LT  cmp yes no)
+(NE (TESTB (SETLE cmp) (SETLE cmp)) yes no) => (LE  cmp yes no)
+(NE (TESTB (SETG  cmp) (SETG  cmp)) yes no) => (GT  cmp yes no)
+(NE (TESTB (SETGE cmp) (SETGE cmp)) yes no) => (GE  cmp yes no)
+(NE (TESTB (SETEQ cmp) (SETEQ cmp)) yes no) => (EQ  cmp yes no)
+(NE (TESTB (SETNE cmp) (SETNE cmp)) yes no) => (NE  cmp yes no)
+(NE (TESTB (SETB  cmp) (SETB  cmp)) yes no) => (ULT cmp yes no)
+(NE (TESTB (SETBE cmp) (SETBE cmp)) yes no) => (ULE cmp yes no)
+(NE (TESTB (SETA  cmp) (SETA  cmp)) yes no) => (UGT cmp yes no)
+(NE (TESTB (SETAE cmp) (SETAE cmp)) yes no) => (UGE cmp yes no)
+(NE (TESTB (SETO cmp) (SETO cmp)) yes no) => (OS cmp yes no)
 
 // Unsigned comparisons to 0/1
-(ULT (TEST(Q|L|W|B) x x) yes no) -> (First no yes)
-(UGE (TEST(Q|L|W|B) x x) yes no) -> (First yes no)
-(SETB (TEST(Q|L|W|B) x x)) -> (ConstBool [0])
-(SETAE (TEST(Q|L|W|B) x x)) -> (ConstBool [1])
+(ULT (TEST(Q|L|W|B) x x) yes no) => (First no yes)
+(UGE (TEST(Q|L|W|B) x x) yes no) => (First yes no)
+(SETB (TEST(Q|L|W|B) x x)) => (ConstBool [false])
+(SETAE (TEST(Q|L|W|B) x x)) => (ConstBool [true])
 
 // x & 1 != 0 -> x & 1
 (SETNE (TEST(B|W)const [1] x)) => (AND(L|L)const [1] x)
@@ -574,75 +574,75 @@
 // into tests for carry flags.
 // ULT and SETB check the carry flag; they are identical to CS and SETCS. Same, mutatis
 // mutandis, for UGE and SETAE, and CC and SETCC.
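 // For example, for a uint64 x, an "x & 128 != 0" test matches
 // (NE (TESTQconst [128] x)) and becomes (ULT (BTQconst [7] x)):
 // BT copies bit 7 of x into the carry flag, and ULT branches on carry set.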
-((NE|EQ) (TESTL (SHLL (MOVLconst [1]) x) y)) -> ((ULT|UGE) (BTL x y))
-((NE|EQ) (TESTQ (SHLQ (MOVQconst [1]) x) y)) -> ((ULT|UGE) (BTQ x y))
-((NE|EQ) (TESTLconst [c] x)) && isUint32PowerOfTwo(c)
-    -> ((ULT|UGE) (BTLconst [log2uint32(c)] x))
-((NE|EQ) (TESTQconst [c] x)) && isUint64PowerOfTwo(c)
-    -> ((ULT|UGE) (BTQconst [log2(c)] x))
+((NE|EQ) (TESTL (SHLL (MOVLconst [1]) x) y)) => ((ULT|UGE) (BTL x y))
+((NE|EQ) (TESTQ (SHLQ (MOVQconst [1]) x) y)) => ((ULT|UGE) (BTQ x y))
+((NE|EQ) (TESTLconst [c] x)) && isUint32PowerOfTwo(int64(c))
+    => ((ULT|UGE) (BTLconst [int8(log32(c))] x))
+((NE|EQ) (TESTQconst [c] x)) && isUint64PowerOfTwo(int64(c))
+    => ((ULT|UGE) (BTQconst [int8(log32(c))] x))
 ((NE|EQ) (TESTQ (MOVQconst [c]) x)) && isUint64PowerOfTwo(c)
-    -> ((ULT|UGE) (BTQconst [log2(c)] x))
-(SET(NE|EQ) (TESTL (SHLL (MOVLconst [1]) x) y)) -> (SET(B|AE)  (BTL x y))
-(SET(NE|EQ) (TESTQ (SHLQ (MOVQconst [1]) x) y)) -> (SET(B|AE)  (BTQ x y))
-(SET(NE|EQ) (TESTLconst [c] x)) && isUint32PowerOfTwo(c)
-    -> (SET(B|AE)  (BTLconst [log2uint32(c)] x))
-(SET(NE|EQ) (TESTQconst [c] x)) && isUint64PowerOfTwo(c)
-    -> (SET(B|AE)  (BTQconst [log2(c)] x))
+    => ((ULT|UGE) (BTQconst [int8(log2(c))] x))
+(SET(NE|EQ) (TESTL (SHLL (MOVLconst [1]) x) y)) => (SET(B|AE)  (BTL x y))
+(SET(NE|EQ) (TESTQ (SHLQ (MOVQconst [1]) x) y)) => (SET(B|AE)  (BTQ x y))
+(SET(NE|EQ) (TESTLconst [c] x)) && isUint32PowerOfTwo(int64(c))
+    => (SET(B|AE)  (BTLconst [int8(log32(c))] x))
+(SET(NE|EQ) (TESTQconst [c] x)) && isUint64PowerOfTwo(int64(c))
+    => (SET(B|AE)  (BTQconst [int8(log32(c))] x))
 (SET(NE|EQ) (TESTQ (MOVQconst [c]) x)) && isUint64PowerOfTwo(c)
-    -> (SET(B|AE)  (BTQconst [log2(c)] x))
+    => (SET(B|AE)  (BTQconst [int8(log2(c))] x))
 // SET..store variant
 (SET(NE|EQ)store [off] {sym} ptr (TESTL (SHLL (MOVLconst [1]) x) y) mem)
-    -> (SET(B|AE)store  [off] {sym} ptr (BTL x y) mem)
+    => (SET(B|AE)store  [off] {sym} ptr (BTL x y) mem)
 (SET(NE|EQ)store [off] {sym} ptr (TESTQ (SHLQ (MOVQconst [1]) x) y) mem)
-    -> (SET(B|AE)store  [off] {sym} ptr (BTQ x y) mem)
-(SET(NE|EQ)store [off] {sym} ptr (TESTLconst [c] x) mem) && isUint32PowerOfTwo(c)
-    -> (SET(B|AE)store  [off] {sym} ptr (BTLconst [log2uint32(c)] x) mem)
-(SET(NE|EQ)store [off] {sym} ptr (TESTQconst [c] x) mem) && isUint64PowerOfTwo(c)
-    -> (SET(B|AE)store  [off] {sym} ptr (BTQconst [log2(c)] x) mem)
+    => (SET(B|AE)store  [off] {sym} ptr (BTQ x y) mem)
+(SET(NE|EQ)store [off] {sym} ptr (TESTLconst [c] x) mem) && isUint32PowerOfTwo(int64(c))
+    => (SET(B|AE)store  [off] {sym} ptr (BTLconst [int8(log32(c))] x) mem)
+(SET(NE|EQ)store [off] {sym} ptr (TESTQconst [c] x) mem) && isUint64PowerOfTwo(int64(c))
+    => (SET(B|AE)store  [off] {sym} ptr (BTQconst [int8(log32(c))] x) mem)
 (SET(NE|EQ)store [off] {sym} ptr (TESTQ (MOVQconst [c]) x) mem) && isUint64PowerOfTwo(c)
-    -> (SET(B|AE)store  [off] {sym} ptr (BTQconst [log2(c)] x) mem)
+    => (SET(B|AE)store  [off] {sym} ptr (BTQconst [int8(log2(c))] x) mem)
 
 // Handle bit-testing in the form (a>>b)&1 != 0 by building the above rules
 // and further combining shifts.
-(BT(Q|L)const [c] (SHRQconst [d] x)) && (c+d)<64 -> (BTQconst [c+d] x)
-(BT(Q|L)const [c] (SHLQconst [d] x)) && c>d      -> (BT(Q|L)const [c-d] x)
-(BT(Q|L)const [0] s:(SHRQ x y)) -> (BTQ y x)
-(BTLconst [c] (SHRLconst [d] x)) && (c+d)<32 -> (BTLconst [c+d] x)
-(BTLconst [c] (SHLLconst [d] x)) && c>d      -> (BTLconst [c-d] x)
-(BTLconst [0] s:(SHRL x y)) -> (BTL y x)
+(BT(Q|L)const [c] (SHRQconst [d] x)) && (c+d)<64 => (BTQconst [c+d] x)
+(BT(Q|L)const [c] (SHLQconst [d] x)) && c>d      => (BT(Q|L)const [c-d] x)
+(BT(Q|L)const [0] s:(SHRQ x y)) => (BTQ y x)
+(BTLconst [c] (SHRLconst [d] x)) && (c+d)<32 => (BTLconst [c+d] x)
+(BTLconst [c] (SHLLconst [d] x)) && c>d      => (BTLconst [c-d] x)
+(BTLconst [0] s:(SHRL x y)) => (BTL y x)
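+// (These hold because bit c of x>>d is bit c+d of x and, for c > d,
+// bit c of x<<d is bit c-d of x; likewise bit 0 of x>>y is just bit y of x.)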
 
 // Rewrite a & 1 != 1 into a & 1 == 0.
 // Among other things, this lets us turn (a>>b)&1 != 1 into a bit test.
-(SET(NE|EQ) (CMPLconst [1] s:(ANDLconst [1] _))) -> (SET(EQ|NE) (CMPLconst [0] s))
-(SET(NE|EQ)store [off] {sym} ptr (CMPLconst [1] s:(ANDLconst [1] _)) mem) -> (SET(EQ|NE)store [off] {sym} ptr (CMPLconst [0] s) mem)
-(SET(NE|EQ) (CMPQconst [1] s:(ANDQconst [1] _))) -> (SET(EQ|NE) (CMPQconst [0] s))
-(SET(NE|EQ)store [off] {sym} ptr (CMPQconst [1] s:(ANDQconst [1] _)) mem) -> (SET(EQ|NE)store [off] {sym} ptr (CMPQconst [0] s) mem)
+(SET(NE|EQ) (CMPLconst [1] s:(ANDLconst [1] _))) => (SET(EQ|NE) (CMPLconst [0] s))
+(SET(NE|EQ)store [off] {sym} ptr (CMPLconst [1] s:(ANDLconst [1] _)) mem) => (SET(EQ|NE)store [off] {sym} ptr (CMPLconst [0] s) mem)
+(SET(NE|EQ) (CMPQconst [1] s:(ANDQconst [1] _))) => (SET(EQ|NE) (CMPQconst [0] s))
+(SET(NE|EQ)store [off] {sym} ptr (CMPQconst [1] s:(ANDQconst [1] _)) mem) => (SET(EQ|NE)store [off] {sym} ptr (CMPQconst [0] s) mem)
 
 // Recognize bit setting (a |= 1<<b) and toggling (a ^= 1<<b)
-(OR(Q|L) (SHL(Q|L) (MOV(Q|L)const [1]) y) x) -> (BTS(Q|L) x y)
-(XOR(Q|L) (SHL(Q|L) (MOV(Q|L)const [1]) y) x) -> (BTC(Q|L) x y)
+(OR(Q|L) (SHL(Q|L) (MOV(Q|L)const [1]) y) x) => (BTS(Q|L) x y)
+(XOR(Q|L) (SHL(Q|L) (MOV(Q|L)const [1]) y) x) => (BTC(Q|L) x y)
 
 // Convert ORconst into BTS when that makes the code smaller; the boundary is
 // at 128 (ORL $0x40,AX encodes in 3 bytes, ORL $0x80,AX in 6 bytes).
-((ORQ|XORQ)const [c] x) && isUint64PowerOfTwo(c) && uint64(c) >= 128
-    -> (BT(S|C)Qconst [log2(c)] x)
-((ORL|XORL)const [c] x) && isUint32PowerOfTwo(c) && uint64(c) >= 128
-    -> (BT(S|C)Lconst [log2uint32(c)] x)
+((ORQ|XORQ)const [c] x) && isUint64PowerOfTwo(int64(c)) && uint64(c) >= 128
+    => (BT(S|C)Qconst [int8(log32(c))] x)
+((ORL|XORL)const [c] x) && isUint32PowerOfTwo(int64(c)) && uint64(c) >= 128
+    => (BT(S|C)Lconst [int8(log32(c))] x)
 ((ORQ|XORQ) (MOVQconst [c]) x) && isUint64PowerOfTwo(c) && uint64(c) >= 128
-    -> (BT(S|C)Qconst [log2(c)] x)
-((ORL|XORL) (MOVLconst [c]) x) && isUint32PowerOfTwo(c) && uint64(c) >= 128
-    -> (BT(S|C)Lconst [log2uint32(c)] x)
+    => (BT(S|C)Qconst [int8(log2(c))] x)
+((ORL|XORL) (MOVLconst [c]) x) && isUint32PowerOfTwo(int64(c)) && uint64(c) >= 128
+    => (BT(S|C)Lconst [int8(log32(c))] x)
 
 // Recognize bit clearing: a &^= 1<<b
-(AND(Q|L) (NOT(Q|L) (SHL(Q|L) (MOV(Q|L)const [1]) y)) x) -> (BTR(Q|L) x y)
-(ANDQconst [c] x) && isUint64PowerOfTwo(^c) && uint64(^c) >= 128
-    -> (BTRQconst [log2(^c)] x)
-(ANDLconst [c] x) && isUint32PowerOfTwo(^c) && uint64(^c) >= 128
-    -> (BTRLconst [log2uint32(^c)] x)
+(AND(Q|L) (NOT(Q|L) (SHL(Q|L) (MOV(Q|L)const [1]) y)) x) => (BTR(Q|L) x y)
+(ANDQconst [c] x) && isUint64PowerOfTwo(int64(^c)) && uint64(^c) >= 128
+    => (BTRQconst [int8(log32(^c))] x)
+(ANDLconst [c] x) && isUint32PowerOfTwo(int64(^c)) && uint64(^c) >= 128
+    => (BTRLconst [int8(log32(^c))] x)
 (ANDQ (MOVQconst [c]) x) && isUint64PowerOfTwo(^c) && uint64(^c) >= 128
-    -> (BTRQconst [log2(^c)] x)
-(ANDL (MOVLconst [c]) x) && isUint32PowerOfTwo(^c) && uint64(^c) >= 128
-    -> (BTRLconst [log2uint32(^c)] x)
+    => (BTRQconst [int8(log2(^c))] x)
+(ANDL (MOVLconst [c]) x) && isUint32PowerOfTwo(int64(^c)) && uint64(^c) >= 128
+    => (BTRLconst [int8(log32(^c))] x)
 
 // Special-case bit patterns on first/last bit.
 // generic.rules changes ANDs of high-part/low-part masks into a couple of shifts,
@@ -656,84 +656,84 @@
 
 // Special case resetting first/last bit
 (SHL(L|Q)const [1] (SHR(L|Q)const [1] x))
-	-> (BTR(L|Q)const [0] x)
+	=> (BTR(L|Q)const [0] x)
 (SHRLconst [1] (SHLLconst [1] x))
-	-> (BTRLconst [31] x)
+	=> (BTRLconst [31] x)
 (SHRQconst [1] (SHLQconst [1] x))
-	-> (BTRQconst [63] x)
+	=> (BTRQconst [63] x)
 
 // Special case testing first/last bit (with double-shift generated by generic.rules)
 ((SETNE|SETEQ|NE|EQ) (TESTQ z1:(SHLQconst [63] (SHRQconst [63] x)) z2)) && z1==z2
-    -> ((SETB|SETAE|ULT|UGE) (BTQconst [63] x))
+    => ((SETB|SETAE|ULT|UGE) (BTQconst [63] x))
 ((SETNE|SETEQ|NE|EQ) (TESTL z1:(SHLLconst [31] (SHRQconst [31] x)) z2)) && z1==z2
-    -> ((SETB|SETAE|ULT|UGE) (BTQconst [31] x))
+    => ((SETB|SETAE|ULT|UGE) (BTQconst [31] x))
 (SET(NE|EQ)store [off] {sym} ptr (TESTQ z1:(SHLQconst [63] (SHRQconst [63] x)) z2) mem) && z1==z2
-    -> (SET(B|AE)store [off] {sym} ptr (BTQconst [63] x) mem)
+    => (SET(B|AE)store [off] {sym} ptr (BTQconst [63] x) mem)
 (SET(NE|EQ)store [off] {sym} ptr (TESTL z1:(SHLLconst [31] (SHRLconst [31] x)) z2) mem) && z1==z2
-    -> (SET(B|AE)store [off] {sym} ptr (BTLconst [31] x) mem)
+    => (SET(B|AE)store [off] {sym} ptr (BTLconst [31] x) mem)
 
 ((SETNE|SETEQ|NE|EQ) (TESTQ z1:(SHRQconst [63] (SHLQconst [63] x)) z2)) && z1==z2
-    -> ((SETB|SETAE|ULT|UGE)  (BTQconst [0] x))
+    => ((SETB|SETAE|ULT|UGE)  (BTQconst [0] x))
 ((SETNE|SETEQ|NE|EQ) (TESTL z1:(SHRLconst [31] (SHLLconst [31] x)) z2)) && z1==z2
-    -> ((SETB|SETAE|ULT|UGE)  (BTLconst [0] x))
+    => ((SETB|SETAE|ULT|UGE)  (BTLconst [0] x))
 (SET(NE|EQ)store [off] {sym} ptr (TESTQ z1:(SHRQconst [63] (SHLQconst [63] x)) z2) mem) && z1==z2
-    -> (SET(B|AE)store [off] {sym} ptr (BTQconst [0] x) mem)
+    => (SET(B|AE)store [off] {sym} ptr (BTQconst [0] x) mem)
 (SET(NE|EQ)store [off] {sym} ptr (TESTL z1:(SHRLconst [31] (SHLLconst [31] x)) z2) mem) && z1==z2
-    -> (SET(B|AE)store [off] {sym} ptr (BTLconst [0] x) mem)
+    => (SET(B|AE)store [off] {sym} ptr (BTLconst [0] x) mem)
 
 // Special-case manually testing last bit with "a>>63 != 0" (without "&1")
 ((SETNE|SETEQ|NE|EQ) (TESTQ z1:(SHRQconst [63] x) z2)) && z1==z2
-    -> ((SETB|SETAE|ULT|UGE) (BTQconst [63] x))
+    => ((SETB|SETAE|ULT|UGE) (BTQconst [63] x))
 ((SETNE|SETEQ|NE|EQ) (TESTL z1:(SHRLconst [31] x) z2)) && z1==z2
-    -> ((SETB|SETAE|ULT|UGE) (BTLconst [31] x))
+    => ((SETB|SETAE|ULT|UGE) (BTLconst [31] x))
 (SET(NE|EQ)store [off] {sym} ptr (TESTQ z1:(SHRQconst [63] x) z2) mem) && z1==z2
-    -> (SET(B|AE)store [off] {sym} ptr (BTQconst [63] x) mem)
+    => (SET(B|AE)store [off] {sym} ptr (BTQconst [63] x) mem)
 (SET(NE|EQ)store [off] {sym} ptr (TESTL z1:(SHRLconst [31] x) z2) mem) && z1==z2
-    -> (SET(B|AE)store [off] {sym} ptr (BTLconst [31] x) mem)
+    => (SET(B|AE)store [off] {sym} ptr (BTLconst [31] x) mem)
 
 // Fold combinations of bit ops on same bit. An example is math.Copysign(c,-1)
-(BTS(Q|L)const [c] (BTR(Q|L)const [c] x)) -> (BTS(Q|L)const [c] x)
-(BTS(Q|L)const [c] (BTC(Q|L)const [c] x)) -> (BTS(Q|L)const [c] x)
-(BTR(Q|L)const [c] (BTS(Q|L)const [c] x)) -> (BTR(Q|L)const [c] x)
-(BTR(Q|L)const [c] (BTC(Q|L)const [c] x)) -> (BTR(Q|L)const [c] x)
+(BTS(Q|L)const [c] (BTR(Q|L)const [c] x)) => (BTS(Q|L)const [c] x)
+(BTS(Q|L)const [c] (BTC(Q|L)const [c] x)) => (BTS(Q|L)const [c] x)
+(BTR(Q|L)const [c] (BTS(Q|L)const [c] x)) => (BTR(Q|L)const [c] x)
+(BTR(Q|L)const [c] (BTC(Q|L)const [c] x)) => (BTR(Q|L)const [c] x)
 
 // Fold boolean negation into SETcc.
-(XORLconst [1] (SETNE x)) -> (SETEQ x)
-(XORLconst [1] (SETEQ x)) -> (SETNE x)
-(XORLconst [1] (SETL  x)) -> (SETGE x)
-(XORLconst [1] (SETGE x)) -> (SETL  x)
-(XORLconst [1] (SETLE x)) -> (SETG  x)
-(XORLconst [1] (SETG  x)) -> (SETLE x)
-(XORLconst [1] (SETB  x)) -> (SETAE x)
-(XORLconst [1] (SETAE x)) -> (SETB  x)
-(XORLconst [1] (SETBE x)) -> (SETA  x)
-(XORLconst [1] (SETA  x)) -> (SETBE x)
+(XORLconst [1] (SETNE x)) => (SETEQ x)
+(XORLconst [1] (SETEQ x)) => (SETNE x)
+(XORLconst [1] (SETL  x)) => (SETGE x)
+(XORLconst [1] (SETGE x)) => (SETL  x)
+(XORLconst [1] (SETLE x)) => (SETG  x)
+(XORLconst [1] (SETG  x)) => (SETLE x)
+(XORLconst [1] (SETB  x)) => (SETAE x)
+(XORLconst [1] (SETAE x)) => (SETB  x)
+(XORLconst [1] (SETBE x)) => (SETA  x)
+(XORLconst [1] (SETA  x)) => (SETBE x)
 
 // Special case for floating point - LF/LEF not generated
-(NE (TESTB (SETGF  cmp) (SETGF  cmp)) yes no) -> (UGT  cmp yes no)
-(NE (TESTB (SETGEF cmp) (SETGEF cmp)) yes no) -> (UGE  cmp yes no)
-(NE (TESTB (SETEQF cmp) (SETEQF cmp)) yes no) -> (EQF  cmp yes no)
-(NE (TESTB (SETNEF cmp) (SETNEF cmp)) yes no) -> (NEF  cmp yes no)
+(NE (TESTB (SETGF  cmp) (SETGF  cmp)) yes no) => (UGT  cmp yes no)
+(NE (TESTB (SETGEF cmp) (SETGEF cmp)) yes no) => (UGE  cmp yes no)
+(NE (TESTB (SETEQF cmp) (SETEQF cmp)) yes no) => (EQF  cmp yes no)
+(NE (TESTB (SETNEF cmp) (SETNEF cmp)) yes no) => (NEF  cmp yes no)
 
 // Disabled because it interferes with the pattern match above and makes worse code.
-// (SETNEF x) -> (ORQ (SETNE <typ.Int8> x) (SETNAN <typ.Int8> x))
-// (SETEQF x) -> (ANDQ (SETEQ <typ.Int8> x) (SETORD <typ.Int8> x))
+// (SETNEF x) => (ORQ (SETNE <typ.Int8> x) (SETNAN <typ.Int8> x))
+// (SETEQF x) => (ANDQ (SETEQ <typ.Int8> x) (SETORD <typ.Int8> x))
 
 // fold constants into instructions
-(ADDQ x (MOVQconst [c])) && is32Bit(c) -> (ADDQconst [c] x)
-(ADDQ x (MOVLconst [c])) && is32Bit(c) -> (ADDQconst [int64(int32(c))] x)
-(ADDL x (MOVLconst [c])) -> (ADDLconst [c] x)
+(ADDQ x (MOVQconst [c])) && is32Bit(c) => (ADDQconst [int32(c)] x)
+(ADDQ x (MOVLconst [c])) => (ADDQconst [c] x)
+(ADDL x (MOVLconst [c])) => (ADDLconst [c] x)
 
-(SUBQ x (MOVQconst [c])) && is32Bit(c) -> (SUBQconst x [c])
-(SUBQ (MOVQconst [c]) x) && is32Bit(c) -> (NEGQ (SUBQconst <v.Type> x [c]))
-(SUBL x (MOVLconst [c])) -> (SUBLconst x [c])
-(SUBL (MOVLconst [c]) x) -> (NEGL (SUBLconst <v.Type> x [c]))
+(SUBQ x (MOVQconst [c])) && is32Bit(c) => (SUBQconst x [int32(c)])
+(SUBQ (MOVQconst [c]) x) && is32Bit(c) => (NEGQ (SUBQconst <v.Type> x [int32(c)]))
+(SUBL x (MOVLconst [c])) => (SUBLconst x [c])
+(SUBL (MOVLconst [c]) x) => (NEGL (SUBLconst <v.Type> x [c]))
 
-(MULQ x (MOVQconst [c])) && is32Bit(c) -> (MULQconst [c] x)
-(MULL x (MOVLconst [c])) -> (MULLconst [c] x)
+(MULQ x (MOVQconst [c])) && is32Bit(c) => (MULQconst [int32(c)] x)
+(MULL x (MOVLconst [c])) => (MULLconst [c] x)
 
-(ANDQ x (MOVQconst [c])) && is32Bit(c) -> (ANDQconst [c] x)
-(ANDL x (MOVLconst [c])) -> (ANDLconst [c] x)
+(ANDQ x (MOVQconst [c])) && is32Bit(c) => (ANDQconst [int32(c)] x)
+(ANDL x (MOVLconst [c])) => (ANDLconst [c] x)
 
 (AND(L|Q)const [c] (AND(L|Q)const [d] x)) => (AND(L|Q)const [c & d] x)
 (XOR(L|Q)const [c] (XOR(L|Q)const [d] x)) => (XOR(L|Q)const [c ^ d] x)
@@ -763,68 +763,70 @@
 (ORQconst  [c] (BTSQconst [d] x)) && is32Bit(int64(c) | 1<<uint32(d))     => (ORQconst [c | 1<<uint32(d)] x)
 (BTSQconst [c] (BTSQconst [d] x)) && is32Bit(1<<uint32(c) | 1<<uint32(d)) => (ORQconst [1<<uint32(c) | 1<<uint32(d)] x)
 
-(MULLconst [c] (MULLconst [d] x)) -> (MULLconst [int64(int32(c * d))] x)
-(MULQconst [c] (MULQconst [d] x)) && is32Bit(c*d) -> (MULQconst [c * d] x)
 
-(ORQ x (MOVQconst [c])) && is32Bit(c) -> (ORQconst [c] x)
-(ORQ x (MOVLconst [c])) -> (ORQconst [c] x)
-(ORL x (MOVLconst [c])) -> (ORLconst [c] x)
+(MULLconst [c] (MULLconst [d] x)) => (MULLconst [c * d] x)
+(MULQconst [c] (MULQconst [d] x)) && is32Bit(int64(c)*int64(d)) => (MULQconst [c * d] x)
 
-(XORQ x (MOVQconst [c])) && is32Bit(c) -> (XORQconst [c] x)
-(XORL x (MOVLconst [c])) -> (XORLconst [c] x)
+(ORQ x (MOVQconst [c])) && is32Bit(c) => (ORQconst [int32(c)] x)
+(ORQ x (MOVLconst [c])) => (ORQconst [c] x)
+(ORL x (MOVLconst [c])) => (ORLconst [c] x)
 
-(SHLQ x (MOV(Q|L)const [c])) -> (SHLQconst [c&63] x)
-(SHLL x (MOV(Q|L)const [c])) -> (SHLLconst [c&31] x)
+(XORQ x (MOVQconst [c])) && is32Bit(c) => (XORQconst [int32(c)] x)
+(XORL x (MOVLconst [c])) => (XORLconst [c] x)
 
-(SHRQ x (MOV(Q|L)const [c])) -> (SHRQconst [c&63] x)
-(SHRL x (MOV(Q|L)const [c])) -> (SHRLconst [c&31] x)
-(SHRW x (MOV(Q|L)const [c])) && c&31 < 16 -> (SHRWconst [c&31] x)
-(SHRW _ (MOV(Q|L)const [c])) && c&31 >= 16 -> (MOVLconst [0])
-(SHRB x (MOV(Q|L)const [c])) && c&31 < 8 -> (SHRBconst [c&31] x)
-(SHRB _ (MOV(Q|L)const [c])) && c&31 >= 8 -> (MOVLconst [0])
+(SHLQ x (MOV(Q|L)const [c])) => (SHLQconst [int8(c&63)] x)
+(SHLL x (MOV(Q|L)const [c])) => (SHLLconst [int8(c&31)] x)
 
-(SARQ x (MOV(Q|L)const [c])) -> (SARQconst [c&63] x)
-(SARL x (MOV(Q|L)const [c])) -> (SARLconst [c&31] x)
-(SARW x (MOV(Q|L)const [c])) -> (SARWconst [min(c&31,15)] x)
-(SARB x (MOV(Q|L)const [c])) -> (SARBconst [min(c&31,7)] x)
+(SHRQ x (MOV(Q|L)const [c])) => (SHRQconst [int8(c&63)] x)
+(SHRL x (MOV(Q|L)const [c])) => (SHRLconst [int8(c&31)] x)
+(SHRW x (MOV(Q|L)const [c])) && c&31 < 16 => (SHRWconst [int8(c&31)] x)
+(SHRW _ (MOV(Q|L)const [c])) && c&31 >= 16 => (MOVLconst [0])
+(SHRB x (MOV(Q|L)const [c])) && c&31 < 8 => (SHRBconst [int8(c&31)] x)
+(SHRB _ (MOV(Q|L)const [c])) && c&31 >= 8 => (MOVLconst [0])
+
+(SARQ x (MOV(Q|L)const [c])) => (SARQconst [int8(c&63)] x)
+(SARL x (MOV(Q|L)const [c])) => (SARLconst [int8(c&31)] x)
+(SARW x (MOV(Q|L)const [c])) => (SARWconst [int8(min(int64(c)&31,15))] x)
+(SARB x (MOV(Q|L)const [c])) => (SARBconst [int8(min(int64(c)&31,7))] x)
+
 
 // Operations which don't affect the low 6/5 bits of the shift amount are NOPs.
-((SHLQ|SHRQ|SARQ) x (ADDQconst [c] y)) && c & 63 == 0  -> ((SHLQ|SHRQ|SARQ) x y)
-((SHLQ|SHRQ|SARQ) x (NEGQ <t> (ADDQconst [c] y))) && c & 63 == 0  -> ((SHLQ|SHRQ|SARQ) x (NEGQ <t> y))
-((SHLQ|SHRQ|SARQ) x (ANDQconst [c] y)) && c & 63 == 63 -> ((SHLQ|SHRQ|SARQ) x y)
-((SHLQ|SHRQ|SARQ) x (NEGQ <t> (ANDQconst [c] y))) && c & 63 == 63 -> ((SHLQ|SHRQ|SARQ) x (NEGQ <t> y))
+((SHLQ|SHRQ|SARQ) x (ADDQconst [c] y)) && c & 63 == 0  => ((SHLQ|SHRQ|SARQ) x y)
+((SHLQ|SHRQ|SARQ) x (NEGQ <t> (ADDQconst [c] y))) && c & 63 == 0  => ((SHLQ|SHRQ|SARQ) x (NEGQ <t> y))
+((SHLQ|SHRQ|SARQ) x (ANDQconst [c] y)) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ) x y)
+((SHLQ|SHRQ|SARQ) x (NEGQ <t> (ANDQconst [c] y))) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ) x (NEGQ <t> y))
 
-((SHLL|SHRL|SARL) x (ADDQconst [c] y)) && c & 31 == 0  -> ((SHLL|SHRL|SARL) x y)
-((SHLL|SHRL|SARL) x (NEGQ <t> (ADDQconst [c] y))) && c & 31 == 0  -> ((SHLL|SHRL|SARL) x (NEGQ <t> y))
-((SHLL|SHRL|SARL) x (ANDQconst [c] y)) && c & 31 == 31 -> ((SHLL|SHRL|SARL) x y)
-((SHLL|SHRL|SARL) x (NEGQ <t> (ANDQconst [c] y))) && c & 31 == 31 -> ((SHLL|SHRL|SARL) x (NEGQ <t> y))
+((SHLL|SHRL|SARL) x (ADDQconst [c] y)) && c & 31 == 0  => ((SHLL|SHRL|SARL) x y)
+((SHLL|SHRL|SARL) x (NEGQ <t> (ADDQconst [c] y))) && c & 31 == 0  => ((SHLL|SHRL|SARL) x (NEGQ <t> y))
+((SHLL|SHRL|SARL) x (ANDQconst [c] y)) && c & 31 == 31 => ((SHLL|SHRL|SARL) x y)
+((SHLL|SHRL|SARL) x (NEGQ <t> (ANDQconst [c] y))) && c & 31 == 31 => ((SHLL|SHRL|SARL) x (NEGQ <t> y))
 
-((SHLQ|SHRQ|SARQ) x (ADDLconst [c] y)) && c & 63 == 0  -> ((SHLQ|SHRQ|SARQ) x y)
-((SHLQ|SHRQ|SARQ) x (NEGL <t> (ADDLconst [c] y))) && c & 63 == 0  -> ((SHLQ|SHRQ|SARQ) x (NEGL <t> y))
-((SHLQ|SHRQ|SARQ) x (ANDLconst [c] y)) && c & 63 == 63 -> ((SHLQ|SHRQ|SARQ) x y)
-((SHLQ|SHRQ|SARQ) x (NEGL <t> (ANDLconst [c] y))) && c & 63 == 63 -> ((SHLQ|SHRQ|SARQ) x (NEGL <t> y))
+((SHLQ|SHRQ|SARQ) x (ADDLconst [c] y)) && c & 63 == 0  => ((SHLQ|SHRQ|SARQ) x y)
+((SHLQ|SHRQ|SARQ) x (NEGL <t> (ADDLconst [c] y))) && c & 63 == 0  => ((SHLQ|SHRQ|SARQ) x (NEGL <t> y))
+((SHLQ|SHRQ|SARQ) x (ANDLconst [c] y)) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ) x y)
+((SHLQ|SHRQ|SARQ) x (NEGL <t> (ANDLconst [c] y))) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ) x (NEGL <t> y))
 
-((SHLL|SHRL|SARL) x (ADDLconst [c] y)) && c & 31 == 0  -> ((SHLL|SHRL|SARL) x y)
-((SHLL|SHRL|SARL) x (NEGL <t> (ADDLconst [c] y))) && c & 31 == 0  -> ((SHLL|SHRL|SARL) x (NEGL <t> y))
-((SHLL|SHRL|SARL) x (ANDLconst [c] y)) && c & 31 == 31 -> ((SHLL|SHRL|SARL) x y)
-((SHLL|SHRL|SARL) x (NEGL <t> (ANDLconst [c] y))) && c & 31 == 31 -> ((SHLL|SHRL|SARL) x (NEGL <t> y))
+((SHLL|SHRL|SARL) x (ADDLconst [c] y)) && c & 31 == 0  => ((SHLL|SHRL|SARL) x y)
+((SHLL|SHRL|SARL) x (NEGL <t> (ADDLconst [c] y))) && c & 31 == 0  => ((SHLL|SHRL|SARL) x (NEGL <t> y))
+((SHLL|SHRL|SARL) x (ANDLconst [c] y)) && c & 31 == 31 => ((SHLL|SHRL|SARL) x y)
+((SHLL|SHRL|SARL) x (NEGL <t> (ANDLconst [c] y))) && c & 31 == 31 => ((SHLL|SHRL|SARL) x (NEGL <t> y))
 
 // Constant rotate instructions
-((ADDQ|ORQ|XORQ) (SHLQconst x [c]) (SHRQconst x [d])) && d==64-c -> (ROLQconst x [c])
-((ADDL|ORL|XORL) (SHLLconst x [c]) (SHRLconst x [d])) && d==32-c -> (ROLLconst x [c])
+((ADDQ|ORQ|XORQ) (SHLQconst x [c]) (SHRQconst x [d])) && d==64-c => (ROLQconst x [c])
+((ADDL|ORL|XORL) (SHLLconst x [c]) (SHRLconst x [d])) && d==32-c => (ROLLconst x [c])
 
-((ADDL|ORL|XORL) <t> (SHLLconst x [c]) (SHRWconst x [d])) && d==16-c && c < 16 && t.Size() == 2 -> (ROLWconst x [c])
-((ADDL|ORL|XORL) <t> (SHLLconst x [c]) (SHRBconst x [d])) && d==8-c  && c < 8  && t.Size() == 1 -> (ROLBconst x [c])
+((ADDL|ORL|XORL) <t> (SHLLconst x [c]) (SHRWconst x [d])) && d==16-c && c < 16 && t.Size() == 2 => (ROLWconst x [c])
+((ADDL|ORL|XORL) <t> (SHLLconst x [c]) (SHRBconst x [d])) && d==8-c  && c < 8  && t.Size() == 1 => (ROLBconst x [c])
 
-(ROLQconst [c] (ROLQconst [d] x)) -> (ROLQconst [(c+d)&63] x)
-(ROLLconst [c] (ROLLconst [d] x)) -> (ROLLconst [(c+d)&31] x)
-(ROLWconst [c] (ROLWconst [d] x)) -> (ROLWconst [(c+d)&15] x)
-(ROLBconst [c] (ROLBconst [d] x)) -> (ROLBconst [(c+d)& 7] x)
+(ROLQconst [c] (ROLQconst [d] x)) => (ROLQconst [(c+d)&63] x)
+(ROLLconst [c] (ROLLconst [d] x)) => (ROLLconst [(c+d)&31] x)
+(ROLWconst [c] (ROLWconst [d] x)) => (ROLWconst [(c+d)&15] x)
+(ROLBconst [c] (ROLBconst [d] x)) => (ROLBconst [(c+d)& 7] x)
 
-(RotateLeft8  ...) -> (ROLB ...)
-(RotateLeft16 ...) -> (ROLW ...)
-(RotateLeft32 ...) -> (ROLL ...)
-(RotateLeft64 ...) -> (ROLQ ...)
+(RotateLeft8  ...) => (ROLB ...)
+(RotateLeft16 ...) => (ROLW ...)
+(RotateLeft32 ...) => (ROLL ...)
+(RotateLeft64 ...) => (ROLQ ...)
 
 // Non-constant rotates.
 // We want to issue a rotate when the Go source contains code like
@@ -837,15 +839,15 @@
 // But x >> 64 is 0, not x. So there's an additional mask that is ANDed in
 // to force the second term to 0. We don't need that mask, but we must match
 // it in order to strip it out.
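 // (Illustratively, this shape arises from source like
 //     y &= 63
 //     r := x<<y | x>>(64-y)
 // where the SBBQcarrymask term is the mask that zeroes the x>>(64-y) half
 // when y == 0, since the hardware shift would deliver x rather than 0 there.)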
-(ORQ (SHLQ x y) (ANDQ (SHRQ x (NEG(Q|L) y)) (SBBQcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [63]) [-64])) [64])))) -> (ROLQ x y)
-(ORQ (SHRQ x y) (ANDQ (SHLQ x (NEG(Q|L) y)) (SBBQcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [63]) [-64])) [64])))) -> (RORQ x y)
+(ORQ (SHLQ x y) (ANDQ (SHRQ x (NEG(Q|L) y)) (SBBQcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [63]) [-64])) [64])))) => (ROLQ x y)
+(ORQ (SHRQ x y) (ANDQ (SHLQ x (NEG(Q|L) y)) (SBBQcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [63]) [-64])) [64])))) => (RORQ x y)
 
-(ORL (SHLL x y) (ANDL (SHRL x (NEG(Q|L) y)) (SBBLcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [31]) [-32])) [32])))) -> (ROLL x y)
-(ORL (SHRL x y) (ANDL (SHLL x (NEG(Q|L) y)) (SBBLcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [31]) [-32])) [32])))) -> (RORL x y)
+(ORL (SHLL x y) (ANDL (SHRL x (NEG(Q|L) y)) (SBBLcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [31]) [-32])) [32])))) => (ROLL x y)
+(ORL (SHRL x y) (ANDL (SHLL x (NEG(Q|L) y)) (SBBLcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [31]) [-32])) [32])))) => (RORL x y)
 
 // Help with rotate detection
-(CMPQconst (NEGQ (ADDQconst [-16] (ANDQconst [15] _))) [32]) -> (FlagLT_ULT)
-(CMPQconst (NEGQ (ADDQconst [ -8] (ANDQconst  [7] _))) [32]) -> (FlagLT_ULT)
+(CMPQconst (NEGQ (ADDQconst [-16] (ANDQconst [15] _))) [32]) => (FlagLT_ULT)
+(CMPQconst (NEGQ (ADDQconst [ -8] (ANDQconst  [7] _))) [32]) => (FlagLT_ULT)
 
 (ORL (SHLL x (AND(Q|L)const y [15]))
      (ANDL (SHRW x (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [15]) [-16])))
@@ -855,69 +857,74 @@
 (ORL (SHRW x (AND(Q|L)const y [15]))
      (SHLL x (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [15]) [-16]))))
   && v.Type.Size() == 2
-  -> (RORW x y)
+  => (RORW x y)
 
 (ORL (SHLL x (AND(Q|L)const y [ 7]))
      (ANDL (SHRB x (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [ 7]) [ -8])))
            (SBBLcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [ 7]) [ -8])) [ 8]))))
   && v.Type.Size() == 1
-  -> (ROLB x y)
+  => (ROLB x y)
 (ORL (SHRB x (AND(Q|L)const y [ 7]))
      (SHLL x (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [ 7]) [ -8]))))
   && v.Type.Size() == 1
-  -> (RORB x y)
+  => (RORB x y)
 
 // rotate left negative = rotate right
-(ROLQ x (NEG(Q|L) y)) -> (RORQ x y)
-(ROLL x (NEG(Q|L) y)) -> (RORL x y)
-(ROLW x (NEG(Q|L) y)) -> (RORW x y)
-(ROLB x (NEG(Q|L) y)) -> (RORB x y)
+(ROLQ x (NEG(Q|L) y)) => (RORQ x y)
+(ROLL x (NEG(Q|L) y)) => (RORL x y)
+(ROLW x (NEG(Q|L) y)) => (RORW x y)
+(ROLB x (NEG(Q|L) y)) => (RORB x y)
 
 // rotate right negative = rotate left
-(RORQ x (NEG(Q|L) y)) -> (ROLQ x y)
-(RORL x (NEG(Q|L) y)) -> (ROLL x y)
-(RORW x (NEG(Q|L) y)) -> (ROLW x y)
-(RORB x (NEG(Q|L) y)) -> (ROLB x y)
+(RORQ x (NEG(Q|L) y)) => (ROLQ x y)
+(RORL x (NEG(Q|L) y)) => (ROLL x y)
+(RORW x (NEG(Q|L) y)) => (ROLW x y)
+(RORB x (NEG(Q|L) y)) => (ROLB x y)
 
 // rotate by constants
-(ROLQ x (MOV(Q|L)const [c])) -> (ROLQconst [c&63] x)
-(ROLL x (MOV(Q|L)const [c])) -> (ROLLconst [c&31] x)
-(ROLW x (MOV(Q|L)const [c])) -> (ROLWconst [c&15] x)
-(ROLB x (MOV(Q|L)const [c])) -> (ROLBconst [c&7 ] x)
+(ROLQ x (MOV(Q|L)const [c])) => (ROLQconst [int8(c&63)] x)
+(ROLL x (MOV(Q|L)const [c])) => (ROLLconst [int8(c&31)] x)
+(ROLW x (MOV(Q|L)const [c])) => (ROLWconst [int8(c&15)] x)
+(ROLB x (MOV(Q|L)const [c])) => (ROLBconst [int8(c&7) ] x)
 
-(RORQ x (MOV(Q|L)const [c])) -> (ROLQconst [(-c)&63] x)
-(RORL x (MOV(Q|L)const [c])) -> (ROLLconst [(-c)&31] x)
-(RORW x (MOV(Q|L)const [c])) -> (ROLWconst [(-c)&15] x)
-(RORB x (MOV(Q|L)const [c])) -> (ROLBconst [(-c)&7 ] x)
+(RORQ x (MOV(Q|L)const [c])) => (ROLQconst [int8((-c)&63)] x)
+(RORL x (MOV(Q|L)const [c])) => (ROLLconst [int8((-c)&31)] x)
+(RORW x (MOV(Q|L)const [c])) => (ROLWconst [int8((-c)&15)] x)
+(RORB x (MOV(Q|L)const [c])) => (ROLBconst [int8((-c)&7) ] x)
 
 // Constant shift simplifications
-((SHLQ|SHRQ|SARQ)const      x [0]) -> x
-((SHLL|SHRL|SARL)const      x [0]) -> x
-((SHRW|SARW)const           x [0]) -> x
-((SHRB|SARB)const           x [0]) -> x
-((ROLQ|ROLL|ROLW|ROLB)const x [0]) -> x
+((SHLQ|SHRQ|SARQ)const      x [0]) => x
+((SHLL|SHRL|SARL)const      x [0]) => x
+((SHRW|SARW)const           x [0]) => x
+((SHRB|SARB)const           x [0]) => x
+((ROLQ|ROLL|ROLW|ROLB)const x [0]) => x
 
 // Note: the word and byte shifts keep the low 5 bits (not the low 4 or 3 bits)
 // because the x86 instructions are defined to use all 5 bits of the shift even
 // for the small shifts. I don't think we'll ever generate a weird shift (e.g.
 // (SHRW x (MOVLconst [24]))), but just in case.
 
-(CMPQ x (MOVQconst [c])) && is32Bit(c) -> (CMPQconst x [c])
-(CMPQ (MOVQconst [c]) x) && is32Bit(c) -> (InvertFlags (CMPQconst x [c]))
-(CMPL x (MOVLconst [c])) -> (CMPLconst x [c])
-(CMPL (MOVLconst [c]) x) -> (InvertFlags (CMPLconst x [c]))
-(CMPW x (MOVLconst [c])) -> (CMPWconst x [int64(int16(c))])
-(CMPW (MOVLconst [c]) x) -> (InvertFlags (CMPWconst x [int64(int16(c))]))
-(CMPB x (MOVLconst [c])) -> (CMPBconst x [int64(int8(c))])
-(CMPB (MOVLconst [c]) x) -> (InvertFlags (CMPBconst x [int64(int8(c))]))
+(CMPQ x (MOVQconst [c])) && is32Bit(c) => (CMPQconst x [int32(c)])
+(CMPQ (MOVQconst [c]) x) && is32Bit(c) => (InvertFlags (CMPQconst x [int32(c)]))
+(CMPL x (MOVLconst [c])) => (CMPLconst x [c])
+(CMPL (MOVLconst [c]) x) => (InvertFlags (CMPLconst x [c]))
+(CMPW x (MOVLconst [c])) => (CMPWconst x [int16(c)])
+(CMPW (MOVLconst [c]) x) => (InvertFlags (CMPWconst x [int16(c)]))
+(CMPB x (MOVLconst [c])) => (CMPBconst x [int8(c)])
+(CMPB (MOVLconst [c]) x) => (InvertFlags (CMPBconst x [int8(c)]))
 
 // Canonicalize the order of arguments to comparisons - helps with CSE.
-(CMP(Q|L|W|B) x y) && x.ID > y.ID -> (InvertFlags (CMP(Q|L|W|B) y x))
+(CMP(Q|L|W|B) x y) && x.ID > y.ID => (InvertFlags (CMP(Q|L|W|B) y x))
 
 // Using MOVZX instead of AND is cheaper.
-(AND(Q|L)const [  0xFF] x) -> (MOVBQZX x)
-(AND(Q|L)const [0xFFFF] x) -> (MOVWQZX x)
-(ANDQconst [0xFFFFFFFF] x) -> (MOVLQZX x)
+(AND(Q|L)const [  0xFF] x) => (MOVBQZX x)
+(AND(Q|L)const [0xFFFF] x) => (MOVWQZX x)
+// This rule is currently invalid because 0xFFFFFFFF is not representable by a signed int32.
+// Commenting out for now; it also can't trigger anyway, because the is32Bit
+// guard on the ANDQconst lowering rule above prevents 0xFFFFFFFF from
+// matching (for the same reason).
+// Using an alternate form of this rule segfaults some binaries because of
+// adverse interactions with other passes.
+// (ANDQconst [0xFFFFFFFF] x) => (MOVLQZX x)
 
 // strength reduction
 // Assumes that the following costs from https://gmplib.org/~tege/x86-timing.pdf:
@@ -928,98 +935,98 @@
 // which can require a register-register move
 // to preserve the original value,
 // so it must be used with care.
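 // (For example, LEAQ8 x x computes x + 8*x = 9*x, so MULQconst [9] becomes a
 // single LEAQ8, and MULQconst [7] nests two LEAs: x + 2*(x + 2*x) = 7*x.)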
-(MUL(Q|L)const [-9] x) -> (NEG(Q|L) (LEA(Q|L)8 <v.Type> x x))
-(MUL(Q|L)const [-5] x) -> (NEG(Q|L) (LEA(Q|L)4 <v.Type> x x))
-(MUL(Q|L)const [-3] x) -> (NEG(Q|L) (LEA(Q|L)2 <v.Type> x x))
-(MUL(Q|L)const [-1] x) -> (NEG(Q|L) x)
-(MUL(Q|L)const [ 0] _) -> (MOV(Q|L)const [0])
-(MUL(Q|L)const [ 1] x) -> x
-(MUL(Q|L)const [ 3] x) -> (LEA(Q|L)2 x x)
-(MUL(Q|L)const [ 5] x) -> (LEA(Q|L)4 x x)
-(MUL(Q|L)const [ 7] x) -> (LEA(Q|L)2 x (LEA(Q|L)2 <v.Type> x x))
-(MUL(Q|L)const [ 9] x) -> (LEA(Q|L)8 x x)
-(MUL(Q|L)const [11] x) -> (LEA(Q|L)2 x (LEA(Q|L)4 <v.Type> x x))
-(MUL(Q|L)const [13] x) -> (LEA(Q|L)4 x (LEA(Q|L)2 <v.Type> x x))
-(MUL(Q|L)const [19] x) -> (LEA(Q|L)2 x (LEA(Q|L)8 <v.Type> x x))
-(MUL(Q|L)const [21] x) -> (LEA(Q|L)4 x (LEA(Q|L)4 <v.Type> x x))
-(MUL(Q|L)const [25] x) -> (LEA(Q|L)8 x (LEA(Q|L)2 <v.Type> x x))
-(MUL(Q|L)const [27] x) -> (LEA(Q|L)8 (LEA(Q|L)2 <v.Type> x x) (LEA(Q|L)2 <v.Type> x x))
-(MUL(Q|L)const [37] x) -> (LEA(Q|L)4 x (LEA(Q|L)8 <v.Type> x x))
-(MUL(Q|L)const [41] x) -> (LEA(Q|L)8 x (LEA(Q|L)4 <v.Type> x x))
-(MUL(Q|L)const [45] x) -> (LEA(Q|L)8 (LEA(Q|L)4 <v.Type> x x) (LEA(Q|L)4 <v.Type> x x))
-(MUL(Q|L)const [73] x) -> (LEA(Q|L)8 x (LEA(Q|L)8 <v.Type> x x))
-(MUL(Q|L)const [81] x) -> (LEA(Q|L)8 (LEA(Q|L)8 <v.Type> x x) (LEA(Q|L)8 <v.Type> x x))
+(MUL(Q|L)const [-9] x) => (NEG(Q|L) (LEA(Q|L)8 <v.Type> x x))
+(MUL(Q|L)const [-5] x) => (NEG(Q|L) (LEA(Q|L)4 <v.Type> x x))
+(MUL(Q|L)const [-3] x) => (NEG(Q|L) (LEA(Q|L)2 <v.Type> x x))
+(MUL(Q|L)const [-1] x) => (NEG(Q|L) x)
+(MUL(Q|L)const [ 0] _) => (MOV(Q|L)const [0])
+(MUL(Q|L)const [ 1] x) => x
+(MUL(Q|L)const [ 3] x) => (LEA(Q|L)2 x x)
+(MUL(Q|L)const [ 5] x) => (LEA(Q|L)4 x x)
+(MUL(Q|L)const [ 7] x) => (LEA(Q|L)2 x (LEA(Q|L)2 <v.Type> x x))
+(MUL(Q|L)const [ 9] x) => (LEA(Q|L)8 x x)
+(MUL(Q|L)const [11] x) => (LEA(Q|L)2 x (LEA(Q|L)4 <v.Type> x x))
+(MUL(Q|L)const [13] x) => (LEA(Q|L)4 x (LEA(Q|L)2 <v.Type> x x))
+(MUL(Q|L)const [19] x) => (LEA(Q|L)2 x (LEA(Q|L)8 <v.Type> x x))
+(MUL(Q|L)const [21] x) => (LEA(Q|L)4 x (LEA(Q|L)4 <v.Type> x x))
+(MUL(Q|L)const [25] x) => (LEA(Q|L)8 x (LEA(Q|L)2 <v.Type> x x))
+(MUL(Q|L)const [27] x) => (LEA(Q|L)8 (LEA(Q|L)2 <v.Type> x x) (LEA(Q|L)2 <v.Type> x x))
+(MUL(Q|L)const [37] x) => (LEA(Q|L)4 x (LEA(Q|L)8 <v.Type> x x))
+(MUL(Q|L)const [41] x) => (LEA(Q|L)8 x (LEA(Q|L)4 <v.Type> x x))
+(MUL(Q|L)const [45] x) => (LEA(Q|L)8 (LEA(Q|L)4 <v.Type> x x) (LEA(Q|L)4 <v.Type> x x))
+(MUL(Q|L)const [73] x) => (LEA(Q|L)8 x (LEA(Q|L)8 <v.Type> x x))
+(MUL(Q|L)const [81] x) => (LEA(Q|L)8 (LEA(Q|L)8 <v.Type> x x) (LEA(Q|L)8 <v.Type> x x))
 
-(MUL(Q|L)const [c] x) && isPowerOfTwo(c+1) && c >=  15 -> (SUB(Q|L)  (SHL(Q|L)const <v.Type> [log2(c+1)] x) x)
-(MUL(Q|L)const [c] x) && isPowerOfTwo(c-1) && c >=  17 -> (LEA(Q|L)1 (SHL(Q|L)const <v.Type> [log2(c-1)] x) x)
-(MUL(Q|L)const [c] x) && isPowerOfTwo(c-2) && c >=  34 -> (LEA(Q|L)2 (SHL(Q|L)const <v.Type> [log2(c-2)] x) x)
-(MUL(Q|L)const [c] x) && isPowerOfTwo(c-4) && c >=  68 -> (LEA(Q|L)4 (SHL(Q|L)const <v.Type> [log2(c-4)] x) x)
-(MUL(Q|L)const [c] x) && isPowerOfTwo(c-8) && c >= 136 -> (LEA(Q|L)8 (SHL(Q|L)const <v.Type> [log2(c-8)] x) x)
-(MUL(Q|L)const [c] x) && c%3 == 0 && isPowerOfTwo(c/3) -> (SHL(Q|L)const [log2(c/3)] (LEA(Q|L)2 <v.Type> x x))
-(MUL(Q|L)const [c] x) && c%5 == 0 && isPowerOfTwo(c/5) -> (SHL(Q|L)const [log2(c/5)] (LEA(Q|L)4 <v.Type> x x))
-(MUL(Q|L)const [c] x) && c%9 == 0 && isPowerOfTwo(c/9) -> (SHL(Q|L)const [log2(c/9)] (LEA(Q|L)8 <v.Type> x x))
+(MUL(Q|L)const [c] x) && isPowerOfTwo(int64(c)+1) && c >=  15 => (SUB(Q|L)  (SHL(Q|L)const <v.Type> [int8(log2(int64(c)+1))] x) x)
+(MUL(Q|L)const [c] x) && isPowerOfTwo32(c-1) && c >=  17 => (LEA(Q|L)1 (SHL(Q|L)const <v.Type> [int8(log32(c-1))] x) x)
+(MUL(Q|L)const [c] x) && isPowerOfTwo32(c-2) && c >=  34 => (LEA(Q|L)2 (SHL(Q|L)const <v.Type> [int8(log32(c-2))] x) x)
+(MUL(Q|L)const [c] x) && isPowerOfTwo32(c-4) && c >=  68 => (LEA(Q|L)4 (SHL(Q|L)const <v.Type> [int8(log32(c-4))] x) x)
+(MUL(Q|L)const [c] x) && isPowerOfTwo32(c-8) && c >= 136 => (LEA(Q|L)8 (SHL(Q|L)const <v.Type> [int8(log32(c-8))] x) x)
+(MUL(Q|L)const [c] x) && c%3 == 0 && isPowerOfTwo32(c/3) => (SHL(Q|L)const [int8(log32(c/3))] (LEA(Q|L)2 <v.Type> x x))
+(MUL(Q|L)const [c] x) && c%5 == 0 && isPowerOfTwo32(c/5) => (SHL(Q|L)const [int8(log32(c/5))] (LEA(Q|L)4 <v.Type> x x))
+(MUL(Q|L)const [c] x) && c%9 == 0 && isPowerOfTwo32(c/9) => (SHL(Q|L)const [int8(log32(c/9))] (LEA(Q|L)8 <v.Type> x x))
 
 // combine add/shift into LEAQ/LEAL
-(ADD(L|Q) x (SHL(L|Q)const [3] y)) -> (LEA(L|Q)8 x y)
-(ADD(L|Q) x (SHL(L|Q)const [2] y)) -> (LEA(L|Q)4 x y)
-(ADD(L|Q) x (SHL(L|Q)const [1] y)) -> (LEA(L|Q)2 x y)
-(ADD(L|Q) x (ADD(L|Q) y y))        -> (LEA(L|Q)2 x y)
-(ADD(L|Q) x (ADD(L|Q) x y))        -> (LEA(L|Q)2 y x)
+(ADD(L|Q) x (SHL(L|Q)const [3] y)) => (LEA(L|Q)8 x y)
+(ADD(L|Q) x (SHL(L|Q)const [2] y)) => (LEA(L|Q)4 x y)
+(ADD(L|Q) x (SHL(L|Q)const [1] y)) => (LEA(L|Q)2 x y)
+(ADD(L|Q) x (ADD(L|Q) y y))        => (LEA(L|Q)2 x y)
+(ADD(L|Q) x (ADD(L|Q) x y))        => (LEA(L|Q)2 y x)
 
 // combine ADDQ/ADDQconst into LEAQ1/LEAL1
-(ADD(Q|L)const [c] (ADD(Q|L) x y)) -> (LEA(Q|L)1 [c] x y)
-(ADD(Q|L) (ADD(Q|L)const [c] x) y) -> (LEA(Q|L)1 [c] x y)
-(ADD(Q|L)const [c] (SHL(Q|L)const [1] x)) -> (LEA(Q|L)1 [c] x x)
+(ADD(Q|L)const [c] (ADD(Q|L) x y)) => (LEA(Q|L)1 [c] x y)
+(ADD(Q|L) (ADD(Q|L)const [c] x) y) => (LEA(Q|L)1 [c] x y)
+(ADD(Q|L)const [c] (SHL(Q|L)const [1] x)) => (LEA(Q|L)1 [c] x x)
 
 // fold ADDQ/ADDL into LEAQ/LEAL
-(ADD(Q|L)const [c] (LEA(Q|L) [d] {s} x)) && is32Bit(c+d) -> (LEA(Q|L) [c+d] {s} x)
-(LEA(Q|L) [c] {s} (ADD(Q|L)const [d] x)) && is32Bit(c+d) -> (LEA(Q|L) [c+d] {s} x)
-(LEA(Q|L) [c] {s} (ADD(Q|L) x y)) && x.Op != OpSB && y.Op != OpSB -> (LEA(Q|L)1 [c] {s} x y)
-(ADD(Q|L) x (LEA(Q|L) [c] {s} y)) && x.Op != OpSB && y.Op != OpSB -> (LEA(Q|L)1 [c] {s} x y)
+(ADD(Q|L)const [c] (LEA(Q|L) [d] {s} x)) && is32Bit(int64(c)+int64(d)) => (LEA(Q|L) [c+d] {s} x)
+(LEA(Q|L) [c] {s} (ADD(Q|L)const [d] x)) && is32Bit(int64(c)+int64(d)) => (LEA(Q|L) [c+d] {s} x)
+(LEA(Q|L) [c] {s} (ADD(Q|L) x y)) && x.Op != OpSB && y.Op != OpSB => (LEA(Q|L)1 [c] {s} x y)
+(ADD(Q|L) x (LEA(Q|L) [c] {s} y)) && x.Op != OpSB && y.Op != OpSB => (LEA(Q|L)1 [c] {s} x y)
 
 // fold ADDQconst/ADDLconst into LEAQx/LEALx
-(ADD(Q|L)const [c] (LEA(Q|L)1 [d] {s} x y)) && is32Bit(c+d) -> (LEA(Q|L)1 [c+d] {s} x y)
-(ADD(Q|L)const [c] (LEA(Q|L)2 [d] {s} x y)) && is32Bit(c+d) -> (LEA(Q|L)2 [c+d] {s} x y)
-(ADD(Q|L)const [c] (LEA(Q|L)4 [d] {s} x y)) && is32Bit(c+d) -> (LEA(Q|L)4 [c+d] {s} x y)
-(ADD(Q|L)const [c] (LEA(Q|L)8 [d] {s} x y)) && is32Bit(c+d) -> (LEA(Q|L)8 [c+d] {s} x y)
-(LEA(Q|L)1 [c] {s} (ADD(Q|L)const [d] x) y) && is32Bit(c+d)   && x.Op != OpSB -> (LEA(Q|L)1 [c+d] {s} x y)
-(LEA(Q|L)2 [c] {s} (ADD(Q|L)const [d] x) y) && is32Bit(c+d)   && x.Op != OpSB -> (LEA(Q|L)2 [c+d] {s} x y)
-(LEA(Q|L)2 [c] {s} x (ADD(Q|L)const [d] y)) && is32Bit(c+2*d) && y.Op != OpSB -> (LEA(Q|L)2 [c+2*d] {s} x y)
-(LEA(Q|L)4 [c] {s} (ADD(Q|L)const [d] x) y) && is32Bit(c+d)   && x.Op != OpSB -> (LEA(Q|L)4 [c+d] {s} x y)
-(LEA(Q|L)4 [c] {s} x (ADD(Q|L)const [d] y)) && is32Bit(c+4*d) && y.Op != OpSB -> (LEA(Q|L)4 [c+4*d] {s} x y)
-(LEA(Q|L)8 [c] {s} (ADD(Q|L)const [d] x) y) && is32Bit(c+d)   && x.Op != OpSB -> (LEA(Q|L)8 [c+d] {s} x y)
-(LEA(Q|L)8 [c] {s} x (ADD(Q|L)const [d] y)) && is32Bit(c+8*d) && y.Op != OpSB -> (LEA(Q|L)8 [c+8*d] {s} x y)
+(ADD(Q|L)const [c] (LEA(Q|L)1 [d] {s} x y)) && is32Bit(int64(c)+int64(d)) => (LEA(Q|L)1 [c+d] {s} x y)
+(ADD(Q|L)const [c] (LEA(Q|L)2 [d] {s} x y)) && is32Bit(int64(c)+int64(d)) => (LEA(Q|L)2 [c+d] {s} x y)
+(ADD(Q|L)const [c] (LEA(Q|L)4 [d] {s} x y)) && is32Bit(int64(c)+int64(d)) => (LEA(Q|L)4 [c+d] {s} x y)
+(ADD(Q|L)const [c] (LEA(Q|L)8 [d] {s} x y)) && is32Bit(int64(c)+int64(d)) => (LEA(Q|L)8 [c+d] {s} x y)
+(LEA(Q|L)1 [c] {s} (ADD(Q|L)const [d] x) y) && is32Bit(int64(c)+int64(d))   && x.Op != OpSB => (LEA(Q|L)1 [c+d] {s} x y)
+(LEA(Q|L)2 [c] {s} (ADD(Q|L)const [d] x) y) && is32Bit(int64(c)+int64(d))   && x.Op != OpSB => (LEA(Q|L)2 [c+d] {s} x y)
+(LEA(Q|L)2 [c] {s} x (ADD(Q|L)const [d] y)) && is32Bit(int64(c)+2*int64(d)) && y.Op != OpSB => (LEA(Q|L)2 [c+2*d] {s} x y)
+(LEA(Q|L)4 [c] {s} (ADD(Q|L)const [d] x) y) && is32Bit(int64(c)+int64(d))   && x.Op != OpSB => (LEA(Q|L)4 [c+d] {s} x y)
+(LEA(Q|L)4 [c] {s} x (ADD(Q|L)const [d] y)) && is32Bit(int64(c)+4*int64(d)) && y.Op != OpSB => (LEA(Q|L)4 [c+4*d] {s} x y)
+(LEA(Q|L)8 [c] {s} (ADD(Q|L)const [d] x) y) && is32Bit(int64(c)+int64(d))   && x.Op != OpSB => (LEA(Q|L)8 [c+d] {s} x y)
+(LEA(Q|L)8 [c] {s} x (ADD(Q|L)const [d] y)) && is32Bit(int64(c)+8*int64(d)) && y.Op != OpSB => (LEA(Q|L)8 [c+8*d] {s} x y)
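+// (The displacement scales with the index operand: LEA(Q|L)2 [c] {s} x y
+// computes c + x + 2*y, so adding d to y adds 2*d to the address, which is
+// why the rules above use c+2*d, c+4*d and c+8*d.)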
 
 // fold shifts into LEAQx/LEALx
-(LEA(Q|L)1 [c] {s} x (SHL(Q|L)const [1] y)) -> (LEA(Q|L)2 [c] {s} x y)
-(LEA(Q|L)1 [c] {s} x (SHL(Q|L)const [2] y)) -> (LEA(Q|L)4 [c] {s} x y)
-(LEA(Q|L)1 [c] {s} x (SHL(Q|L)const [3] y)) -> (LEA(Q|L)8 [c] {s} x y)
-(LEA(Q|L)2 [c] {s} x (SHL(Q|L)const [1] y)) -> (LEA(Q|L)4 [c] {s} x y)
-(LEA(Q|L)2 [c] {s} x (SHL(Q|L)const [2] y)) -> (LEA(Q|L)8 [c] {s} x y)
-(LEA(Q|L)4 [c] {s} x (SHL(Q|L)const [1] y)) -> (LEA(Q|L)8 [c] {s} x y)
+(LEA(Q|L)1 [c] {s} x (SHL(Q|L)const [1] y)) => (LEA(Q|L)2 [c] {s} x y)
+(LEA(Q|L)1 [c] {s} x (SHL(Q|L)const [2] y)) => (LEA(Q|L)4 [c] {s} x y)
+(LEA(Q|L)1 [c] {s} x (SHL(Q|L)const [3] y)) => (LEA(Q|L)8 [c] {s} x y)
+(LEA(Q|L)2 [c] {s} x (SHL(Q|L)const [1] y)) => (LEA(Q|L)4 [c] {s} x y)
+(LEA(Q|L)2 [c] {s} x (SHL(Q|L)const [2] y)) => (LEA(Q|L)8 [c] {s} x y)
+(LEA(Q|L)4 [c] {s} x (SHL(Q|L)const [1] y)) => (LEA(Q|L)8 [c] {s} x y)
 
 // reverse ordering of compare instruction
-(SETL (InvertFlags x)) -> (SETG x)
-(SETG (InvertFlags x)) -> (SETL x)
-(SETB (InvertFlags x)) -> (SETA x)
-(SETA (InvertFlags x)) -> (SETB x)
-(SETLE (InvertFlags x)) -> (SETGE x)
-(SETGE (InvertFlags x)) -> (SETLE x)
-(SETBE (InvertFlags x)) -> (SETAE x)
-(SETAE (InvertFlags x)) -> (SETBE x)
-(SETEQ (InvertFlags x)) -> (SETEQ x)
-(SETNE (InvertFlags x)) -> (SETNE x)
+(SETL (InvertFlags x)) => (SETG x)
+(SETG (InvertFlags x)) => (SETL x)
+(SETB (InvertFlags x)) => (SETA x)
+(SETA (InvertFlags x)) => (SETB x)
+(SETLE (InvertFlags x)) => (SETGE x)
+(SETGE (InvertFlags x)) => (SETLE x)
+(SETBE (InvertFlags x)) => (SETAE x)
+(SETAE (InvertFlags x)) => (SETBE x)
+(SETEQ (InvertFlags x)) => (SETEQ x)
+(SETNE (InvertFlags x)) => (SETNE x)
 
-(SETLstore [off] {sym} ptr (InvertFlags x) mem) -> (SETGstore [off] {sym} ptr x mem)
-(SETGstore [off] {sym} ptr (InvertFlags x) mem) -> (SETLstore [off] {sym} ptr x mem)
-(SETBstore [off] {sym} ptr (InvertFlags x) mem) -> (SETAstore [off] {sym} ptr x mem)
-(SETAstore [off] {sym} ptr (InvertFlags x) mem) -> (SETBstore [off] {sym} ptr x mem)
-(SETLEstore [off] {sym} ptr (InvertFlags x) mem) -> (SETGEstore [off] {sym} ptr x mem)
-(SETGEstore [off] {sym} ptr (InvertFlags x) mem) -> (SETLEstore [off] {sym} ptr x mem)
-(SETBEstore [off] {sym} ptr (InvertFlags x) mem) -> (SETAEstore [off] {sym} ptr x mem)
-(SETAEstore [off] {sym} ptr (InvertFlags x) mem) -> (SETBEstore [off] {sym} ptr x mem)
-(SETEQstore [off] {sym} ptr (InvertFlags x) mem) -> (SETEQstore [off] {sym} ptr x mem)
-(SETNEstore [off] {sym} ptr (InvertFlags x) mem) -> (SETNEstore [off] {sym} ptr x mem)
+(SETLstore [off] {sym} ptr (InvertFlags x) mem) => (SETGstore [off] {sym} ptr x mem)
+(SETGstore [off] {sym} ptr (InvertFlags x) mem) => (SETLstore [off] {sym} ptr x mem)
+(SETBstore [off] {sym} ptr (InvertFlags x) mem) => (SETAstore [off] {sym} ptr x mem)
+(SETAstore [off] {sym} ptr (InvertFlags x) mem) => (SETBstore [off] {sym} ptr x mem)
+(SETLEstore [off] {sym} ptr (InvertFlags x) mem) => (SETGEstore [off] {sym} ptr x mem)
+(SETGEstore [off] {sym} ptr (InvertFlags x) mem) => (SETLEstore [off] {sym} ptr x mem)
+(SETBEstore [off] {sym} ptr (InvertFlags x) mem) => (SETAEstore [off] {sym} ptr x mem)
+(SETAEstore [off] {sym} ptr (InvertFlags x) mem) => (SETBEstore [off] {sym} ptr x mem)
+(SETEQstore [off] {sym} ptr (InvertFlags x) mem) => (SETEQstore [off] {sym} ptr x mem)
+(SETNEstore [off] {sym} ptr (InvertFlags x) mem) => (SETNEstore [off] {sym} ptr x mem)
 
 // sign extended loads
 // Note: The combined instruction must end up in the same block
@@ -1029,100 +1036,100 @@
 // Make sure we don't combine these ops if the load has another use.
 // This prevents a single load from being split into multiple loads
 // which then might return different values.  See test/atomicload.go.
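 // (Illustratively: if v := *p had two uses and each use were rewritten into
 // its own MOVQload, a concurrent write to *p could land between the two
 // loads, and the two uses of v could then observe different values.)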
-(MOVBQSX x:(MOVBload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBQSXload <v.Type> [off] {sym} ptr mem)
-(MOVBQSX x:(MOVWload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBQSXload <v.Type> [off] {sym} ptr mem)
-(MOVBQSX x:(MOVLload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBQSXload <v.Type> [off] {sym} ptr mem)
-(MOVBQSX x:(MOVQload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBQSXload <v.Type> [off] {sym} ptr mem)
-(MOVBQZX x:(MOVBload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBload <v.Type> [off] {sym} ptr mem)
-(MOVBQZX x:(MOVWload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBload <v.Type> [off] {sym} ptr mem)
-(MOVBQZX x:(MOVLload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBload <v.Type> [off] {sym} ptr mem)
-(MOVBQZX x:(MOVQload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBload <v.Type> [off] {sym} ptr mem)
-(MOVWQSX x:(MOVWload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWQSXload <v.Type> [off] {sym} ptr mem)
-(MOVWQSX x:(MOVLload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWQSXload <v.Type> [off] {sym} ptr mem)
-(MOVWQSX x:(MOVQload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWQSXload <v.Type> [off] {sym} ptr mem)
-(MOVWQZX x:(MOVWload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWload <v.Type> [off] {sym} ptr mem)
-(MOVWQZX x:(MOVLload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWload <v.Type> [off] {sym} ptr mem)
-(MOVWQZX x:(MOVQload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWload <v.Type> [off] {sym} ptr mem)
-(MOVLQSX x:(MOVLload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVLQSXload <v.Type> [off] {sym} ptr mem)
-(MOVLQSX x:(MOVQload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVLQSXload <v.Type> [off] {sym} ptr mem)
-(MOVLQZX x:(MOVLload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVLload <v.Type> [off] {sym} ptr mem)
-(MOVLQZX x:(MOVQload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVLload <v.Type> [off] {sym} ptr mem)
+(MOVBQSX x:(MOVBload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVBQSXload <v.Type> [off] {sym} ptr mem)
+(MOVBQSX x:(MOVWload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVBQSXload <v.Type> [off] {sym} ptr mem)
+(MOVBQSX x:(MOVLload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVBQSXload <v.Type> [off] {sym} ptr mem)
+(MOVBQSX x:(MOVQload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVBQSXload <v.Type> [off] {sym} ptr mem)
+(MOVBQZX x:(MOVBload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVBload <v.Type> [off] {sym} ptr mem)
+(MOVBQZX x:(MOVWload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVBload <v.Type> [off] {sym} ptr mem)
+(MOVBQZX x:(MOVLload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVBload <v.Type> [off] {sym} ptr mem)
+(MOVBQZX x:(MOVQload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVBload <v.Type> [off] {sym} ptr mem)
+(MOVWQSX x:(MOVWload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVWQSXload <v.Type> [off] {sym} ptr mem)
+(MOVWQSX x:(MOVLload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVWQSXload <v.Type> [off] {sym} ptr mem)
+(MOVWQSX x:(MOVQload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVWQSXload <v.Type> [off] {sym} ptr mem)
+(MOVWQZX x:(MOVWload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVWload <v.Type> [off] {sym} ptr mem)
+(MOVWQZX x:(MOVLload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVWload <v.Type> [off] {sym} ptr mem)
+(MOVWQZX x:(MOVQload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVWload <v.Type> [off] {sym} ptr mem)
+(MOVLQSX x:(MOVLload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVLQSXload <v.Type> [off] {sym} ptr mem)
+(MOVLQSX x:(MOVQload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVLQSXload <v.Type> [off] {sym} ptr mem)
+(MOVLQZX x:(MOVLload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVLload <v.Type> [off] {sym} ptr mem)
+(MOVLQZX x:(MOVQload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVLload <v.Type> [off] {sym} ptr mem)
 
-(MOVLQZX x) && zeroUpper32Bits(x,3) -> x
-(MOVWQZX x) && zeroUpper48Bits(x,3) -> x
-(MOVBQZX x) && zeroUpper56Bits(x,3) -> x
+(MOVLQZX x) && zeroUpper32Bits(x,3) => x
+(MOVWQZX x) && zeroUpper48Bits(x,3) => x
+(MOVBQZX x) && zeroUpper56Bits(x,3) => x
 
 // replace load from same location as preceding store with zero/sign extension (or copy in case of full width)
-(MOVBload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVBQZX x)
-(MOVWload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVWQZX x)
-(MOVLload [off] {sym} ptr (MOVLstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVLQZX x)
-(MOVQload [off] {sym} ptr (MOVQstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> x
-(MOVBQSXload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVBQSX x)
-(MOVWQSXload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVWQSX x)
-(MOVLQSXload [off] {sym} ptr (MOVLstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVLQSX x)
+(MOVBload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVBQZX x)
+(MOVWload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVWQZX x)
+(MOVLload [off] {sym} ptr (MOVLstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVLQZX x)
+(MOVQload [off] {sym} ptr (MOVQstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => x
+(MOVBQSXload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVBQSX x)
+(MOVWQSXload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVWQSX x)
+(MOVLQSXload [off] {sym} ptr (MOVLstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVLQSX x)
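+// Illustrative sketch: in
+//	*p = byte(x) // MOVBstore
+//	y := *p      // MOVBload of the same address
+// the load is redundant and becomes (MOVBQZX x); in the full-width MOVQ
+// case the stored value is simply reused.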
 
 // Fold extensions and ANDs together.
-(MOVBQZX (ANDLconst [c] x)) -> (ANDLconst [c & 0xff] x)
-(MOVWQZX (ANDLconst [c] x)) -> (ANDLconst [c & 0xffff] x)
-(MOVLQZX (ANDLconst [c] x)) -> (ANDLconst [c] x)
-(MOVBQSX (ANDLconst [c] x)) && c & 0x80 == 0 -> (ANDLconst [c & 0x7f] x)
-(MOVWQSX (ANDLconst [c] x)) && c & 0x8000 == 0 -> (ANDLconst [c & 0x7fff] x)
-(MOVLQSX (ANDLconst [c] x)) && c & 0x80000000 == 0 -> (ANDLconst [c & 0x7fffffff] x)
+(MOVBQZX (ANDLconst [c] x)) => (ANDLconst [c & 0xff] x)
+(MOVWQZX (ANDLconst [c] x)) => (ANDLconst [c & 0xffff] x)
+(MOVLQZX (ANDLconst [c] x)) => (ANDLconst [c] x)
+(MOVBQSX (ANDLconst [c] x)) && c & 0x80 == 0 => (ANDLconst [c & 0x7f] x)
+(MOVWQSX (ANDLconst [c] x)) && c & 0x8000 == 0 => (ANDLconst [c & 0x7fff] x)
+(MOVLQSX (ANDLconst [c] x)) && uint32(c) & 0x80000000 == 0 => (ANDLconst [c & 0x7fffffff] x)
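+// Illustrative reasoning for the signed cases: when the mask's sign bit is
+// clear (e.g. c&0x80 == 0 for bytes), the AND result's sign bit is already
+// zero, so the sign extension is a no-op and only the mask remains.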
 
 // Don't extend before storing
-(MOVLstore [off] {sym} ptr (MOVLQSX x) mem) -> (MOVLstore [off] {sym} ptr x mem)
-(MOVWstore [off] {sym} ptr (MOVWQSX x) mem) -> (MOVWstore [off] {sym} ptr x mem)
-(MOVBstore [off] {sym} ptr (MOVBQSX x) mem) -> (MOVBstore [off] {sym} ptr x mem)
-(MOVLstore [off] {sym} ptr (MOVLQZX x) mem) -> (MOVLstore [off] {sym} ptr x mem)
-(MOVWstore [off] {sym} ptr (MOVWQZX x) mem) -> (MOVWstore [off] {sym} ptr x mem)
-(MOVBstore [off] {sym} ptr (MOVBQZX x) mem) -> (MOVBstore [off] {sym} ptr x mem)
+(MOVLstore [off] {sym} ptr (MOVLQSX x) mem) => (MOVLstore [off] {sym} ptr x mem)
+(MOVWstore [off] {sym} ptr (MOVWQSX x) mem) => (MOVWstore [off] {sym} ptr x mem)
+(MOVBstore [off] {sym} ptr (MOVBQSX x) mem) => (MOVBstore [off] {sym} ptr x mem)
+(MOVLstore [off] {sym} ptr (MOVLQZX x) mem) => (MOVLstore [off] {sym} ptr x mem)
+(MOVWstore [off] {sym} ptr (MOVWQZX x) mem) => (MOVWstore [off] {sym} ptr x mem)
+(MOVBstore [off] {sym} ptr (MOVBQZX x) mem) => (MOVBstore [off] {sym} ptr x mem)
 
 // fold constants into memory operations
 // Note that this is not always a good idea because if not all the uses of
 // the ADDQconst get eliminated, we still have to compute the ADDQconst and we now
 // have potentially two live values (ptr and (ADDQconst [off] ptr)) instead of one.
 // Nevertheless, let's do it!
-(MOV(Q|L|W|B|SS|SD|O)load  [off1] {sym} (ADDQconst [off2] ptr) mem) && is32Bit(off1+off2) ->
+(MOV(Q|L|W|B|SS|SD|O)load  [off1] {sym} (ADDQconst [off2] ptr) mem) && is32Bit(int64(off1)+int64(off2)) =>
     (MOV(Q|L|W|B|SS|SD|O)load  [off1+off2] {sym} ptr mem)
-(MOV(Q|L|W|B|SS|SD|O)store  [off1] {sym} (ADDQconst [off2] ptr) val mem) && is32Bit(off1+off2) ->
+(MOV(Q|L|W|B|SS|SD|O)store  [off1] {sym} (ADDQconst [off2] ptr) val mem) && is32Bit(int64(off1)+int64(off2)) =>
 	(MOV(Q|L|W|B|SS|SD|O)store  [off1+off2] {sym} ptr val mem)
-(SET(L|G|B|A|LE|GE|BE|AE|EQ|NE)store [off1] {sym} (ADDQconst [off2] base) val mem) && is32Bit(off1+off2) ->
+(SET(L|G|B|A|LE|GE|BE|AE|EQ|NE)store [off1] {sym} (ADDQconst [off2] base) val mem) && is32Bit(int64(off1)+int64(off2)) =>
 	(SET(L|G|B|A|LE|GE|BE|AE|EQ|NE)store [off1+off2] {sym} base val mem)
-((ADD|SUB|AND|OR|XOR)Qload [off1] {sym} val (ADDQconst [off2] base) mem) && is32Bit(off1+off2) ->
+((ADD|SUB|AND|OR|XOR)Qload [off1] {sym} val (ADDQconst [off2] base) mem) && is32Bit(int64(off1)+int64(off2)) =>
 	((ADD|SUB|AND|OR|XOR)Qload [off1+off2] {sym} val base mem)
-((ADD|SUB|AND|OR|XOR)Lload [off1] {sym} val (ADDQconst [off2] base) mem) && is32Bit(off1+off2) ->
+((ADD|SUB|AND|OR|XOR)Lload [off1] {sym} val (ADDQconst [off2] base) mem) && is32Bit(int64(off1)+int64(off2)) =>
 	((ADD|SUB|AND|OR|XOR)Lload [off1+off2] {sym} val base mem)
-(CMP(Q|L|W|B)load [off1] {sym} (ADDQconst [off2] base) val mem) && is32Bit(off1+off2) ->
+(CMP(Q|L|W|B)load [off1] {sym} (ADDQconst [off2] base) val mem) && is32Bit(int64(off1)+int64(off2)) =>
 	(CMP(Q|L|W|B)load [off1+off2] {sym} base val mem)
-(CMP(Q|L|W|B)constload [valoff1] {sym} (ADDQconst [off2] base) mem) && ValAndOff(valoff1).canAdd(off2) ->
-	(CMP(Q|L|W|B)constload [ValAndOff(valoff1).add(off2)] {sym} base mem)
+(CMP(Q|L|W|B)constload [valoff1] {sym} (ADDQconst [off2] base) mem) && ValAndOff(valoff1).canAdd32(off2) =>
+	(CMP(Q|L|W|B)constload [ValAndOff(valoff1).addOffset32(off2)] {sym} base mem)
 
-((ADD|SUB|MUL|DIV)SSload [off1] {sym} val (ADDQconst [off2] base) mem) && is32Bit(off1+off2) ->
+((ADD|SUB|MUL|DIV)SSload [off1] {sym} val (ADDQconst [off2] base) mem) && is32Bit(int64(off1)+int64(off2)) =>
 	((ADD|SUB|MUL|DIV)SSload [off1+off2] {sym} val base mem)
-((ADD|SUB|MUL|DIV)SDload [off1] {sym} val (ADDQconst [off2] base) mem) && is32Bit(off1+off2) ->
+((ADD|SUB|MUL|DIV)SDload [off1] {sym} val (ADDQconst [off2] base) mem) && is32Bit(int64(off1)+int64(off2)) =>
 	((ADD|SUB|MUL|DIV)SDload [off1+off2] {sym} val base mem)
-((ADD|AND|OR|XOR|BTC|BTR|BTS)Qconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem) && ValAndOff(valoff1).canAdd(off2) ->
-	((ADD|AND|OR|XOR|BTC|BTR|BTS)Qconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem)
-((ADD|AND|OR|XOR|BTC|BTR|BTS)Lconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem) && ValAndOff(valoff1).canAdd(off2) ->
-	((ADD|AND|OR|XOR|BTC|BTR|BTS)Lconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem)
-((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Qmodify [off1] {sym} (ADDQconst [off2] base) val mem) && is32Bit(off1+off2) ->
+((ADD|AND|OR|XOR|BTC|BTR|BTS)Qconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem) && ValAndOff(valoff1).canAdd32(off2) =>
+	((ADD|AND|OR|XOR|BTC|BTR|BTS)Qconstmodify [ValAndOff(valoff1).addOffset32(off2)] {sym} base mem)
+((ADD|AND|OR|XOR|BTC|BTR|BTS)Lconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem) && ValAndOff(valoff1).canAdd32(off2) =>
+	((ADD|AND|OR|XOR|BTC|BTR|BTS)Lconstmodify [ValAndOff(valoff1).addOffset32(off2)] {sym} base mem)
+((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Qmodify [off1] {sym} (ADDQconst [off2] base) val mem) && is32Bit(int64(off1)+int64(off2)) =>
 	((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Qmodify [off1+off2] {sym} base val mem)
-((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Lmodify [off1] {sym} (ADDQconst [off2] base) val mem) && is32Bit(off1+off2) ->
+((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Lmodify [off1] {sym} (ADDQconst [off2] base) val mem) && is32Bit(int64(off1)+int64(off2)) =>
 	((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Lmodify [off1+off2] {sym} base val mem)
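 // Illustrative sketch (assumed offsets): when both offsets fit in 32 bits,
 //	(MOVQload [8] {sym} (ADDQconst [16] ptr) mem)
 // becomes (MOVQload [24] {sym} ptr mem), and the ADDQ disappears entirely
 // if the ADDQconst had no other uses.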
 
 // Fold constants into stores.
-(MOVQstore [off] {sym} ptr (MOVQconst [c]) mem) && validValAndOff(c,off) ->
-	(MOVQstoreconst [makeValAndOff(c,off)] {sym} ptr mem)
-(MOVLstore [off] {sym} ptr (MOV(L|Q)const [c]) mem) && validOff(off) ->
-	(MOVLstoreconst [makeValAndOff(int64(int32(c)),off)] {sym} ptr mem)
-(MOVWstore [off] {sym} ptr (MOV(L|Q)const [c]) mem) && validOff(off) ->
-	(MOVWstoreconst [makeValAndOff(int64(int16(c)),off)] {sym} ptr mem)
-(MOVBstore [off] {sym} ptr (MOV(L|Q)const [c]) mem) && validOff(off) ->
-	(MOVBstoreconst [makeValAndOff(int64(int8(c)),off)] {sym} ptr mem)
+(MOVQstore [off] {sym} ptr (MOVQconst [c]) mem) && validVal(c) =>
+	(MOVQstoreconst [makeValAndOff32(int32(c),off)] {sym} ptr mem)
+(MOVLstore [off] {sym} ptr (MOV(L|Q)const [c]) mem) =>
+	(MOVLstoreconst [makeValAndOff32(int32(c),off)] {sym} ptr mem)
+(MOVWstore [off] {sym} ptr (MOV(L|Q)const [c]) mem) =>
+	(MOVWstoreconst [makeValAndOff32(int32(int16(c)),off)] {sym} ptr mem)
+(MOVBstore [off] {sym} ptr (MOV(L|Q)const [c]) mem) =>
+	(MOVBstoreconst [makeValAndOff32(int32(int8(c)),off)] {sym} ptr mem)
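+// Illustrative sketch: the constant is truncated to the store width, so
+//	(MOVBstore [0] {sym} ptr (MOVLconst [0x1234]) mem)
+// becomes (MOVBstoreconst [makeValAndOff32(0x34,0)] {sym} ptr mem).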
 
 // Fold address offsets into constant stores.
-(MOV(Q|L|W|B)storeconst [sc] {s} (ADDQconst [off] ptr) mem) && ValAndOff(sc).canAdd(off) ->
-	(MOV(Q|L|W|B)storeconst [ValAndOff(sc).add(off)] {s} ptr mem)
+(MOV(Q|L|W|B)storeconst [sc] {s} (ADDQconst [off] ptr) mem) && ValAndOff(sc).canAdd32(off) =>
+	(MOV(Q|L|W|B)storeconst [ValAndOff(sc).addOffset32(off)] {s} ptr mem)
 
 // We need to fold LEAQ into the MOVx ops so that the live variable analysis knows
 // what variables are being read/written by the ops.
diff --git a/src/cmd/compile/internal/ssa/gen/ARM64.rules b/src/cmd/compile/internal/ssa/gen/ARM64.rules
index c29e7f7..311067e 100644
--- a/src/cmd/compile/internal/ssa/gen/ARM64.rules
+++ b/src/cmd/compile/internal/ssa/gen/ARM64.rules
@@ -132,65 +132,65 @@
 // we compare to 64 to ensure Go semantics for large shifts
 // Rules for rotates with a non-constant shift are based on the following rules;
 // if the following rules change, please also update the rotate rules derived from them.
-(Lsh64x64 <t> x y) => (CSEL {OpARM64LessThanU} (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] y))
-(Lsh64x32 <t> x y) => (CSEL {OpARM64LessThanU} (SLL <t> x (ZeroExt32to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
-(Lsh64x16 <t> x y) => (CSEL {OpARM64LessThanU} (SLL <t> x (ZeroExt16to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
-(Lsh64x8  <t> x y) => (CSEL {OpARM64LessThanU} (SLL <t> x (ZeroExt8to64  y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64  y)))
+(Lsh64x64 <t> x y) => (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] y))
+(Lsh64x32 <t> x y) => (CSEL [OpARM64LessThanU] (SLL <t> x (ZeroExt32to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
+(Lsh64x16 <t> x y) => (CSEL [OpARM64LessThanU] (SLL <t> x (ZeroExt16to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
+(Lsh64x8  <t> x y) => (CSEL [OpARM64LessThanU] (SLL <t> x (ZeroExt8to64  y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64  y)))
 
-(Lsh32x64 <t> x y) => (CSEL {OpARM64LessThanU} (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] y))
-(Lsh32x32 <t> x y) => (CSEL {OpARM64LessThanU} (SLL <t> x (ZeroExt32to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
-(Lsh32x16 <t> x y) => (CSEL {OpARM64LessThanU} (SLL <t> x (ZeroExt16to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
-(Lsh32x8  <t> x y) => (CSEL {OpARM64LessThanU} (SLL <t> x (ZeroExt8to64  y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64  y)))
+(Lsh32x64 <t> x y) => (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] y))
+(Lsh32x32 <t> x y) => (CSEL [OpARM64LessThanU] (SLL <t> x (ZeroExt32to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
+(Lsh32x16 <t> x y) => (CSEL [OpARM64LessThanU] (SLL <t> x (ZeroExt16to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
+(Lsh32x8  <t> x y) => (CSEL [OpARM64LessThanU] (SLL <t> x (ZeroExt8to64  y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64  y)))
 
-(Lsh16x64 <t> x y) => (CSEL {OpARM64LessThanU} (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] y))
-(Lsh16x32 <t> x y) => (CSEL {OpARM64LessThanU} (SLL <t> x (ZeroExt32to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
-(Lsh16x16 <t> x y) => (CSEL {OpARM64LessThanU} (SLL <t> x (ZeroExt16to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
-(Lsh16x8  <t> x y) => (CSEL {OpARM64LessThanU} (SLL <t> x (ZeroExt8to64  y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64  y)))
+(Lsh16x64 <t> x y) => (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] y))
+(Lsh16x32 <t> x y) => (CSEL [OpARM64LessThanU] (SLL <t> x (ZeroExt32to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
+(Lsh16x16 <t> x y) => (CSEL [OpARM64LessThanU] (SLL <t> x (ZeroExt16to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
+(Lsh16x8  <t> x y) => (CSEL [OpARM64LessThanU] (SLL <t> x (ZeroExt8to64  y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64  y)))
 
-(Lsh8x64 <t> x y) => (CSEL {OpARM64LessThanU} (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] y))
-(Lsh8x32 <t> x y) => (CSEL {OpARM64LessThanU} (SLL <t> x (ZeroExt32to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
-(Lsh8x16 <t> x y) => (CSEL {OpARM64LessThanU} (SLL <t> x (ZeroExt16to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
-(Lsh8x8  <t> x y) => (CSEL {OpARM64LessThanU} (SLL <t> x (ZeroExt8to64  y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64  y)))
+(Lsh8x64 <t> x y) => (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] y))
+(Lsh8x32 <t> x y) => (CSEL [OpARM64LessThanU] (SLL <t> x (ZeroExt32to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
+(Lsh8x16 <t> x y) => (CSEL [OpARM64LessThanU] (SLL <t> x (ZeroExt16to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
+(Lsh8x8  <t> x y) => (CSEL [OpARM64LessThanU] (SLL <t> x (ZeroExt8to64  y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64  y)))
 
-(Rsh64Ux64 <t> x y) => (CSEL {OpARM64LessThanU} (SRL <t> x y) (Const64 <t> [0]) (CMPconst [64] y))
-(Rsh64Ux32 <t> x y) => (CSEL {OpARM64LessThanU} (SRL <t> x (ZeroExt32to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
-(Rsh64Ux16 <t> x y) => (CSEL {OpARM64LessThanU} (SRL <t> x (ZeroExt16to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
-(Rsh64Ux8  <t> x y) => (CSEL {OpARM64LessThanU} (SRL <t> x (ZeroExt8to64  y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64  y)))
+(Rsh64Ux64 <t> x y) => (CSEL [OpARM64LessThanU] (SRL <t> x y) (Const64 <t> [0]) (CMPconst [64] y))
+(Rsh64Ux32 <t> x y) => (CSEL [OpARM64LessThanU] (SRL <t> x (ZeroExt32to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
+(Rsh64Ux16 <t> x y) => (CSEL [OpARM64LessThanU] (SRL <t> x (ZeroExt16to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
+(Rsh64Ux8  <t> x y) => (CSEL [OpARM64LessThanU] (SRL <t> x (ZeroExt8to64  y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64  y)))
 
-(Rsh32Ux64 <t> x y) => (CSEL {OpARM64LessThanU} (SRL <t> (ZeroExt32to64 x) y) (Const64 <t> [0]) (CMPconst [64] y))
-(Rsh32Ux32 <t> x y) => (CSEL {OpARM64LessThanU} (SRL <t> (ZeroExt32to64 x) (ZeroExt32to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
-(Rsh32Ux16 <t> x y) => (CSEL {OpARM64LessThanU} (SRL <t> (ZeroExt32to64 x) (ZeroExt16to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
-(Rsh32Ux8  <t> x y) => (CSEL {OpARM64LessThanU} (SRL <t> (ZeroExt32to64 x) (ZeroExt8to64  y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64  y)))
+(Rsh32Ux64 <t> x y) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt32to64 x) y) (Const64 <t> [0]) (CMPconst [64] y))
+(Rsh32Ux32 <t> x y) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt32to64 x) (ZeroExt32to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
+(Rsh32Ux16 <t> x y) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt32to64 x) (ZeroExt16to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
+(Rsh32Ux8  <t> x y) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt32to64 x) (ZeroExt8to64  y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64  y)))
 
-(Rsh16Ux64 <t> x y) => (CSEL {OpARM64LessThanU} (SRL <t> (ZeroExt16to64 x) y) (Const64 <t> [0]) (CMPconst [64] y))
-(Rsh16Ux32 <t> x y) => (CSEL {OpARM64LessThanU} (SRL <t> (ZeroExt16to64 x) (ZeroExt32to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
-(Rsh16Ux16 <t> x y) => (CSEL {OpARM64LessThanU} (SRL <t> (ZeroExt16to64 x) (ZeroExt16to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
-(Rsh16Ux8  <t> x y) => (CSEL {OpARM64LessThanU} (SRL <t> (ZeroExt16to64 x) (ZeroExt8to64  y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64  y)))
+(Rsh16Ux64 <t> x y) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt16to64 x) y) (Const64 <t> [0]) (CMPconst [64] y))
+(Rsh16Ux32 <t> x y) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt16to64 x) (ZeroExt32to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
+(Rsh16Ux16 <t> x y) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt16to64 x) (ZeroExt16to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
+(Rsh16Ux8  <t> x y) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt16to64 x) (ZeroExt8to64  y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64  y)))
 
-(Rsh8Ux64 <t> x y) => (CSEL {OpARM64LessThanU} (SRL <t> (ZeroExt8to64 x) y) (Const64 <t> [0]) (CMPconst [64] y))
-(Rsh8Ux32 <t> x y) => (CSEL {OpARM64LessThanU} (SRL <t> (ZeroExt8to64 x) (ZeroExt32to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
-(Rsh8Ux16 <t> x y) => (CSEL {OpARM64LessThanU} (SRL <t> (ZeroExt8to64 x) (ZeroExt16to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
-(Rsh8Ux8  <t> x y) => (CSEL {OpARM64LessThanU} (SRL <t> (ZeroExt8to64 x) (ZeroExt8to64  y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64  y)))
+(Rsh8Ux64 <t> x y) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt8to64 x) y) (Const64 <t> [0]) (CMPconst [64] y))
+(Rsh8Ux32 <t> x y) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt8to64 x) (ZeroExt32to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
+(Rsh8Ux16 <t> x y) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt8to64 x) (ZeroExt16to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
+(Rsh8Ux8  <t> x y) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt8to64 x) (ZeroExt8to64  y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64  y)))
 
-(Rsh64x64 x y) => (SRA x (CSEL {OpARM64LessThanU} <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] y)))
-(Rsh64x32 x y) => (SRA x (CSEL {OpARM64LessThanU} <y.Type> (ZeroExt32to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt32to64 y))))
-(Rsh64x16 x y) => (SRA x (CSEL {OpARM64LessThanU} <y.Type> (ZeroExt16to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt16to64 y))))
-(Rsh64x8  x y) => (SRA x (CSEL {OpARM64LessThanU} <y.Type> (ZeroExt8to64  y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt8to64  y))))
+(Rsh64x64 x y) => (SRA x (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] y)))
+(Rsh64x32 x y) => (SRA x (CSEL [OpARM64LessThanU] <y.Type> (ZeroExt32to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt32to64 y))))
+(Rsh64x16 x y) => (SRA x (CSEL [OpARM64LessThanU] <y.Type> (ZeroExt16to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt16to64 y))))
+(Rsh64x8  x y) => (SRA x (CSEL [OpARM64LessThanU] <y.Type> (ZeroExt8to64  y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt8to64  y))))
 
-(Rsh32x64 x y) => (SRA (SignExt32to64 x) (CSEL {OpARM64LessThanU} <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] y)))
-(Rsh32x32 x y) => (SRA (SignExt32to64 x) (CSEL {OpARM64LessThanU} <y.Type> (ZeroExt32to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt32to64 y))))
-(Rsh32x16 x y) => (SRA (SignExt32to64 x) (CSEL {OpARM64LessThanU} <y.Type> (ZeroExt16to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt16to64 y))))
-(Rsh32x8  x y) => (SRA (SignExt32to64 x) (CSEL {OpARM64LessThanU} <y.Type> (ZeroExt8to64  y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt8to64  y))))
+(Rsh32x64 x y) => (SRA (SignExt32to64 x) (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] y)))
+(Rsh32x32 x y) => (SRA (SignExt32to64 x) (CSEL [OpARM64LessThanU] <y.Type> (ZeroExt32to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt32to64 y))))
+(Rsh32x16 x y) => (SRA (SignExt32to64 x) (CSEL [OpARM64LessThanU] <y.Type> (ZeroExt16to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt16to64 y))))
+(Rsh32x8  x y) => (SRA (SignExt32to64 x) (CSEL [OpARM64LessThanU] <y.Type> (ZeroExt8to64  y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt8to64  y))))
 
-(Rsh16x64 x y) => (SRA (SignExt16to64 x) (CSEL {OpARM64LessThanU} <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] y)))
-(Rsh16x32 x y) => (SRA (SignExt16to64 x) (CSEL {OpARM64LessThanU} <y.Type> (ZeroExt32to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt32to64 y))))
-(Rsh16x16 x y) => (SRA (SignExt16to64 x) (CSEL {OpARM64LessThanU} <y.Type> (ZeroExt16to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt16to64 y))))
-(Rsh16x8  x y) => (SRA (SignExt16to64 x) (CSEL {OpARM64LessThanU} <y.Type> (ZeroExt8to64  y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt8to64  y))))
+(Rsh16x64 x y) => (SRA (SignExt16to64 x) (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] y)))
+(Rsh16x32 x y) => (SRA (SignExt16to64 x) (CSEL [OpARM64LessThanU] <y.Type> (ZeroExt32to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt32to64 y))))
+(Rsh16x16 x y) => (SRA (SignExt16to64 x) (CSEL [OpARM64LessThanU] <y.Type> (ZeroExt16to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt16to64 y))))
+(Rsh16x8  x y) => (SRA (SignExt16to64 x) (CSEL [OpARM64LessThanU] <y.Type> (ZeroExt8to64  y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt8to64  y))))
 
-(Rsh8x64 x y) => (SRA (SignExt8to64 x) (CSEL {OpARM64LessThanU} <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] y)))
-(Rsh8x32 x y) => (SRA (SignExt8to64 x) (CSEL {OpARM64LessThanU} <y.Type> (ZeroExt32to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt32to64 y))))
-(Rsh8x16 x y) => (SRA (SignExt8to64 x) (CSEL {OpARM64LessThanU} <y.Type> (ZeroExt16to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt16to64 y))))
-(Rsh8x8  x y) => (SRA (SignExt8to64 x) (CSEL {OpARM64LessThanU} <y.Type> (ZeroExt8to64  y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt8to64  y))))
+(Rsh8x64 x y) => (SRA (SignExt8to64 x) (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] y)))
+(Rsh8x32 x y) => (SRA (SignExt8to64 x) (CSEL [OpARM64LessThanU] <y.Type> (ZeroExt32to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt32to64 y))))
+(Rsh8x16 x y) => (SRA (SignExt8to64 x) (CSEL [OpARM64LessThanU] <y.Type> (ZeroExt16to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt16to64 y))))
+(Rsh8x8  x y) => (SRA (SignExt8to64 x) (CSEL [OpARM64LessThanU] <y.Type> (ZeroExt8to64  y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt8to64  y))))
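+// Illustrative reading: CSEL [cc] a b flags yields a when cc holds for
+// flags and b otherwise, so the unsigned rules above compute
+//	if y <u 64 { x << y } else { 0 }
+// while the signed right shifts clamp the count to 63 instead, matching
+// Go's semantics for out-of-range shift counts.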
 
 // constants
 (Const(64|32|16|8) [val]) => (MOVDconst [int64(val)])
@@ -315,8 +315,8 @@
 (FCMPD (FMOVDconst [0]) x) => (InvertFlags (FCMPD0 x))
 
 // CSEL needs a flag-generating argument. Synthesize a CMPW if necessary.
-(CondSelect x y boolval) && flagArg(boolval) != nil => (CSEL {boolval.Op} x y flagArg(boolval))
-(CondSelect x y boolval) && flagArg(boolval) == nil => (CSEL {OpARM64NotEqual} x y (CMPWconst [0] boolval))
+(CondSelect x y boolval) && flagArg(boolval) != nil => (CSEL [boolval.Op] x y flagArg(boolval))
+(CondSelect x y boolval) && flagArg(boolval) == nil => (CSEL [OpARM64NotEqual] x y (CMPWconst [0] boolval))
 
 (OffPtr [off] ptr:(SP)) && is32Bit(off) => (MOVDaddr [int32(off)] ptr)
 (OffPtr [off] ptr) => (ADDconst [off] ptr)
@@ -1324,8 +1324,8 @@
 (XOR x (MVN y)) -> (EON x y)
 (OR  x (MVN y)) -> (ORN x y)
 (MVN (XOR x y)) -> (EON x y)
-(CSEL {cc} x (MOVDconst [0]) flag) -> (CSEL0 {cc} x flag)
-(CSEL {cc} (MOVDconst [0]) y flag) -> (CSEL0 {arm64Negate(cc.(Op))} y flag)
+(CSEL [cc] x (MOVDconst [0]) flag) => (CSEL0 [cc] x flag)
+(CSEL [cc] (MOVDconst [0]) y flag) => (CSEL0 [arm64Negate(cc)] y flag)
 (SUB x (SUB y z)) -> (SUB (ADD <v.Type> x z) y)
 (SUB (SUB x y) z) -> (SUB x (ADD <y.Type> y z))
 
@@ -1481,8 +1481,8 @@
 (GTnoov (InvertFlags cmp) yes no) => (LTnoov cmp yes no)
 
 // absorb InvertFlags into CSEL(0)
-(CSEL {cc} x y (InvertFlags cmp)) => (CSEL {arm64Invert(cc)} x y cmp)
-(CSEL0 {cc} x (InvertFlags cmp)) => (CSEL0 {arm64Invert(cc)} x cmp)
+(CSEL [cc] x y (InvertFlags cmp)) => (CSEL [arm64Invert(cc)] x y cmp)
+(CSEL0 [cc] x (InvertFlags cmp)) => (CSEL0 [arm64Invert(cc)] x cmp)
 
 // absorb flag constants into boolean values
 (Equal (FlagConstant [fc])) => (MOVDconst [b2i(fc.eq())])
@@ -1517,20 +1517,20 @@
 (MOVBUreg x) && x.Type.IsBoolean() => (MOVDreg x)
 
 // absorb flag constants into conditional instructions
-(CSEL {cc} x _ flag) && ccARM64Eval(cc, flag) > 0 => x
-(CSEL {cc} _ y flag) && ccARM64Eval(cc, flag) < 0 => y
-(CSEL0 {cc} x flag) && ccARM64Eval(cc, flag) > 0 => x
-(CSEL0 {cc} _ flag) && ccARM64Eval(cc, flag) < 0 => (MOVDconst [0])
+(CSEL [cc] x _ flag) && ccARM64Eval(cc, flag) > 0 => x
+(CSEL [cc] _ y flag) && ccARM64Eval(cc, flag) < 0 => y
+(CSEL0 [cc] x flag) && ccARM64Eval(cc, flag) > 0 => x
+(CSEL0 [cc] _ flag) && ccARM64Eval(cc, flag) < 0 => (MOVDconst [0])
 
 // absorb flags back into boolean CSEL
-(CSEL {cc} x y (CMPWconst [0] boolval)) && cc == OpARM64NotEqual && flagArg(boolval) != nil =>
-      (CSEL {boolval.Op} x y flagArg(boolval))
-(CSEL {cc} x y (CMPWconst [0] boolval)) && cc == OpARM64Equal && flagArg(boolval) != nil =>
-      (CSEL {arm64Negate(boolval.Op)} x y flagArg(boolval))
-(CSEL0 {cc} x (CMPWconst [0] boolval)) && cc == OpARM64NotEqual && flagArg(boolval) != nil =>
-      (CSEL0 {boolval.Op} x flagArg(boolval))
-(CSEL0 {cc} x (CMPWconst [0] boolval)) && cc == OpARM64Equal && flagArg(boolval) != nil =>
-      (CSEL0 {arm64Negate(boolval.Op)} x flagArg(boolval))
+(CSEL [cc] x y (CMPWconst [0] boolval)) && cc == OpARM64NotEqual && flagArg(boolval) != nil =>
+      (CSEL [boolval.Op] x y flagArg(boolval))
+(CSEL [cc] x y (CMPWconst [0] boolval)) && cc == OpARM64Equal && flagArg(boolval) != nil =>
+      (CSEL [arm64Negate(boolval.Op)] x y flagArg(boolval))
+(CSEL0 [cc] x (CMPWconst [0] boolval)) && cc == OpARM64NotEqual && flagArg(boolval) != nil =>
+      (CSEL0 [boolval.Op] x flagArg(boolval))
+(CSEL0 [cc] x (CMPWconst [0] boolval)) && cc == OpARM64Equal && flagArg(boolval) != nil =>
+      (CSEL0 [arm64Negate(boolval.Op)] x flagArg(boolval))
 
 // absorb shifts into ops
 (NEG x:(SLLconst [c] y)) && clobberIfDead(x) => (NEGshiftLL [c] y)
@@ -1691,11 +1691,11 @@
 // "|" can also be "^" or "+".
 // As arm64 does not have a ROL instruction, ROL(x, y) is replaced by ROR(x, -y).
 ((ADD|OR|XOR) (SLL x (ANDconst <t> [63] y))
-	(CSEL0 <typ.UInt64> {cc} (SRL <typ.UInt64> x (SUB <t> (MOVDconst [64]) (ANDconst <t> [63] y)))
+	(CSEL0 <typ.UInt64> [cc] (SRL <typ.UInt64> x (SUB <t> (MOVDconst [64]) (ANDconst <t> [63] y)))
 		(CMPconst [64] (SUB <t> (MOVDconst [64]) (ANDconst <t> [63] y))))) && cc == OpARM64LessThanU
 	=> (ROR x (NEG <t> y))
 ((ADD|OR|XOR) (SRL <typ.UInt64> x (ANDconst <t> [63] y))
-	(CSEL0 <typ.UInt64> {cc} (SLL x (SUB <t> (MOVDconst [64]) (ANDconst <t> [63] y)))
+	(CSEL0 <typ.UInt64> [cc] (SLL x (SUB <t> (MOVDconst [64]) (ANDconst <t> [63] y)))
 		(CMPconst [64] (SUB <t> (MOVDconst [64]) (ANDconst <t> [63] y))))) && cc == OpARM64LessThanU
 	=> (ROR x y)
 
@@ -1705,11 +1705,11 @@
 // "|" can also be "^" or "+".
 // As arm64 does not have a ROLW instruction, ROLW(x, y) is replaced by RORW(x, -y).
 ((ADD|OR|XOR) (SLL x (ANDconst <t> [31] y))
-	(CSEL0 <typ.UInt32> {cc} (SRL <typ.UInt32> (MOVWUreg x) (SUB <t> (MOVDconst [32]) (ANDconst <t> [31] y)))
+	(CSEL0 <typ.UInt32> [cc] (SRL <typ.UInt32> (MOVWUreg x) (SUB <t> (MOVDconst [32]) (ANDconst <t> [31] y)))
 		(CMPconst [64]  (SUB <t> (MOVDconst [32]) (ANDconst <t> [31] y))))) && cc == OpARM64LessThanU
 	=> (RORW x (NEG <t> y))
 ((ADD|OR|XOR) (SRL <typ.UInt32> (MOVWUreg x) (ANDconst <t> [31] y))
-	(CSEL0 <typ.UInt32> {cc} (SLL x (SUB <t> (MOVDconst [32]) (ANDconst <t> [31] y)))
+	(CSEL0 <typ.UInt32> [cc] (SLL x (SUB <t> (MOVDconst [32]) (ANDconst <t> [31] y)))
 		(CMPconst [64]  (SUB <t> (MOVDconst [32]) (ANDconst <t> [31] y))))) && cc == OpARM64LessThanU
 	=> (RORW x y)
 
diff --git a/src/cmd/compile/internal/ssa/gen/ARM64Ops.go b/src/cmd/compile/internal/ssa/gen/ARM64Ops.go
index 2424e67..e9af261 100644
--- a/src/cmd/compile/internal/ssa/gen/ARM64Ops.go
+++ b/src/cmd/compile/internal/ssa/gen/ARM64Ops.go
@@ -467,8 +467,8 @@
 
 		// conditional instructions; auxint is
 		// one of the arm64 comparison pseudo-ops (LessThan, LessThanU, etc.)
-		{name: "CSEL", argLength: 3, reg: gp2flags1, asm: "CSEL", aux: "CCop"},  // aux(flags) ? arg0 : arg1
-		{name: "CSEL0", argLength: 2, reg: gp1flags1, asm: "CSEL", aux: "CCop"}, // aux(flags) ? arg0 : 0
+		{name: "CSEL", argLength: 3, reg: gp2flags1, asm: "CSEL", aux: "CCop"},  // auxint(flags) ? arg0 : arg1
+		{name: "CSEL0", argLength: 2, reg: gp1flags1, asm: "CSEL", aux: "CCop"}, // auxint(flags) ? arg0 : 0
 
 		// function calls
 		{name: "CALLstatic", argLength: 1, reg: regInfo{clobbers: callerSave}, aux: "SymOff", clobberFlags: true, call: true, symEffect: "None"},                           // call static function aux.(*obj.LSym).  arg0=mem, auxint=argsize, returns mem
diff --git a/src/cmd/compile/internal/ssa/gen/PPC64.rules b/src/cmd/compile/internal/ssa/gen/PPC64.rules
index 14942d5..e5fb1e9 100644
--- a/src/cmd/compile/internal/ssa/gen/PPC64.rules
+++ b/src/cmd/compile/internal/ssa/gen/PPC64.rules
@@ -110,13 +110,21 @@
 // Rotate generation with non-const shift
 // these match patterns from math/bits/RotateLeft[32|64], but there could be others
 (ADD (SLD x (ANDconst <typ.Int64> [63] y)) (SRD x (SUB <typ.UInt> (MOVDconst [64]) (ANDconst <typ.UInt> [63] y)))) => (ROTL x y)
+(ADD (SLD x (ANDconst <typ.Int64> [63] y)) (SRD x (SUBFCconst <typ.UInt> [64] (ANDconst <typ.UInt> [63] y)))) => (ROTL x y)
 ( OR (SLD x (ANDconst <typ.Int64> [63] y)) (SRD x (SUB <typ.UInt> (MOVDconst [64]) (ANDconst <typ.UInt> [63] y)))) => (ROTL x y)
+( OR (SLD x (ANDconst <typ.Int64> [63] y)) (SRD x (SUBFCconst <typ.UInt> [64] (ANDconst <typ.UInt> [63] y)))) => (ROTL x y)
 (XOR (SLD x (ANDconst <typ.Int64> [63] y)) (SRD x (SUB <typ.UInt> (MOVDconst [64]) (ANDconst <typ.UInt> [63] y)))) => (ROTL x y)
+(XOR (SLD x (ANDconst <typ.Int64> [63] y)) (SRD x (SUBFCconst <typ.UInt> [64] (ANDconst <typ.UInt> [63] y)))) => (ROTL x y)
 
+
+(ADD (SLW x (ANDconst <typ.Int32> [31] y)) (SRW x (SUBFCconst <typ.UInt> [32] (ANDconst <typ.UInt> [31] y)))) => (ROTLW x y)
 (ADD (SLW x (ANDconst <typ.Int32> [31] y)) (SRW x (SUB <typ.UInt> (MOVDconst [32]) (ANDconst <typ.UInt> [31] y)))) => (ROTLW x y)
+( OR (SLW x (ANDconst <typ.Int32> [31] y)) (SRW x (SUBFCconst <typ.UInt> [32] (ANDconst <typ.UInt> [31] y)))) => (ROTLW x y)
 ( OR (SLW x (ANDconst <typ.Int32> [31] y)) (SRW x (SUB <typ.UInt> (MOVDconst [32]) (ANDconst <typ.UInt> [31] y)))) => (ROTLW x y)
+(XOR (SLW x (ANDconst <typ.Int32> [31] y)) (SRW x (SUBFCconst <typ.UInt> [32] (ANDconst <typ.UInt> [31] y)))) => (ROTLW x y)
 (XOR (SLW x (ANDconst <typ.Int32> [31] y)) (SRW x (SUB <typ.UInt> (MOVDconst [32]) (ANDconst <typ.UInt> [31] y)))) => (ROTLW x y)
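 // Illustrative sketch: these come from math/bits-style source of the form
 //	s := uint(y) & 63
 //	z = x<<s | x>>(64-s)
 // which the rules above collapse into a single ROTL (ROTLW for 32 bits).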
 
+
 // Lowering rotates
 (RotateLeft32 x y) => (ROTLW x y)
 (RotateLeft64 x y) => (ROTL x y)
@@ -192,11 +200,15 @@
 (Rsh64Ux64 x (AND y (MOVDconst [63]))) => (SRD x (ANDconst <typ.Int64> [63] y))
 (Rsh64Ux64 x (ANDconst <typ.UInt> [63] y)) => (SRD x (ANDconst <typ.UInt> [63] y))
 (Rsh64Ux64 x (SUB <typ.UInt> (MOVDconst [64]) (ANDconst <typ.UInt> [63] y))) => (SRD x (SUB <typ.UInt> (MOVDconst [64]) (ANDconst <typ.UInt> [63] y)))
+(Rsh64Ux64 x (SUBFCconst <typ.UInt> [64] (ANDconst <typ.UInt> [63] y))) => (SRD x (SUBFCconst <typ.UInt> [64] (ANDconst <typ.UInt> [63] y)))
 (Rsh64Ux64 x (SUB <typ.UInt> (MOVDconst [64]) (AND <typ.UInt> y (MOVDconst [63])))) => (SRD x (SUB <typ.UInt> (MOVDconst [64]) (ANDconst <typ.UInt> [63] y)))
+(Rsh64Ux64 x (SUBFCconst <typ.UInt> [64] (AND <typ.UInt> y (MOVDconst [63])))) => (SRD x (SUBFCconst <typ.UInt> [64] (ANDconst <typ.UInt> [63] y)))
 (Rsh64x64 x (AND y (MOVDconst [63]))) => (SRAD x (ANDconst <typ.Int64> [63] y))
 (Rsh64x64 x (ANDconst <typ.UInt> [63] y)) => (SRAD x (ANDconst <typ.UInt> [63] y))
 (Rsh64x64 x (SUB <typ.UInt> (MOVDconst [64]) (ANDconst <typ.UInt> [63] y))) => (SRAD x (SUB <typ.UInt> (MOVDconst [64]) (ANDconst <typ.UInt> [63] y)))
+(Rsh64x64 x (SUBFCconst <typ.UInt> [64] (ANDconst <typ.UInt> [63] y))) => (SRAD x (SUBFCconst <typ.UInt> [64] (ANDconst <typ.UInt> [63] y)))
 (Rsh64x64 x (SUB <typ.UInt> (MOVDconst [64]) (AND <typ.UInt> y (MOVDconst [63])))) => (SRAD x (SUB <typ.UInt> (MOVDconst [64]) (ANDconst <typ.UInt> [63] y)))
+(Rsh64x64 x (SUBFCconst <typ.UInt> [64] (AND <typ.UInt> y (MOVDconst [63])))) => (SRAD x (SUBFCconst <typ.UInt> [64] (ANDconst <typ.UInt> [63] y)))
 
 (Lsh64x64 x y)  => (SLD  x (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [64]))))
 (Rsh64x64 x y) => (SRAD x (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [64]))))
@@ -208,12 +220,16 @@
 (Rsh32Ux64 x (AND y (MOVDconst [31]))) => (SRW x (ANDconst <typ.Int32> [31] y))
 (Rsh32Ux64 x (ANDconst <typ.UInt> [31] y)) => (SRW x (ANDconst <typ.UInt> [31] y))
 (Rsh32Ux64 x (SUB <typ.UInt> (MOVDconst [32]) (ANDconst <typ.UInt> [31] y))) => (SRW x (SUB <typ.UInt> (MOVDconst [32]) (ANDconst <typ.UInt> [31] y)))
+(Rsh32Ux64 x (SUBFCconst <typ.UInt> [32] (ANDconst <typ.UInt> [31] y))) => (SRW x (SUBFCconst <typ.UInt> [32] (ANDconst <typ.UInt> [31] y)))
 (Rsh32Ux64 x (SUB <typ.UInt> (MOVDconst [32]) (AND <typ.UInt> y (MOVDconst [31])))) => (SRW x (SUB <typ.UInt> (MOVDconst [32]) (ANDconst <typ.UInt> [31] y)))
+(Rsh32Ux64 x (SUBFCconst <typ.UInt> [32] (AND <typ.UInt> y (MOVDconst [31])))) => (SRW x (SUBFCconst <typ.UInt> [32] (ANDconst <typ.UInt> [31] y)))
 
 (Rsh32x64 x (AND y (MOVDconst [31]))) => (SRAW x (ANDconst <typ.Int32> [31] y))
 (Rsh32x64 x (ANDconst <typ.UInt> [31] y)) => (SRAW x (ANDconst <typ.UInt> [31] y))
 (Rsh32x64 x (SUB <typ.UInt> (MOVDconst [32]) (ANDconst <typ.UInt> [31] y))) => (SRAW x (SUB <typ.UInt> (MOVDconst [32]) (ANDconst <typ.UInt> [31] y)))
+(Rsh32x64 x (SUBFCconst <typ.UInt> [32] (ANDconst <typ.UInt> [31] y))) => (SRAW x (SUBFCconst <typ.UInt> [32] (ANDconst <typ.UInt> [31] y)))
 (Rsh32x64 x (SUB <typ.UInt> (MOVDconst [32]) (AND <typ.UInt> y (MOVDconst [31])))) => (SRAW x (SUB <typ.UInt> (MOVDconst [32]) (ANDconst <typ.UInt> [31] y)))
+(Rsh32x64 x (SUBFCconst <typ.UInt> [32] (AND <typ.UInt> y (MOVDconst [31])))) => (SRAW x (SUBFCconst <typ.UInt> [32] (ANDconst <typ.UInt> [31] y)))
 
 (Rsh32x64 x y)  => (SRAW x (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [32]))))
 (Rsh32Ux64 x y) => (SRW  x (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [32]))))
@@ -276,18 +292,11 @@
 (Rsh8Ux8 x y) => (SRW  (ZeroExt8to32 x) (ISEL [0] y (MOVDconst [-1]) (CMPU (ZeroExt8to64 y) (MOVDconst [8]))))
 (Lsh8x8 x y)  => (SLW  x                (ISEL [0] y (MOVDconst [-1]) (CMPU (ZeroExt8to64 y) (MOVDconst [8]))))
 
-// Cleaning up shift ops when input is masked
-(MaskIfNotCarry (ADDconstForCarry [c] (ANDconst [d] _))) && c < 0 && d > 0 && int64(c) + d < 0 => (MOVDconst [-1])
+// Cleaning up shift ops
 (ISEL [0] (ANDconst [d] y) (MOVDconst [-1]) (CMPU (ANDconst [d] y) (MOVDconst [c]))) && c >= d => (ANDconst [d] y)
 (ISEL [0] (ANDconst [d] y) (MOVDconst [-1]) (CMPUconst [c] (ANDconst [d] y))) && c >= d => (ANDconst [d] y)
 (ORN x (MOVDconst [-1])) => x
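 // Illustrative reasoning (assuming the lowered-shift shapes above): there
 // d is a mask such as 63 and c is the type width, e.g. 64, so the masked
 // count is always below c and ISEL can never select the -1 arm; only the
 // AND remains.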
 
-(ADDconstForCarry [c] (MOVDconst [d])) && c < 0 && (c <  0 || int64(c) + d >= 0) => (FlagCarryClear)
-(ADDconstForCarry [c] (MOVDconst [d])) && c < 0 &&  c >= 0 && int64(c) + d <  0  => (FlagCarrySet)
-
-(MaskIfNotCarry (FlagCarrySet)) => (MOVDconst [0])
-(MaskIfNotCarry (FlagCarryClear)) => (MOVDconst [-1])
-
 (S(RAD|RD|LD) x (MOVDconst [c])) => (S(RAD|RD|LD)const [c&63 | (c>>6&1*63)] x)
 (S(RAW|RW|LW) x (MOVDconst [c])) => (S(RAW|RW|LW)const [c&31 | (c>>5&1*31)] x)
 
@@ -306,8 +315,8 @@
 (Ctz16 x) => (POPCNTW (MOVHZreg (ANDN <typ.Int16> (ADDconst <typ.Int16> [-1] x) x)))
 (Ctz8 x)  => (POPCNTB (MOVBZreg (ANDN <typ.UInt8> (ADDconst <typ.UInt8> [-1] x) x)))
 
-(BitLen64 x) => (SUB (MOVDconst [64]) (CNTLZD <typ.Int> x))
-(BitLen32 x) => (SUB (MOVDconst [32]) (CNTLZW <typ.Int> x))
+(BitLen64 x) => (SUBFCconst [64] (CNTLZD <typ.Int> x))
+(BitLen32 x) => (SUBFCconst [32] (CNTLZW <typ.Int> x))
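+// (BitLen is the width minus the leading-zero count; the subtract-from-
+// constant form avoids materializing the width in a register.)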
 
 (PopCount64 ...) => (POPCNTD ...)
 (PopCount32 x) => (POPCNTW (MOVWZreg x))
@@ -777,10 +786,19 @@
 (ADDconst [c] (ADDconst [d] x)) && is32Bit(c+d) => (ADDconst [c+d] x)
 (ADDconst [0] x) => x
 (SUB x (MOVDconst [c])) && is32Bit(-c) => (ADDconst [-c] x)
-// TODO deal with subtract-from-const
 
 (ADDconst [c] (MOVDaddr [d] {sym} x)) && is32Bit(c+int64(d)) => (MOVDaddr [int32(c+int64(d))] {sym} x)
 
+// Subtract from constant (the carry is produced but ignored).
+// Note: these rules clobber the carry bit.
+(SUB (MOVDconst [c]) x) && is32Bit(c) => (SUBFCconst [c] x)
+(SUBFCconst [c] (NEG x)) => (ADDconst [c] x)
+(SUBFCconst [c] (SUBFCconst [d] x)) && is32Bit(c-d) => (ADDconst [c-d] x)
+(SUBFCconst [0] x) => (NEG x)
+(ADDconst [c] (SUBFCconst [d] x)) && is32Bit(c+d) => (SUBFCconst [c+d] x)
+(NEG (ADDconst [c] x)) && is32Bit(-c) => (SUBFCconst [-c] x)
+(NEG (SUBFCconst [c] x)) && is32Bit(-c) => (ADDconst [-c] x)
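+// Illustrative reasoning for the folds above: c - (d - x) = (c-d) + x and
+// -(c - x) = x + (-c), which is why chained SUBFCconst/NEG forms reduce to
+// a single ADDconst or SUBFCconst.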
+
 // Use register moves instead of stores and loads to move int<=>float values
 // Common with math Float64bits, Float64frombits
 (MOVDload [off] {sym} ptr (FMOVDstore [off] {sym} ptr x _)) => (MFVSRD x)
diff --git a/src/cmd/compile/internal/ssa/gen/PPC64Ops.go b/src/cmd/compile/internal/ssa/gen/PPC64Ops.go
index 825d0fa..44f6a74 100644
--- a/src/cmd/compile/internal/ssa/gen/PPC64Ops.go
+++ b/src/cmd/compile/internal/ssa/gen/PPC64Ops.go
@@ -175,6 +175,7 @@
 		{name: "FADD", argLength: 2, reg: fp21, asm: "FADD", commutative: true},   // arg0+arg1
 		{name: "FADDS", argLength: 2, reg: fp21, asm: "FADDS", commutative: true}, // arg0+arg1
 		{name: "SUB", argLength: 2, reg: gp21, asm: "SUB"},                        // arg0-arg1
+		{name: "SUBFCconst", argLength: 1, reg: gp11, asm: "SUBC", aux: "Int64"},  // auxInt - arg0 (with carry)
 		{name: "FSUB", argLength: 2, reg: fp21, asm: "FSUB"},                      // arg0-arg1
 		{name: "FSUBS", argLength: 2, reg: fp21, asm: "FSUBS"},                    // arg0-arg1
 
@@ -206,9 +207,7 @@
 		{name: "ROTL", argLength: 2, reg: gp21, asm: "ROTL"},   // arg0 rotate left by arg1 mod 64
 		{name: "ROTLW", argLength: 2, reg: gp21, asm: "ROTLW"}, // uint32(arg0) rotate left by arg1 mod 32
 
-		{name: "LoweredAdd64Carry", argLength: 3, reg: gp32, resultNotInArgs: true},                                                                     // arg0 + arg1 + carry, returns (sum, carry)
-		{name: "ADDconstForCarry", argLength: 1, reg: regInfo{inputs: []regMask{gp | sp | sb}, clobbers: tmp}, aux: "Int16", asm: "ADDC", typ: "Flags"}, // _, carry := arg0 + auxint
-		{name: "MaskIfNotCarry", argLength: 1, reg: crgp, asm: "ADDME", typ: "Int64"},                                                                   // carry - 1 (if carry then 0 else -1)
+		{name: "LoweredAdd64Carry", argLength: 3, reg: gp32, resultNotInArgs: true}, // arg0 + arg1 + carry, returns (sum, carry)
 
 		{name: "SRADconst", argLength: 1, reg: gp11, asm: "SRAD", aux: "Int64"}, // signed arg0 >> auxInt, 0 <= auxInt < 64, 64 bit width
 		{name: "SRAWconst", argLength: 1, reg: gp11, asm: "SRAW", aux: "Int64"}, // signed arg0 >> auxInt, 0 <= auxInt < 32, 32 bit width
@@ -674,11 +673,9 @@
 
 		// These ops are for temporary use by rewrite rules. They
 		// cannot appear in the generated assembly.
-		{name: "FlagEQ"},         // equal
-		{name: "FlagLT"},         // signed < or unsigned <
-		{name: "FlagGT"},         // signed > or unsigned >
-		{name: "FlagCarrySet"},   // carry flag set
-		{name: "FlagCarryClear"}, // carry flag clear
+		{name: "FlagEQ"}, // equal
+		{name: "FlagLT"}, // signed < or unsigned <
+		{name: "FlagGT"}, // signed > or unsigned >
 	}
 
 	blocks := []blockData{
diff --git a/src/cmd/compile/internal/ssa/gen/S390X.rules b/src/cmd/compile/internal/ssa/gen/S390X.rules
index 5e4c436..e564f63 100644
--- a/src/cmd/compile/internal/ssa/gen/S390X.rules
+++ b/src/cmd/compile/internal/ssa/gen/S390X.rules
@@ -498,27 +498,19 @@
 // Remove zero extensions after zero extending load.
 // Note: take care that if x is spilled it is restored correctly.
 (MOV(B|H|W)Zreg x:(MOVBZload    _   _)) && (!x.Type.IsSigned() || x.Type.Size() > 1) => x
-(MOV(B|H|W)Zreg x:(MOVBZloadidx _ _ _)) && (!x.Type.IsSigned() || x.Type.Size() > 1) => x
 (MOV(H|W)Zreg   x:(MOVHZload    _   _)) && (!x.Type.IsSigned() || x.Type.Size() > 2) => x
-(MOV(H|W)Zreg   x:(MOVHZloadidx _ _ _)) && (!x.Type.IsSigned() || x.Type.Size() > 2) => x
 (MOVWZreg       x:(MOVWZload    _   _)) && (!x.Type.IsSigned() || x.Type.Size() > 4) => x
-(MOVWZreg       x:(MOVWZloadidx _ _ _)) && (!x.Type.IsSigned() || x.Type.Size() > 4) => x
 
 // Remove sign extensions after sign extending load.
 // Note: take care that if x is spilled it is restored correctly.
 (MOV(B|H|W)reg x:(MOVBload    _   _)) && (x.Type.IsSigned() || x.Type.Size() == 8) => x
-(MOV(B|H|W)reg x:(MOVBloadidx _ _ _)) && (x.Type.IsSigned() || x.Type.Size() == 8) => x
 (MOV(H|W)reg   x:(MOVHload    _   _)) && (x.Type.IsSigned() || x.Type.Size() == 8) => x
-(MOV(H|W)reg   x:(MOVHloadidx _ _ _)) && (x.Type.IsSigned() || x.Type.Size() == 8) => x
 (MOVWreg       x:(MOVWload    _   _)) && (x.Type.IsSigned() || x.Type.Size() == 8) => x
-(MOVWreg       x:(MOVWloadidx _ _ _)) && (x.Type.IsSigned() || x.Type.Size() == 8) => x
 
 // Remove sign extensions after zero extending load.
 // These type checks are probably unnecessary but do them anyway just in case.
 (MOV(H|W)reg x:(MOVBZload    _   _)) && (!x.Type.IsSigned() || x.Type.Size() > 1) => x
-(MOV(H|W)reg x:(MOVBZloadidx _ _ _)) && (!x.Type.IsSigned() || x.Type.Size() > 1) => x
 (MOVWreg     x:(MOVHZload    _   _)) && (!x.Type.IsSigned() || x.Type.Size() > 2) => x
-(MOVWreg     x:(MOVHZloadidx _ _ _)) && (!x.Type.IsSigned() || x.Type.Size() > 2) => x
 
 // Fold sign and zero extensions into loads.
 //
@@ -538,14 +530,6 @@
   && x.Uses == 1
   && clobber(x)
   => @x.Block (MOV(B|H|W)load <t> [o] {s} p mem)
-(MOV(B|H|W)Zreg <t> x:(MOV(B|H|W)loadidx [o] {s} p i mem))
-  && x.Uses == 1
-  && clobber(x)
-  => @x.Block (MOV(B|H|W)Zloadidx <t> [o] {s} p i mem)
-(MOV(B|H|W)reg <t> x:(MOV(B|H|W)Zloadidx [o] {s} p i mem))
-  && x.Uses == 1
-  && clobber(x)
-  => @x.Block (MOV(B|H|W)loadidx <t> [o] {s} p i mem)
 
 // Remove zero extensions after argument load.
 (MOVBZreg x:(Arg <t>)) && !t.IsSigned() && t.Size() == 1 => x
@@ -641,41 +625,37 @@
 (BRC {c} (CMPWUconst x [y]) yes no) && y == int32( int8(y)) && (c == s390x.Equal || c == s390x.LessOrGreater) => (CIJ   {c} x [ int8(y)] yes no)
 
 // Fold constants into instructions.
-(ADD x (MOVDconst [c])) && is32Bit(c) -> (ADDconst [c] x)
-(ADDW x (MOVDconst [c])) -> (ADDWconst [int64(int32(c))] x)
+(ADD x (MOVDconst [c])) && is32Bit(c) => (ADDconst [int32(c)] x)
+(ADDW x (MOVDconst [c])) => (ADDWconst [int32(c)] x)
 
-(SUB x (MOVDconst [c])) && is32Bit(c) -> (SUBconst x [c])
-(SUB (MOVDconst [c]) x) && is32Bit(c) -> (NEG (SUBconst <v.Type> x [c]))
-(SUBW x (MOVDconst [c])) -> (SUBWconst x [int64(int32(c))])
-(SUBW (MOVDconst [c]) x) -> (NEGW (SUBWconst <v.Type> x [int64(int32(c))]))
+(SUB x (MOVDconst [c])) && is32Bit(c) => (SUBconst x [int32(c)])
+(SUB (MOVDconst [c]) x) && is32Bit(c) => (NEG (SUBconst <v.Type> x [int32(c)]))
+(SUBW x (MOVDconst [c])) => (SUBWconst x [int32(c)])
+(SUBW (MOVDconst [c]) x) => (NEGW (SUBWconst <v.Type> x [int32(c)]))
 
-(MULLD x (MOVDconst [c])) && is32Bit(c) -> (MULLDconst [c] x)
-(MULLW x (MOVDconst [c])) -> (MULLWconst [int64(int32(c))] x)
+(MULLD x (MOVDconst [c])) && is32Bit(c) => (MULLDconst [int32(c)] x)
+(MULLW x (MOVDconst [c])) => (MULLWconst [int32(c)] x)
 
 // NILF instructions leave the high 32 bits unchanged, which is
 // equivalent to the mask's leftmost 32 bits all being set.
 // TODO(mundaym): modify the assembler to accept 64-bit values
 // and use isU32Bit(^c).
 (AND x (MOVDconst [c])) && is32Bit(c) && c < 0 => (ANDconst [c] x)
-(AND x (MOVDconst [c])) && is32Bit(c) && c >= 0 -> (MOVWZreg (ANDWconst <typ.UInt32> [int64(int32(c))] x))
-(ANDW x (MOVDconst [c])) -> (ANDWconst [int64(int32(c))] x)
+(AND x (MOVDconst [c])) && is32Bit(c) && c >= 0 => (MOVWZreg (ANDWconst <typ.UInt32> [int32(c)] x))
+(ANDW x (MOVDconst [c])) => (ANDWconst [int32(c)] x)
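+// Illustrative reasoning: for c >= 0 the mask's upper 32 bits are zero, so
+// x & c == uint64(uint32(x) & uint32(c)), which is exactly what
+// (MOVWZreg (ANDWconst ...)) computes.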
 
-(ANDWconst [c] (ANDWconst [d] x)) => (ANDWconst [c & d] x)
-(ANDconst [c] (ANDconst [d] x)) => (ANDconst [c & d] x)
+((AND|ANDW)const [c] ((AND|ANDW)const [d] x)) => ((AND|ANDW)const [c&d] x)
 
-(OR x (MOVDconst [c])) && isU32Bit(c) => (ORconst [c] x)
-(ORW x (MOVDconst [c])) -> (ORWconst [int64(int32(c))] x)
-
-(XOR x (MOVDconst [c])) && isU32Bit(c) => (XORconst [c] x)
-(XORW x (MOVDconst [c])) -> (XORWconst [int64(int32(c))] x)
+((OR|XOR) x (MOVDconst [c])) && isU32Bit(c) => ((OR|XOR)const [c] x)
+((OR|XOR)W x (MOVDconst [c])) => ((OR|XOR)Wconst [int32(c)] x)
 
 // Constant shifts.
 (S(LD|RD|RAD|LW|RW|RAW) x (MOVDconst [c]))
-	-> (S(LD|RD|RAD|LW|RW|RAW)const x [c&63])
+	=> (S(LD|RD|RAD|LW|RW|RAW)const x [int8(c&63)])
 
 // Shifts only use the rightmost 6 bits of the shift value.
 (S(LD|RD|RAD|LW|RW|RAW) x (AND (MOVDconst [c]) y))
-	-> (S(LD|RD|RAD|LW|RW|RAW) x (ANDWconst <typ.UInt32> [c&63] y))
+	=> (S(LD|RD|RAD|LW|RW|RAW) x (ANDWconst <typ.UInt32> [int32(c&63)] y))
 (S(LD|RD|RAD|LW|RW|RAW) x (ANDWconst [c] y)) && c&63 == 63
 	=> (S(LD|RD|RAD|LW|RW|RAW) x y)
 (SLD  x (MOV(W|H|B|WZ|HZ|BZ)reg y)) => (SLD  x y)
@@ -686,8 +666,8 @@
 (SRAW x (MOV(W|H|B|WZ|HZ|BZ)reg y)) => (SRAW x y)
 
 // Constant rotate generation
-(RLL  x (MOVDconst [c])) -> (RLLconst  x [c&31])
-(RLLG x (MOVDconst [c])) -> (RLLGconst x [c&63])
+(RLL  x (MOVDconst [c])) => (RLLconst  x [int8(c&31)])
+(RLLG x (MOVDconst [c])) => (RLLGconst x [int8(c&63)])
 
 (ADD (SLDconst x [c]) (SRDconst x [d])) && d == 64-c => (RLLGconst [c] x)
 ( OR (SLDconst x [c]) (SRDconst x [d])) && d == 64-c => (RLLGconst [c] x)
@@ -697,14 +677,17 @@
 ( ORW (SLWconst x [c]) (SRWconst x [d])) && d == 32-c => (RLLconst [c] x)
 (XORW (SLWconst x [c]) (SRWconst x [d])) && d == 32-c => (RLLconst [c] x)
 
-(CMP x (MOVDconst [c])) && is32Bit(c) -> (CMPconst x [c])
-(CMP (MOVDconst [c]) x) && is32Bit(c) -> (InvertFlags (CMPconst x [c]))
-(CMPW x (MOVDconst [c])) -> (CMPWconst x [int64(int32(c))])
-(CMPW (MOVDconst [c]) x) -> (InvertFlags (CMPWconst x [int64(int32(c))]))
-(CMPU x (MOVDconst [c])) && isU32Bit(c) -> (CMPUconst x [int64(int32(c))])
-(CMPU (MOVDconst [c]) x) && isU32Bit(c) -> (InvertFlags (CMPUconst x [int64(int32(c))]))
-(CMPWU x (MOVDconst [c])) -> (CMPWUconst x [int64(int32(c))])
-(CMPWU (MOVDconst [c]) x) -> (InvertFlags (CMPWUconst x [int64(int32(c))]))
+// Signed 64-bit comparison with immediate.
+(CMP x (MOVDconst [c])) && is32Bit(c) => (CMPconst x [int32(c)])
+(CMP (MOVDconst [c]) x) && is32Bit(c) => (InvertFlags (CMPconst x [int32(c)]))
+
+// Unsigned 64-bit comparison with immediate.
+(CMPU x (MOVDconst [c])) && isU32Bit(c) => (CMPUconst x [int32(c)])
+(CMPU (MOVDconst [c]) x) && isU32Bit(c) => (InvertFlags (CMPUconst x [int32(c)]))
+
+// Signed and unsigned 32-bit comparison with immediate.
+(CMP(W|WU) x (MOVDconst [c])) => (CMP(W|WU)const x [int32(c)])
+(CMP(W|WU) (MOVDconst [c]) x) => (InvertFlags (CMP(W|WU)const x [int32(c)]))
 
 // Canonicalize the order of arguments to comparisons - helps with CSE.
 ((CMP|CMPW|CMPU|CMPWU) x y) && x.ID > y.ID => (InvertFlags ((CMP|CMPW|CMPU|CMPWU) y x))
@@ -752,14 +735,14 @@
                  (SL(D|W)const <t> x [int8(log32(-c+(-c&^(-c-1))))]))
 
 // Fold ADD into MOVDaddr. Odd offsets from SB shouldn't be folded (LARL can't handle them).
-(ADDconst [c] (MOVDaddr [d] {s} x:(SB))) && ((c+d)&1 == 0) && is32Bit(c+d) -> (MOVDaddr [c+d] {s} x)
-(ADDconst [c] (MOVDaddr [d] {s} x)) && x.Op != OpSB && is20Bit(c+d) -> (MOVDaddr [c+d] {s} x)
-(ADD idx (MOVDaddr [c] {s} ptr)) && ptr.Op != OpSB && idx.Op != OpSB => (MOVDaddridx [c] {s} ptr idx)
+(ADDconst [c] (MOVDaddr [d] {s} x:(SB))) && ((c+d)&1 == 0) && is32Bit(int64(c)+int64(d)) => (MOVDaddr [c+d] {s} x)
+(ADDconst [c] (MOVDaddr [d] {s} x)) && x.Op != OpSB && is20Bit(int64(c)+int64(d)) => (MOVDaddr [c+d] {s} x)
+(ADD idx (MOVDaddr [c] {s} ptr)) && ptr.Op != OpSB => (MOVDaddridx [c] {s} ptr idx)
 
 // fold ADDconst into MOVDaddrx
-(ADDconst [c] (MOVDaddridx [d] {s} x y)) && is20Bit(c+d) -> (MOVDaddridx [c+d] {s} x y)
-(MOVDaddridx [c] {s} (ADDconst [d] x) y) && is20Bit(c+d) && x.Op != OpSB -> (MOVDaddridx [c+d] {s} x y)
-(MOVDaddridx [c] {s} x (ADDconst [d] y)) && is20Bit(c+d) && y.Op != OpSB -> (MOVDaddridx [c+d] {s} x y)
+(ADDconst [c] (MOVDaddridx [d] {s} x y)) && is20Bit(int64(c)+int64(d)) => (MOVDaddridx [c+d] {s} x y)
+(MOVDaddridx [c] {s} (ADDconst [d] x) y) && is20Bit(int64(c)+int64(d)) => (MOVDaddridx [c+d] {s} x y)
+(MOVDaddridx [c] {s} x (ADDconst [d] y)) && is20Bit(int64(c)+int64(d)) => (MOVDaddridx [c+d] {s} x y)
 
 // reverse ordering of compare instruction
 (LOCGR {c} x y (InvertFlags cmp)) => (LOCGR {c.ReverseComparison()} x y cmp)
@@ -799,11 +782,11 @@
 
 // detect copysign
 (OR (SLDconst [63] (SRDconst [63] (LGDR x))) (LGDR (LPDFR <t> y))) => (LGDR (CPSDR <t> y x))
-(OR (SLDconst [63] (SRDconst [63] (LGDR x))) (MOVDconst [c])) && c & -1<<63 == 0 -> (LGDR (CPSDR <x.Type> (FMOVDconst <x.Type> [c]) x))
+(OR (SLDconst [63] (SRDconst [63] (LGDR x))) (MOVDconst [c])) && c & -1<<63 == 0 => (LGDR (CPSDR <x.Type> (FMOVDconst <x.Type> [math.Float64frombits(uint64(c))]) x))
 (OR (AND (MOVDconst [-1<<63]) (LGDR x)) (LGDR (LPDFR <t> y))) => (LGDR (CPSDR <t> y x))
-(OR (AND (MOVDconst [-1<<63]) (LGDR x)) (MOVDconst [c])) && c & -1<<63 == 0 -> (LGDR (CPSDR <x.Type> (FMOVDconst <x.Type> [c]) x))
-(CPSDR y (FMOVDconst [c])) && c & -1<<63 == 0 -> (LPDFR y)
-(CPSDR y (FMOVDconst [c])) && c & -1<<63 != 0 -> (LNDFR y)
+(OR (AND (MOVDconst [-1<<63]) (LGDR x)) (MOVDconst [c])) && c & -1<<63 == 0 => (LGDR (CPSDR <x.Type> (FMOVDconst <x.Type> [math.Float64frombits(uint64(c))]) x))
+(CPSDR y (FMOVDconst [c])) && !math.Signbit(c) => (LPDFR y)
+(CPSDR y (FMOVDconst [c])) && math.Signbit(c)  => (LNDFR y)
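+// Illustrative reading (sketch): LGDR moves a float64 bit pattern into a
+// GPR, LPDFR/LNDFR force the sign bit clear/set, and CPSDR copies one
+// value's sign onto another, so these patterns turn copysign-style code
+// into a single CPSDR.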
 
 // absorb negations into set/clear sign bit
 (FNEG  (LPDFR x)) => (LNDFR x)
@@ -832,216 +815,131 @@
 // the ADDconst get eliminated, we still have to compute the ADDconst and we now
 // have potentially two live values (ptr and (ADDconst [off] ptr)) instead of one.
 // Nevertheless, let's do it!
-(MOVDload   [off1] {sym} (ADDconst [off2] ptr) mem) && is20Bit(off1+off2) -> (MOVDload  [off1+off2] {sym} ptr mem)
-(MOVWload   [off1] {sym} (ADDconst [off2] ptr) mem) && is20Bit(off1+off2) -> (MOVWload  [off1+off2] {sym} ptr mem)
-(MOVHload   [off1] {sym} (ADDconst [off2] ptr) mem) && is20Bit(off1+off2) -> (MOVHload  [off1+off2] {sym} ptr mem)
-(MOVBload   [off1] {sym} (ADDconst [off2] ptr) mem) && is20Bit(off1+off2) -> (MOVBload  [off1+off2] {sym} ptr mem)
-(MOVWZload  [off1] {sym} (ADDconst [off2] ptr) mem) && is20Bit(off1+off2) -> (MOVWZload [off1+off2] {sym} ptr mem)
-(MOVHZload  [off1] {sym} (ADDconst [off2] ptr) mem) && is20Bit(off1+off2) -> (MOVHZload [off1+off2] {sym} ptr mem)
-(MOVBZload  [off1] {sym} (ADDconst [off2] ptr) mem) && is20Bit(off1+off2) -> (MOVBZload [off1+off2] {sym} ptr mem)
-(FMOVSload  [off1] {sym} (ADDconst [off2] ptr) mem) && is20Bit(off1+off2) -> (FMOVSload [off1+off2] {sym} ptr mem)
-(FMOVDload  [off1] {sym} (ADDconst [off2] ptr) mem) && is20Bit(off1+off2) -> (FMOVDload [off1+off2] {sym} ptr mem)
+(MOVDload   [off1] {sym} (ADDconst [off2] ptr) mem) && is20Bit(int64(off1)+int64(off2)) => (MOVDload  [off1+off2] {sym} ptr mem)
+(MOVWload   [off1] {sym} (ADDconst [off2] ptr) mem) && is20Bit(int64(off1)+int64(off2)) => (MOVWload  [off1+off2] {sym} ptr mem)
+(MOVHload   [off1] {sym} (ADDconst [off2] ptr) mem) && is20Bit(int64(off1)+int64(off2)) => (MOVHload  [off1+off2] {sym} ptr mem)
+(MOVBload   [off1] {sym} (ADDconst [off2] ptr) mem) && is20Bit(int64(off1)+int64(off2)) => (MOVBload  [off1+off2] {sym} ptr mem)
+(MOVWZload  [off1] {sym} (ADDconst [off2] ptr) mem) && is20Bit(int64(off1)+int64(off2)) => (MOVWZload [off1+off2] {sym} ptr mem)
+(MOVHZload  [off1] {sym} (ADDconst [off2] ptr) mem) && is20Bit(int64(off1)+int64(off2)) => (MOVHZload [off1+off2] {sym} ptr mem)
+(MOVBZload  [off1] {sym} (ADDconst [off2] ptr) mem) && is20Bit(int64(off1)+int64(off2)) => (MOVBZload [off1+off2] {sym} ptr mem)
+(FMOVSload  [off1] {sym} (ADDconst [off2] ptr) mem) && is20Bit(int64(off1)+int64(off2)) => (FMOVSload [off1+off2] {sym} ptr mem)
+(FMOVDload  [off1] {sym} (ADDconst [off2] ptr) mem) && is20Bit(int64(off1)+int64(off2)) => (FMOVDload [off1+off2] {sym} ptr mem)
 
-(MOVDstore  [off1] {sym} (ADDconst [off2] ptr) val mem) && is20Bit(off1+off2) -> (MOVDstore  [off1+off2] {sym} ptr val mem)
-(MOVWstore  [off1] {sym} (ADDconst [off2] ptr) val mem) && is20Bit(off1+off2) -> (MOVWstore  [off1+off2] {sym} ptr val mem)
-(MOVHstore  [off1] {sym} (ADDconst [off2] ptr) val mem) && is20Bit(off1+off2) -> (MOVHstore  [off1+off2] {sym} ptr val mem)
-(MOVBstore  [off1] {sym} (ADDconst [off2] ptr) val mem) && is20Bit(off1+off2) -> (MOVBstore  [off1+off2] {sym} ptr val mem)
-(FMOVSstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is20Bit(off1+off2) -> (FMOVSstore [off1+off2] {sym} ptr val mem)
-(FMOVDstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is20Bit(off1+off2) -> (FMOVDstore [off1+off2] {sym} ptr val mem)
+(MOVDstore  [off1] {sym} (ADDconst [off2] ptr) val mem) && is20Bit(int64(off1)+int64(off2)) => (MOVDstore  [off1+off2] {sym} ptr val mem)
+(MOVWstore  [off1] {sym} (ADDconst [off2] ptr) val mem) && is20Bit(int64(off1)+int64(off2)) => (MOVWstore  [off1+off2] {sym} ptr val mem)
+(MOVHstore  [off1] {sym} (ADDconst [off2] ptr) val mem) && is20Bit(int64(off1)+int64(off2)) => (MOVHstore  [off1+off2] {sym} ptr val mem)
+(MOVBstore  [off1] {sym} (ADDconst [off2] ptr) val mem) && is20Bit(int64(off1)+int64(off2)) => (MOVBstore  [off1+off2] {sym} ptr val mem)
+(FMOVSstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is20Bit(int64(off1)+int64(off2)) => (FMOVSstore [off1+off2] {sym} ptr val mem)
+(FMOVDstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is20Bit(int64(off1)+int64(off2)) => (FMOVDstore [off1+off2] {sym} ptr val mem)
 
-(ADDload   [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (ADDload   [off1+off2] {sym} x ptr mem)
-(ADDWload  [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (ADDWload  [off1+off2] {sym} x ptr mem)
-(MULLDload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (MULLDload [off1+off2] {sym} x ptr mem)
-(MULLWload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (MULLWload [off1+off2] {sym} x ptr mem)
-(SUBload   [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (SUBload   [off1+off2] {sym} x ptr mem)
-(SUBWload  [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (SUBWload  [off1+off2] {sym} x ptr mem)
+(ADDload   [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(int64(off1)+int64(off2)) => (ADDload   [off1+off2] {sym} x ptr mem)
+(ADDWload  [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(int64(off1)+int64(off2)) => (ADDWload  [off1+off2] {sym} x ptr mem)
+(MULLDload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(int64(off1)+int64(off2)) => (MULLDload [off1+off2] {sym} x ptr mem)
+(MULLWload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(int64(off1)+int64(off2)) => (MULLWload [off1+off2] {sym} x ptr mem)
+(SUBload   [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(int64(off1)+int64(off2)) => (SUBload   [off1+off2] {sym} x ptr mem)
+(SUBWload  [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(int64(off1)+int64(off2)) => (SUBWload  [off1+off2] {sym} x ptr mem)
 
-(ANDload   [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (ANDload   [off1+off2] {sym} x ptr mem)
-(ANDWload  [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (ANDWload  [off1+off2] {sym} x ptr mem)
-(ORload    [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (ORload    [off1+off2] {sym} x ptr mem)
-(ORWload   [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (ORWload   [off1+off2] {sym} x ptr mem)
-(XORload   [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (XORload   [off1+off2] {sym} x ptr mem)
-(XORWload  [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (XORWload  [off1+off2] {sym} x ptr mem)
+(ANDload   [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(int64(off1)+int64(off2)) => (ANDload   [off1+off2] {sym} x ptr mem)
+(ANDWload  [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(int64(off1)+int64(off2)) => (ANDWload  [off1+off2] {sym} x ptr mem)
+(ORload    [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(int64(off1)+int64(off2)) => (ORload    [off1+off2] {sym} x ptr mem)
+(ORWload   [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(int64(off1)+int64(off2)) => (ORWload   [off1+off2] {sym} x ptr mem)
+(XORload   [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(int64(off1)+int64(off2)) => (XORload   [off1+off2] {sym} x ptr mem)
+(XORWload  [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(int64(off1)+int64(off2)) => (XORWload  [off1+off2] {sym} x ptr mem)
 
 // Fold constants into stores.
-(MOVDstore [off] {sym} ptr (MOVDconst [c]) mem) && is16Bit(c) && isU12Bit(off) && ptr.Op != OpSB ->
-	(MOVDstoreconst [makeValAndOff(c,off)] {sym} ptr mem)
-(MOVWstore [off] {sym} ptr (MOVDconst [c]) mem) && is16Bit(c) && isU12Bit(off) && ptr.Op != OpSB ->
-	(MOVWstoreconst [makeValAndOff(int64(int32(c)),off)] {sym} ptr mem)
-(MOVHstore [off] {sym} ptr (MOVDconst [c]) mem) && isU12Bit(off) && ptr.Op != OpSB ->
-	(MOVHstoreconst [makeValAndOff(int64(int16(c)),off)] {sym} ptr mem)
-(MOVBstore [off] {sym} ptr (MOVDconst [c]) mem) && is20Bit(off) && ptr.Op != OpSB ->
-	(MOVBstoreconst [makeValAndOff(int64(int8(c)),off)] {sym} ptr mem)
+(MOVDstore [off] {sym} ptr (MOVDconst [c]) mem) && is16Bit(c) && isU12Bit(int64(off)) && ptr.Op != OpSB =>
+	(MOVDstoreconst [makeValAndOff32(int32(c),off)] {sym} ptr mem)
+(MOVWstore [off] {sym} ptr (MOVDconst [c]) mem) && is16Bit(c) && isU12Bit(int64(off)) && ptr.Op != OpSB =>
+	(MOVWstoreconst [makeValAndOff32(int32(c),off)] {sym} ptr mem)
+(MOVHstore [off] {sym} ptr (MOVDconst [c]) mem) && isU12Bit(int64(off)) && ptr.Op != OpSB =>
+	(MOVHstoreconst [makeValAndOff32(int32(int16(c)),off)] {sym} ptr mem)
+(MOVBstore [off] {sym} ptr (MOVDconst [c]) mem) && is20Bit(int64(off)) && ptr.Op != OpSB =>
+	(MOVBstoreconst [makeValAndOff32(int32(int8(c)),off)] {sym} ptr mem)
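// Editor's note: makeValAndOff32 packs the constant and the offset into a
// single ValAndOff aux value. A sketch of the packing, assuming the int64
// representation used elsewhere in the SSA package (value in the high 32
// bits, offset in the low 32 bits):
//
//	type ValAndOff int64
//
//	func makeValAndOff32(val, off int32) ValAndOff {
//		return ValAndOff(int64(val)<<32 + int64(uint32(off)))
//	}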
 
 // Fold address offsets into constant stores.
-(MOVDstoreconst [sc] {s} (ADDconst [off] ptr) mem) && isU12Bit(ValAndOff(sc).Off()+off) ->
-	(MOVDstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
-(MOVWstoreconst [sc] {s} (ADDconst [off] ptr) mem) && isU12Bit(ValAndOff(sc).Off()+off) ->
-	(MOVWstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
-(MOVHstoreconst [sc] {s} (ADDconst [off] ptr) mem) && isU12Bit(ValAndOff(sc).Off()+off) ->
-	(MOVHstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
-(MOVBstoreconst [sc] {s} (ADDconst [off] ptr) mem) && is20Bit(ValAndOff(sc).Off()+off) ->
-	(MOVBstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
+(MOVDstoreconst [sc] {s} (ADDconst [off] ptr) mem) && isU12Bit(sc.Off()+int64(off)) =>
+	(MOVDstoreconst [sc.addOffset32(off)] {s} ptr mem)
+(MOVWstoreconst [sc] {s} (ADDconst [off] ptr) mem) && isU12Bit(sc.Off()+int64(off)) =>
+	(MOVWstoreconst [sc.addOffset32(off)] {s} ptr mem)
+(MOVHstoreconst [sc] {s} (ADDconst [off] ptr) mem) && isU12Bit(sc.Off()+int64(off)) =>
+	(MOVHstoreconst [sc.addOffset32(off)] {s} ptr mem)
+(MOVBstoreconst [sc] {s} (ADDconst [off] ptr) mem) && is20Bit(sc.Off()+int64(off)) =>
+	(MOVBstoreconst [sc.addOffset32(off)] {s} ptr mem)
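// Editor's note: isU12Bit is the unsigned 12-bit counterpart of is20Bit,
// matching the short-displacement instruction forms. A sketch under the same
// assumption about rewrite.go:
//
//	func isU12Bit(n int64) bool {
//		return 0 <= n && n < (1 << 12)
//	}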
 
 // Merge address calculations into loads and stores.
 // Offsets from SB must not be merged into unaligned memory accesses because
 // loads/stores using PC-relative addressing directly must be aligned to the
 // size of the target.
-(MOVDload   [off1] {sym1} (MOVDaddr <t> [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || (t.IsPtr() && t.Elem().Alignment()%8 == 0 && (off1+off2)%8 == 0)) ->
-	(MOVDload  [off1+off2] {mergeSym(sym1,sym2)} base mem)
-(MOVWZload  [off1] {sym1} (MOVDaddr <t> [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || (t.IsPtr() && t.Elem().Alignment()%4 == 0 && (off1+off2)%4 == 0)) ->
-	(MOVWZload  [off1+off2] {mergeSym(sym1,sym2)} base mem)
-(MOVHZload  [off1] {sym1} (MOVDaddr <t> [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || (t.IsPtr() && t.Elem().Alignment()%2 == 0 && (off1+off2)%2 == 0)) ->
-	(MOVHZload  [off1+off2] {mergeSym(sym1,sym2)} base mem)
-(MOVBZload  [off1] {sym1} (MOVDaddr [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
-	(MOVBZload  [off1+off2] {mergeSym(sym1,sym2)} base mem)
-(FMOVSload [off1] {sym1} (MOVDaddr [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
-	(FMOVSload [off1+off2] {mergeSym(sym1,sym2)} base mem)
-(FMOVDload [off1] {sym1} (MOVDaddr [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
-	(FMOVDload [off1+off2] {mergeSym(sym1,sym2)} base mem)
+(MOVDload   [off1] {sym1} (MOVDaddr <t> [off2] {sym2} base) mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && (base.Op != OpSB || (t.IsPtr() && t.Elem().Alignment()%8 == 0 && (off1+off2)%8 == 0)) =>
+	(MOVDload  [off1+off2] {mergeSymTyped(sym1,sym2)} base mem)
+(MOVWZload  [off1] {sym1} (MOVDaddr <t> [off2] {sym2} base) mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && (base.Op != OpSB || (t.IsPtr() && t.Elem().Alignment()%4 == 0 && (off1+off2)%4 == 0)) =>
+	(MOVWZload  [off1+off2] {mergeSymTyped(sym1,sym2)} base mem)
+(MOVHZload  [off1] {sym1} (MOVDaddr <t> [off2] {sym2} base) mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && (base.Op != OpSB || (t.IsPtr() && t.Elem().Alignment()%2 == 0 && (off1+off2)%2 == 0)) =>
+	(MOVHZload  [off1+off2] {mergeSymTyped(sym1,sym2)} base mem)
+(MOVBZload  [off1] {sym1} (MOVDaddr [off2] {sym2} base) mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
+	(MOVBZload  [off1+off2] {mergeSymTyped(sym1,sym2)} base mem)
+(FMOVSload [off1] {sym1} (MOVDaddr [off2] {sym2} base) mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
+	(FMOVSload [off1+off2] {mergeSymTyped(sym1,sym2)} base mem)
+(FMOVDload [off1] {sym1} (MOVDaddr [off2] {sym2} base) mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
+	(FMOVDload [off1+off2] {mergeSymTyped(sym1,sym2)} base mem)
 
-(MOVWload [off1] {sym1} (MOVDaddr <t> [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || (t.IsPtr() && t.Elem().Alignment()%4 == 0 && (off1+off2)%4 == 0)) ->
-	(MOVWload [off1+off2] {mergeSym(sym1,sym2)} base mem)
-(MOVHload [off1] {sym1} (MOVDaddr <t> [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || (t.IsPtr() && t.Elem().Alignment()%2 == 0 && (off1+off2)%2 == 0)) ->
-	(MOVHload [off1+off2] {mergeSym(sym1,sym2)} base mem)
-(MOVBload [off1] {sym1} (MOVDaddr [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
-	(MOVBload [off1+off2] {mergeSym(sym1,sym2)} base mem)
+(MOVWload [off1] {sym1} (MOVDaddr <t> [off2] {sym2} base) mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && (base.Op != OpSB || (t.IsPtr() && t.Elem().Alignment()%4 == 0 && (off1+off2)%4 == 0)) =>
+	(MOVWload [off1+off2] {mergeSymTyped(sym1,sym2)} base mem)
+(MOVHload [off1] {sym1} (MOVDaddr <t> [off2] {sym2} base) mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && (base.Op != OpSB || (t.IsPtr() && t.Elem().Alignment()%2 == 0 && (off1+off2)%2 == 0)) =>
+	(MOVHload [off1+off2] {mergeSymTyped(sym1,sym2)} base mem)
+(MOVBload [off1] {sym1} (MOVDaddr [off2] {sym2} base) mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
+	(MOVBload [off1+off2] {mergeSymTyped(sym1,sym2)} base mem)
 
-(MOVDstore  [off1] {sym1} (MOVDaddr <t> [off2] {sym2} base) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || (t.IsPtr() && t.Elem().Alignment()%8 == 0 && (off1+off2)%8 == 0)) ->
-	(MOVDstore  [off1+off2] {mergeSym(sym1,sym2)} base val mem)
-(MOVWstore  [off1] {sym1} (MOVDaddr <t> [off2] {sym2} base) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || (t.IsPtr() && t.Elem().Alignment()%4 == 0 && (off1+off2)%4 == 0)) ->
-	(MOVWstore  [off1+off2] {mergeSym(sym1,sym2)} base val mem)
-(MOVHstore  [off1] {sym1} (MOVDaddr <t> [off2] {sym2} base) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || (t.IsPtr() && t.Elem().Alignment()%2 == 0 && (off1+off2)%2 == 0)) ->
-	(MOVHstore  [off1+off2] {mergeSym(sym1,sym2)} base val mem)
-(MOVBstore  [off1] {sym1} (MOVDaddr [off2] {sym2} base) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
-	(MOVBstore  [off1+off2] {mergeSym(sym1,sym2)} base val mem)
-(FMOVSstore [off1] {sym1} (MOVDaddr [off2] {sym2} base) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
-	(FMOVSstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
-(FMOVDstore [off1] {sym1} (MOVDaddr [off2] {sym2} base) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
-	(FMOVDstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+(MOVDstore  [off1] {sym1} (MOVDaddr <t> [off2] {sym2} base) val mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && (base.Op != OpSB || (t.IsPtr() && t.Elem().Alignment()%8 == 0 && (off1+off2)%8 == 0)) =>
+	(MOVDstore  [off1+off2] {mergeSymTyped(sym1,sym2)} base val mem)
+(MOVWstore  [off1] {sym1} (MOVDaddr <t> [off2] {sym2} base) val mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && (base.Op != OpSB || (t.IsPtr() && t.Elem().Alignment()%4 == 0 && (off1+off2)%4 == 0)) =>
+	(MOVWstore  [off1+off2] {mergeSymTyped(sym1,sym2)} base val mem)
+(MOVHstore  [off1] {sym1} (MOVDaddr <t> [off2] {sym2} base) val mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && (base.Op != OpSB || (t.IsPtr() && t.Elem().Alignment()%2 == 0 && (off1+off2)%2 == 0)) =>
+	(MOVHstore  [off1+off2] {mergeSymTyped(sym1,sym2)} base val mem)
+(MOVBstore  [off1] {sym1} (MOVDaddr [off2] {sym2} base) val mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
+	(MOVBstore  [off1+off2] {mergeSymTyped(sym1,sym2)} base val mem)
+(FMOVSstore [off1] {sym1} (MOVDaddr [off2] {sym2} base) val mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
+	(FMOVSstore [off1+off2] {mergeSymTyped(sym1,sym2)} base val mem)
+(FMOVDstore [off1] {sym1} (MOVDaddr [off2] {sym2} base) val mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
+	(FMOVDstore [off1+off2] {mergeSymTyped(sym1,sym2)} base val mem)
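// Editor's note: the alignment guards encode the constraint from the comment
// above. For example, an 8-byte MOVDload merged with an SB-based MOVDaddr is
// only allowed when t.Elem().Alignment()%8 == 0 && (off1+off2)%8 == 0, i.e.
// when the PC-relative access is guaranteed to be 8-byte aligned.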
 
-(ADDload   [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (ADDload   [o1+o2] {mergeSym(s1, s2)} x ptr mem)
-(ADDWload  [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (ADDWload  [o1+o2] {mergeSym(s1, s2)} x ptr mem)
-(MULLDload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (MULLDload [o1+o2] {mergeSym(s1, s2)} x ptr mem)
-(MULLWload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (MULLWload [o1+o2] {mergeSym(s1, s2)} x ptr mem)
-(SUBload   [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (SUBload   [o1+o2] {mergeSym(s1, s2)} x ptr mem)
-(SUBWload  [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (SUBWload  [o1+o2] {mergeSym(s1, s2)} x ptr mem)
+(ADDload   [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(int64(o1)+int64(o2)) && canMergeSym(s1, s2) => (ADDload   [o1+o2] {mergeSymTyped(s1, s2)} x ptr mem)
+(ADDWload  [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(int64(o1)+int64(o2)) && canMergeSym(s1, s2) => (ADDWload  [o1+o2] {mergeSymTyped(s1, s2)} x ptr mem)
+(MULLDload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(int64(o1)+int64(o2)) && canMergeSym(s1, s2) => (MULLDload [o1+o2] {mergeSymTyped(s1, s2)} x ptr mem)
+(MULLWload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(int64(o1)+int64(o2)) && canMergeSym(s1, s2) => (MULLWload [o1+o2] {mergeSymTyped(s1, s2)} x ptr mem)
+(SUBload   [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(int64(o1)+int64(o2)) && canMergeSym(s1, s2) => (SUBload   [o1+o2] {mergeSymTyped(s1, s2)} x ptr mem)
+(SUBWload  [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(int64(o1)+int64(o2)) && canMergeSym(s1, s2) => (SUBWload  [o1+o2] {mergeSymTyped(s1, s2)} x ptr mem)
 
-(ANDload   [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (ANDload   [o1+o2] {mergeSym(s1, s2)} x ptr mem)
-(ANDWload  [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (ANDWload  [o1+o2] {mergeSym(s1, s2)} x ptr mem)
-(ORload    [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (ORload    [o1+o2] {mergeSym(s1, s2)} x ptr mem)
-(ORWload   [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (ORWload   [o1+o2] {mergeSym(s1, s2)} x ptr mem)
-(XORload   [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (XORload   [o1+o2] {mergeSym(s1, s2)} x ptr mem)
-(XORWload  [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (XORWload  [o1+o2] {mergeSym(s1, s2)} x ptr mem)
+(ANDload   [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(int64(o1)+int64(o2)) && canMergeSym(s1, s2) => (ANDload   [o1+o2] {mergeSymTyped(s1, s2)} x ptr mem)
+(ANDWload  [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(int64(o1)+int64(o2)) && canMergeSym(s1, s2) => (ANDWload  [o1+o2] {mergeSymTyped(s1, s2)} x ptr mem)
+(ORload    [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(int64(o1)+int64(o2)) && canMergeSym(s1, s2) => (ORload    [o1+o2] {mergeSymTyped(s1, s2)} x ptr mem)
+(ORWload   [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(int64(o1)+int64(o2)) && canMergeSym(s1, s2) => (ORWload   [o1+o2] {mergeSymTyped(s1, s2)} x ptr mem)
+(XORload   [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(int64(o1)+int64(o2)) && canMergeSym(s1, s2) => (XORload   [o1+o2] {mergeSymTyped(s1, s2)} x ptr mem)
+(XORWload  [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(int64(o1)+int64(o2)) && canMergeSym(s1, s2) => (XORWload  [o1+o2] {mergeSymTyped(s1, s2)} x ptr mem)
 
 // Cannot store constant to SB directly (no 'move relative long immediate' instructions).
-(MOVDstoreconst [sc] {sym1} (MOVDaddr [off] {sym2} ptr) mem) && ptr.Op != OpSB && canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) ->
-	(MOVDstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
-(MOVWstoreconst [sc] {sym1} (MOVDaddr [off] {sym2} ptr) mem) && ptr.Op != OpSB && canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) ->
-	(MOVWstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
-(MOVHstoreconst [sc] {sym1} (MOVDaddr [off] {sym2} ptr) mem) && ptr.Op != OpSB && canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) ->
-	(MOVHstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
-(MOVBstoreconst [sc] {sym1} (MOVDaddr [off] {sym2} ptr) mem) && ptr.Op != OpSB && canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) ->
-	(MOVBstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
-
-// generating indexed loads and stores
-(MOVBZload [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
-	(MOVBZloadidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
-(MOVBload [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
-	(MOVBloadidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
-(MOVHZload [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
-	(MOVHZloadidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
-(MOVHload [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
-	(MOVHloadidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
-(MOVWZload [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
-	(MOVWZloadidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
-(MOVWload [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
-	(MOVWloadidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
-(MOVDload [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
-	(MOVDloadidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
-(FMOVSload [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
-	(FMOVSloadidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
-(FMOVDload [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
-	(FMOVDloadidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
-
-(MOVBstore [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
-	(MOVBstoreidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
-(MOVHstore [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
-	(MOVHstoreidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
-(MOVWstore [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
-	(MOVWstoreidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
-(MOVDstore [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
-	(MOVDstoreidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
-(FMOVSstore [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
-	(FMOVSstoreidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
-(FMOVDstore [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
-	(FMOVDstoreidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
-
-(MOVBZload [off] {sym} (ADD ptr idx) mem) && ptr.Op != OpSB => (MOVBZloadidx [off] {sym} ptr idx mem)
-(MOVBload  [off] {sym} (ADD ptr idx) mem) && ptr.Op != OpSB => (MOVBloadidx  [off] {sym} ptr idx mem)
-(MOVHZload [off] {sym} (ADD ptr idx) mem) && ptr.Op != OpSB => (MOVHZloadidx [off] {sym} ptr idx mem)
-(MOVHload  [off] {sym} (ADD ptr idx) mem) && ptr.Op != OpSB => (MOVHloadidx  [off] {sym} ptr idx mem)
-(MOVWZload [off] {sym} (ADD ptr idx) mem) && ptr.Op != OpSB => (MOVWZloadidx [off] {sym} ptr idx mem)
-(MOVWload  [off] {sym} (ADD ptr idx) mem) && ptr.Op != OpSB => (MOVWloadidx  [off] {sym} ptr idx mem)
-(MOVDload  [off] {sym} (ADD ptr idx) mem) && ptr.Op != OpSB => (MOVDloadidx  [off] {sym} ptr idx mem)
-(FMOVSload [off] {sym} (ADD ptr idx) mem) && ptr.Op != OpSB => (FMOVSloadidx [off] {sym} ptr idx mem)
-(FMOVDload [off] {sym} (ADD ptr idx) mem) && ptr.Op != OpSB => (FMOVDloadidx [off] {sym} ptr idx mem)
-
-(MOVBstore  [off] {sym} (ADD ptr idx) val mem) && ptr.Op != OpSB => (MOVBstoreidx  [off] {sym} ptr idx val mem)
-(MOVHstore  [off] {sym} (ADD ptr idx) val mem) && ptr.Op != OpSB => (MOVHstoreidx  [off] {sym} ptr idx val mem)
-(MOVWstore  [off] {sym} (ADD ptr idx) val mem) && ptr.Op != OpSB => (MOVWstoreidx  [off] {sym} ptr idx val mem)
-(MOVDstore  [off] {sym} (ADD ptr idx) val mem) && ptr.Op != OpSB => (MOVDstoreidx  [off] {sym} ptr idx val mem)
-(FMOVSstore [off] {sym} (ADD ptr idx) val mem) && ptr.Op != OpSB => (FMOVSstoreidx [off] {sym} ptr idx val mem)
-(FMOVDstore [off] {sym} (ADD ptr idx) val mem) && ptr.Op != OpSB => (FMOVDstoreidx [off] {sym} ptr idx val mem)
-
-// combine ADD into indexed loads and stores
-(MOVBZloadidx [c] {sym} (ADDconst [d] ptr) idx mem) && is20Bit(c+d) -> (MOVBZloadidx [c+d] {sym} ptr idx mem)
-(MOVBloadidx  [c] {sym} (ADDconst [d] ptr) idx mem) && is20Bit(c+d) -> (MOVBloadidx  [c+d] {sym} ptr idx mem)
-(MOVHZloadidx [c] {sym} (ADDconst [d] ptr) idx mem) && is20Bit(c+d) -> (MOVHZloadidx [c+d] {sym} ptr idx mem)
-(MOVHloadidx  [c] {sym} (ADDconst [d] ptr) idx mem) && is20Bit(c+d) -> (MOVHloadidx  [c+d] {sym} ptr idx mem)
-(MOVWZloadidx [c] {sym} (ADDconst [d] ptr) idx mem) && is20Bit(c+d) -> (MOVWZloadidx [c+d] {sym} ptr idx mem)
-(MOVWloadidx  [c] {sym} (ADDconst [d] ptr) idx mem) && is20Bit(c+d) -> (MOVWloadidx  [c+d] {sym} ptr idx mem)
-(MOVDloadidx  [c] {sym} (ADDconst [d] ptr) idx mem) && is20Bit(c+d) -> (MOVDloadidx  [c+d] {sym} ptr idx mem)
-(FMOVSloadidx [c] {sym} (ADDconst [d] ptr) idx mem) && is20Bit(c+d) -> (FMOVSloadidx [c+d] {sym} ptr idx mem)
-(FMOVDloadidx [c] {sym} (ADDconst [d] ptr) idx mem) && is20Bit(c+d) -> (FMOVDloadidx [c+d] {sym} ptr idx mem)
-
-(MOVBstoreidx  [c] {sym} (ADDconst [d] ptr) idx val mem) && is20Bit(c+d) -> (MOVBstoreidx  [c+d] {sym} ptr idx val mem)
-(MOVHstoreidx  [c] {sym} (ADDconst [d] ptr) idx val mem) && is20Bit(c+d) -> (MOVHstoreidx  [c+d] {sym} ptr idx val mem)
-(MOVWstoreidx  [c] {sym} (ADDconst [d] ptr) idx val mem) && is20Bit(c+d) -> (MOVWstoreidx  [c+d] {sym} ptr idx val mem)
-(MOVDstoreidx  [c] {sym} (ADDconst [d] ptr) idx val mem) && is20Bit(c+d) -> (MOVDstoreidx  [c+d] {sym} ptr idx val mem)
-(FMOVSstoreidx [c] {sym} (ADDconst [d] ptr) idx val mem) && is20Bit(c+d) -> (FMOVSstoreidx [c+d] {sym} ptr idx val mem)
-(FMOVDstoreidx [c] {sym} (ADDconst [d] ptr) idx val mem) && is20Bit(c+d) -> (FMOVDstoreidx [c+d] {sym} ptr idx val mem)
-
-(MOVBZloadidx [c] {sym} ptr (ADDconst [d] idx) mem) && is20Bit(c+d) -> (MOVBZloadidx [c+d] {sym} ptr idx mem)
-(MOVBloadidx  [c] {sym} ptr (ADDconst [d] idx) mem) && is20Bit(c+d) -> (MOVBloadidx  [c+d] {sym} ptr idx mem)
-(MOVHZloadidx [c] {sym} ptr (ADDconst [d] idx) mem) && is20Bit(c+d) -> (MOVHZloadidx [c+d] {sym} ptr idx mem)
-(MOVHloadidx  [c] {sym} ptr (ADDconst [d] idx) mem) && is20Bit(c+d) -> (MOVHloadidx  [c+d] {sym} ptr idx mem)
-(MOVWZloadidx [c] {sym} ptr (ADDconst [d] idx) mem) && is20Bit(c+d) -> (MOVWZloadidx [c+d] {sym} ptr idx mem)
-(MOVWloadidx  [c] {sym} ptr (ADDconst [d] idx) mem) && is20Bit(c+d) -> (MOVWloadidx  [c+d] {sym} ptr idx mem)
-(MOVDloadidx  [c] {sym} ptr (ADDconst [d] idx) mem) && is20Bit(c+d) -> (MOVDloadidx  [c+d] {sym} ptr idx mem)
-(FMOVSloadidx [c] {sym} ptr (ADDconst [d] idx) mem) && is20Bit(c+d) -> (FMOVSloadidx [c+d] {sym} ptr idx mem)
-(FMOVDloadidx [c] {sym} ptr (ADDconst [d] idx) mem) && is20Bit(c+d) -> (FMOVDloadidx [c+d] {sym} ptr idx mem)
-
-(MOVBstoreidx  [c] {sym} ptr (ADDconst [d] idx) val mem) && is20Bit(c+d) -> (MOVBstoreidx  [c+d] {sym} ptr idx val mem)
-(MOVHstoreidx  [c] {sym} ptr (ADDconst [d] idx) val mem) && is20Bit(c+d) -> (MOVHstoreidx  [c+d] {sym} ptr idx val mem)
-(MOVWstoreidx  [c] {sym} ptr (ADDconst [d] idx) val mem) && is20Bit(c+d) -> (MOVWstoreidx  [c+d] {sym} ptr idx val mem)
-(MOVDstoreidx  [c] {sym} ptr (ADDconst [d] idx) val mem) && is20Bit(c+d) -> (MOVDstoreidx  [c+d] {sym} ptr idx val mem)
-(FMOVSstoreidx [c] {sym} ptr (ADDconst [d] idx) val mem) && is20Bit(c+d) -> (FMOVSstoreidx [c+d] {sym} ptr idx val mem)
-(FMOVDstoreidx [c] {sym} ptr (ADDconst [d] idx) val mem) && is20Bit(c+d) -> (FMOVDstoreidx [c+d] {sym} ptr idx val mem)
+(MOVDstoreconst [sc] {sym1} (MOVDaddr [off] {sym2} ptr) mem) && ptr.Op != OpSB && canMergeSym(sym1, sym2) && sc.canAdd32(off) =>
+	(MOVDstoreconst [sc.addOffset32(off)] {mergeSymTyped(sym1, sym2)} ptr mem)
+(MOVWstoreconst [sc] {sym1} (MOVDaddr [off] {sym2} ptr) mem) && ptr.Op != OpSB && canMergeSym(sym1, sym2) && sc.canAdd32(off) =>
+	(MOVWstoreconst [sc.addOffset32(off)] {mergeSymTyped(sym1, sym2)} ptr mem)
+(MOVHstoreconst [sc] {sym1} (MOVDaddr [off] {sym2} ptr) mem) && ptr.Op != OpSB && canMergeSym(sym1, sym2) && sc.canAdd32(off) =>
+	(MOVHstoreconst [sc.addOffset32(off)] {mergeSymTyped(sym1, sym2)} ptr mem)
+(MOVBstoreconst [sc] {sym1} (MOVDaddr [off] {sym2} ptr) mem) && ptr.Op != OpSB && canMergeSym(sym1, sym2) && sc.canAdd32(off) =>
+	(MOVBstoreconst [sc.addOffset32(off)] {mergeSymTyped(sym1, sym2)} ptr mem)
 
 // MOVDaddr into MOVDaddridx
-(MOVDaddridx [off1] {sym1} (MOVDaddr [off2] {sym2} x) y) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB ->
-       (MOVDaddridx [off1+off2] {mergeSym(sym1,sym2)} x y)
-(MOVDaddridx [off1] {sym1} x (MOVDaddr [off2] {sym2} y)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && y.Op != OpSB ->
-       (MOVDaddridx [off1+off2] {mergeSym(sym1,sym2)} x y)
+(MOVDaddridx [off1] {sym1} (MOVDaddr [off2] {sym2} x) y) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && x.Op != OpSB =>
+       (MOVDaddridx [off1+off2] {mergeSymTyped(sym1,sym2)} x y)
+(MOVDaddridx [off1] {sym1} x (MOVDaddr [off2] {sym2} y)) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && y.Op != OpSB =>
+       (MOVDaddridx [off1+off2] {mergeSymTyped(sym1,sym2)} x y)
 
 // Absorb InvertFlags into branches.
 (BRC {c} (InvertFlags cmp) yes no) => (BRC {c.ReverseComparison()} cmp yes no)
 
 // Constant comparisons.
-(CMPconst (MOVDconst [x]) [y]) && x==y -> (FlagEQ)
-(CMPconst (MOVDconst [x]) [y]) && x<y -> (FlagLT)
-(CMPconst (MOVDconst [x]) [y]) && x>y -> (FlagGT)
+(CMPconst (MOVDconst [x]) [y]) && x==int64(y) => (FlagEQ)
+(CMPconst (MOVDconst [x]) [y]) && x<int64(y) => (FlagLT)
+(CMPconst (MOVDconst [x]) [y]) && x>int64(y) => (FlagGT)
 (CMPUconst (MOVDconst [x]) [y]) && uint64(x)==uint64(y) => (FlagEQ)
 (CMPUconst (MOVDconst [x]) [y]) && uint64(x)<uint64(y) => (FlagLT)
 (CMPUconst (MOVDconst [x]) [y]) && uint64(x)>uint64(y) => (FlagGT)
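// Editor's note: after the typed-aux change, x is int64 while the immediate y
// is int32, hence the explicit int64(y) conversions in the signed compares.
// A worked instance: (CMPconst (MOVDconst [7]) [7]) has x == int64(y), so it
// folds to (FlagEQ).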
@@ -1158,31 +1056,31 @@
 
 // Convert constant subtracts to constant adds.
 (SUBconst [c] x) && c != -(1<<31) => (ADDconst [-c] x)
-(SUBWconst [c] x) -> (ADDWconst [int64(int32(-c))] x)
+(SUBWconst [c] x) => (ADDWconst [-int32(c)] x)
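// Editor's note: the c != -(1<<31) guard on SUBconst exists because negating
// the minimum 32-bit value wraps in two's complement. A one-line check of the
// edge case in Go:
//
//	var m int32 = -1 << 31
//	fmt.Println(-m == m) // true: negating -2147483648 wraps back to itself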
 
 // generic constant folding
 // TODO: more of this
-(ADDconst [c] (MOVDconst [d])) -> (MOVDconst [c+d])
-(ADDWconst [c] (MOVDconst [d])) -> (MOVDconst [int64(int32(c+d))])
-(ADDconst [c] (ADDconst [d] x)) && is32Bit(c+d) -> (ADDconst [c+d] x)
-(ADDWconst [c] (ADDWconst [d] x)) -> (ADDWconst [int64(int32(c+d))] x)
-(SUBconst (MOVDconst [d]) [c]) -> (MOVDconst [d-c])
-(SUBconst (SUBconst x [d]) [c]) && is32Bit(-c-d) -> (ADDconst [-c-d] x)
+(ADDconst [c] (MOVDconst [d])) => (MOVDconst [int64(c)+d])
+(ADDWconst [c] (MOVDconst [d])) => (MOVDconst [int64(c)+d])
+(ADDconst [c] (ADDconst [d] x)) && is32Bit(int64(c)+int64(d)) => (ADDconst [c+d] x)
+(ADDWconst [c] (ADDWconst [d] x)) => (ADDWconst [int32(c+d)] x)
+(SUBconst (MOVDconst [d]) [c]) => (MOVDconst [d-int64(c)])
+(SUBconst (SUBconst x [d]) [c]) && is32Bit(-int64(c)-int64(d)) => (ADDconst [-c-d] x)
 (SRADconst [c] (MOVDconst [d])) => (MOVDconst [d>>uint64(c)])
 (SRAWconst [c] (MOVDconst [d])) => (MOVDconst [int64(int32(d))>>uint64(c)])
 (NEG (MOVDconst [c])) => (MOVDconst [-c])
 (NEGW (MOVDconst [c])) => (MOVDconst [int64(int32(-c))])
-(MULLDconst [c] (MOVDconst [d])) -> (MOVDconst [c*d])
-(MULLWconst [c] (MOVDconst [d])) -> (MOVDconst [int64(int32(c*d))])
+(MULLDconst [c] (MOVDconst [d])) => (MOVDconst [int64(c)*d])
+(MULLWconst [c] (MOVDconst [d])) => (MOVDconst [int64(c*int32(d))])
 (AND (MOVDconst [c]) (MOVDconst [d])) => (MOVDconst [c&d])
 (ANDconst [c] (MOVDconst [d])) => (MOVDconst [c&d])
-(ANDWconst [c] (MOVDconst [d])) -> (MOVDconst [c&d])
+(ANDWconst [c] (MOVDconst [d])) => (MOVDconst [int64(c)&d])
 (OR (MOVDconst [c]) (MOVDconst [d])) => (MOVDconst [c|d])
 (ORconst [c] (MOVDconst [d])) => (MOVDconst [c|d])
-(ORWconst [c] (MOVDconst [d])) -> (MOVDconst [c|d])
+(ORWconst [c] (MOVDconst [d])) => (MOVDconst [int64(c)|d])
 (XOR (MOVDconst [c]) (MOVDconst [d])) => (MOVDconst [c^d])
 (XORconst [c] (MOVDconst [d])) => (MOVDconst [c^d])
-(XORWconst [c] (MOVDconst [d])) -> (MOVDconst [c^d])
+(XORWconst [c] (MOVDconst [d])) => (MOVDconst [int64(c)^d])
 (LoweredRound32F x:(FMOVSconst)) => x
 (LoweredRound64F x:(FMOVDconst)) => x
 
@@ -1199,19 +1097,19 @@
 (XOR x x) => (MOVDconst [0])
 (XORW x x) => (MOVDconst [0])
 (NEG (ADDconst [c] (NEG x))) && c != -(1<<31) => (ADDconst [-c] x)
-(MOVBZreg (ANDWconst [m] x)) -> (MOVWZreg (ANDWconst <typ.UInt32> [int64( uint8(m))] x))
-(MOVHZreg (ANDWconst [m] x)) -> (MOVWZreg (ANDWconst <typ.UInt32> [int64(uint16(m))] x))
-(MOVBreg  (ANDWconst [m] x)) &&  int8(m) >= 0 -> (MOVWZreg (ANDWconst <typ.UInt32> [int64( uint8(m))] x))
-(MOVHreg  (ANDWconst [m] x)) && int16(m) >= 0 -> (MOVWZreg (ANDWconst <typ.UInt32> [int64(uint16(m))] x))
+(MOVBZreg (ANDWconst [m] x)) => (MOVWZreg (ANDWconst <typ.UInt32> [int32( uint8(m))] x))
+(MOVHZreg (ANDWconst [m] x)) => (MOVWZreg (ANDWconst <typ.UInt32> [int32(uint16(m))] x))
+(MOVBreg  (ANDWconst [m] x)) &&  int8(m) >= 0 => (MOVWZreg (ANDWconst <typ.UInt32> [int32( uint8(m))] x))
+(MOVHreg  (ANDWconst [m] x)) && int16(m) >= 0 => (MOVWZreg (ANDWconst <typ.UInt32> [int32(uint16(m))] x))
 
 // carry flag generation
 // (only constant fold carry of zero)
 (Select1 (ADDCconst (MOVDconst [c]) [d]))
-  && uint64(c+d) >= uint64(c) && c+d == 0
-  -> (FlagEQ)
+  && uint64(c+int64(d)) >= uint64(c) && c+int64(d) == 0
+  => (FlagEQ)
 (Select1 (ADDCconst (MOVDconst [c]) [d]))
-  && uint64(c+d) >= uint64(c) && c+d != 0
-  -> (FlagLT)
+  && uint64(c+int64(d)) >= uint64(c) && c+int64(d) != 0
+  => (FlagLT)
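// Editor's note: uint64(c+int64(d)) >= uint64(c) is the usual "no unsigned
// carry out" test for an addition: the sum wraps below c exactly when a carry
// occurs. With no carry, a zero sum folds to FlagEQ and a nonzero sum to
// FlagLT. E.g. c=1, d=2: 3 >= 1 and 3 != 0, so the rule produces (FlagLT).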
 
 // borrow flag generation
 // (only constant fold borrow of zero)
@@ -1225,8 +1123,8 @@
 // add with carry
 (ADDE x y (FlagEQ)) => (ADDC x y)
 (ADDE x y (FlagLT)) => (ADDC x y)
-(ADDC x (MOVDconst [c])) && is16Bit(c) -> (ADDCconst x [c])
-(Select0 (ADDCconst (MOVDconst [c]) [d])) -> (MOVDconst [c+d])
+(ADDC x (MOVDconst [c])) && is16Bit(c) => (ADDCconst x [int16(c)])
+(Select0 (ADDCconst (MOVDconst [c]) [d])) => (MOVDconst [c+int64(d)])
 
 // subtract with borrow
 (SUBE x y (FlagGT)) => (SUBC x y)
@@ -1256,14 +1154,12 @@
 (C(G|LG)IJ {s390x.Greater}       (NEG (Select0 (SUBE (MOVDconst [0]) (MOVDconst [0]) borrow))) [0]) => (BRC {s390x.Borrow}   borrow)
 
 // fused multiply-add
-(Select0 (F(ADD|SUB) (FMUL y z) x)) -> (FM(ADD|SUB) x y z)
-(Select0 (F(ADDS|SUBS) (FMULS y z) x)) -> (FM(ADDS|SUBS) x y z)
+(Select0 (F(ADD|SUB) (FMUL y z) x)) => (FM(ADD|SUB) x y z)
+(Select0 (F(ADDS|SUBS) (FMULS y z) x)) => (FM(ADDS|SUBS) x y z)
 
 // Convert floating point comparisons against zero into 'load and test' instructions.
-(FCMP x (FMOVDconst [c])) && auxTo64F(c) == 0 -> (LTDBR x)
-(FCMPS x (FMOVSconst [c])) && auxTo32F(c) == 0 -> (LTEBR x)
-(FCMP (FMOVDconst [c]) x) && auxTo64F(c) == 0 -> (InvertFlags (LTDBR <v.Type> x))
-(FCMPS (FMOVSconst [c]) x) && auxTo32F(c) == 0 -> (InvertFlags (LTEBR <v.Type> x))
+(F(CMP|CMPS) x (FMOV(D|S)const [0.0])) => (LT(D|E)BR x)
+(F(CMP|CMPS) (FMOV(D|S)const [0.0]) x) => (InvertFlags (LT(D|E)BR <v.Type> x))
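// Editor's note: typed rules can match float aux values literally, so the
// [0.0] pattern replaces the old auxTo64F/auxTo32F decoding. Operand order
// still matters: when the zero is on the left, the comparison sense is
// flipped via InvertFlags.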
 
 // FSUB, FSUBS, FADD, FADDS now produce a condition code representing the
 // comparison of the result with 0.0. If a compare with zero instruction
@@ -1274,30 +1170,30 @@
 // but moving the flag generating value to a different block seems to
 // increase the likelihood that the flags value will have to be regenerated
 // by flagalloc which is not what we want.
-(LTDBR (Select0 x:(F(ADD|SUB) _ _)))   && b == x.Block -> (Select1 x)
-(LTEBR (Select0 x:(F(ADDS|SUBS) _ _))) && b == x.Block -> (Select1 x)
+(LTDBR (Select0 x:(F(ADD|SUB) _ _)))   && b == x.Block => (Select1 x)
+(LTEBR (Select0 x:(F(ADDS|SUBS) _ _))) && b == x.Block => (Select1 x)
 
 // Fold memory operations into operations.
 // Exclude global data (SB) because these instructions cannot handle relative addresses.
 // TODO(mundaym): indexed versions of these?
 ((ADD|SUB|MULLD|AND|OR|XOR) <t> x g:(MOVDload [off] {sym} ptr mem))
   && ptr.Op != OpSB
-  && is20Bit(off)
+  && is20Bit(int64(off))
   && canMergeLoadClobber(v, g, x)
   && clobber(g)
-  -> ((ADD|SUB|MULLD|AND|OR|XOR)load <t> [off] {sym} x ptr mem)
+  => ((ADD|SUB|MULLD|AND|OR|XOR)load <t> [off] {sym} x ptr mem)
 ((ADD|SUB|MULL|AND|OR|XOR)W <t> x g:(MOVWload [off] {sym} ptr mem))
   && ptr.Op != OpSB
-  && is20Bit(off)
+  && is20Bit(int64(off))
   && canMergeLoadClobber(v, g, x)
   && clobber(g)
-  -> ((ADD|SUB|MULL|AND|OR|XOR)Wload <t> [off] {sym} x ptr mem)
+  => ((ADD|SUB|MULL|AND|OR|XOR)Wload <t> [off] {sym} x ptr mem)
 ((ADD|SUB|MULL|AND|OR|XOR)W <t> x g:(MOVWZload [off] {sym} ptr mem))
   && ptr.Op != OpSB
-  && is20Bit(off)
+  && is20Bit(int64(off))
   && canMergeLoadClobber(v, g, x)
   && clobber(g)
-  -> ((ADD|SUB|MULL|AND|OR|XOR)Wload <t> [off] {sym} x ptr mem)
+  => ((ADD|SUB|MULL|AND|OR|XOR)Wload <t> [off] {sym} x ptr mem)
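// Editor's note: these folds turn, for example, Go code like
//
//	func f(x int64, p *int64) int64 { return x + *p }
//
// into a single register-memory ADDload when the load has no other uses:
// canMergeLoadClobber checks that the merge is safe, and clobber marks the
// now-dead load for removal.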
 
 // Combine constant stores into larger (unaligned) stores.
 // Avoid SB because constant stores to relative offsets are
@@ -1305,21 +1201,21 @@
 (MOVBstoreconst [c] {s} p x:(MOVBstoreconst [a] {s} p mem))
   && p.Op != OpSB
   && x.Uses == 1
-  && ValAndOff(a).Off() + 1 == ValAndOff(c).Off()
+  && a.Off() + 1 == c.Off()
   && clobber(x)
-  -> (MOVHstoreconst [makeValAndOff(ValAndOff(c).Val()&0xff | ValAndOff(a).Val()<<8, ValAndOff(a).Off())] {s} p mem)
+  => (MOVHstoreconst [makeValAndOff32(c.Val32()&0xff | a.Val32()<<8, a.Off32())] {s} p mem)
 (MOVHstoreconst [c] {s} p x:(MOVHstoreconst [a] {s} p mem))
   && p.Op != OpSB
   && x.Uses == 1
-  && ValAndOff(a).Off() + 2 == ValAndOff(c).Off()
+  && a.Off() + 2 == c.Off()
   && clobber(x)
-  -> (MOVWstore [ValAndOff(a).Off()] {s} p (MOVDconst [int64(int32(ValAndOff(c).Val()&0xffff | ValAndOff(a).Val()<<16))]) mem)
+  => (MOVWstore [a.Off32()] {s} p (MOVDconst [int64(c.Val32()&0xffff | a.Val32()<<16)]) mem)
 (MOVWstoreconst [c] {s} p x:(MOVWstoreconst [a] {s} p mem))
   && p.Op != OpSB
   && x.Uses == 1
-  && ValAndOff(a).Off() + 4 == ValAndOff(c).Off()
+  && a.Off() + 4 == c.Off()
   && clobber(x)
-  -> (MOVDstore [ValAndOff(a).Off()] {s} p (MOVDconst [ValAndOff(c).Val()&0xffffffff | ValAndOff(a).Val()<<32]) mem)
+  => (MOVDstore [a.Off32()] {s} p (MOVDconst [c.Val()&0xffffffff | a.Val()<<32]) mem)
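// Editor's note: s390x is big-endian, so the constant stored at the lower
// offset becomes the high-order byte of the merged store. Worked example for
// the byte case: a = (0x12, off 0) and c = (0x34, off 1) give
// c.Val32()&0xff | a.Val32()<<8 == 0x1234, and MOVHstoreconst of 0x1234 at
// offset 0 writes 0x12 then 0x34, matching the two original stores.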
 
 // Combine stores into larger (unaligned) stores.
 // It doesn't work on global data (based on SB) because stores with relative addressing
@@ -1328,93 +1224,52 @@
   && p.Op != OpSB
   && x.Uses == 1
   && clobber(x)
-  -> (MOVHstore [i-1] {s} p w mem)
+  => (MOVHstore [i-1] {s} p w mem)
 (MOVBstore [i] {s} p w0:(SRDconst [j] w) x:(MOVBstore [i-1] {s} p (SRDconst [j+8] w) mem))
   && p.Op != OpSB
   && x.Uses == 1
   && clobber(x)
-  -> (MOVHstore [i-1] {s} p w0 mem)
+  => (MOVHstore [i-1] {s} p w0 mem)
 (MOVBstore [i] {s} p w x:(MOVBstore [i-1] {s} p (SRWconst [8] w) mem))
   && p.Op != OpSB
   && x.Uses == 1
   && clobber(x)
-  -> (MOVHstore [i-1] {s} p w mem)
+  => (MOVHstore [i-1] {s} p w mem)
 (MOVBstore [i] {s} p w0:(SRWconst [j] w) x:(MOVBstore [i-1] {s} p (SRWconst [j+8] w) mem))
   && p.Op != OpSB
   && x.Uses == 1
   && clobber(x)
-  -> (MOVHstore [i-1] {s} p w0 mem)
+  => (MOVHstore [i-1] {s} p w0 mem)
 (MOVHstore [i] {s} p w x:(MOVHstore [i-2] {s} p (SRDconst [16] w) mem))
   && p.Op != OpSB
   && x.Uses == 1
   && clobber(x)
-  -> (MOVWstore [i-2] {s} p w mem)
+  => (MOVWstore [i-2] {s} p w mem)
 (MOVHstore [i] {s} p w0:(SRDconst [j] w) x:(MOVHstore [i-2] {s} p (SRDconst [j+16] w) mem))
   && p.Op != OpSB
   && x.Uses == 1
   && clobber(x)
-  -> (MOVWstore [i-2] {s} p w0 mem)
+  => (MOVWstore [i-2] {s} p w0 mem)
 (MOVHstore [i] {s} p w x:(MOVHstore [i-2] {s} p (SRWconst [16] w) mem))
   && p.Op != OpSB
   && x.Uses == 1
   && clobber(x)
-  -> (MOVWstore [i-2] {s} p w mem)
+  => (MOVWstore [i-2] {s} p w mem)
 (MOVHstore [i] {s} p w0:(SRWconst [j] w) x:(MOVHstore [i-2] {s} p (SRWconst [j+16] w) mem))
   && p.Op != OpSB
   && x.Uses == 1
   && clobber(x)
-  -> (MOVWstore [i-2] {s} p w0 mem)
+  => (MOVWstore [i-2] {s} p w0 mem)
 (MOVWstore [i] {s} p (SRDconst [32] w) x:(MOVWstore [i-4] {s} p w mem))
   && p.Op != OpSB
   && x.Uses == 1
   && clobber(x)
-  -> (MOVDstore [i-4] {s} p w mem)
+  => (MOVDstore [i-4] {s} p w mem)
 (MOVWstore [i] {s} p w0:(SRDconst [j] w) x:(MOVWstore [i-4] {s} p (SRDconst [j+32] w) mem))
   && p.Op != OpSB
   && x.Uses == 1
   && clobber(x)
-  -> (MOVDstore [i-4] {s} p w0 mem)
-
-(MOVBstoreidx [i] {s} p idx w x:(MOVBstoreidx [i-1] {s} p idx (SRDconst [8] w) mem))
-  && x.Uses == 1
-  && clobber(x)
-  -> (MOVHstoreidx [i-1] {s} p idx w mem)
-(MOVBstoreidx [i] {s} p idx w0:(SRDconst [j] w) x:(MOVBstoreidx [i-1] {s} p idx (SRDconst [j+8] w) mem))
-  && x.Uses == 1
-  && clobber(x)
-  -> (MOVHstoreidx [i-1] {s} p idx w0 mem)
-(MOVBstoreidx [i] {s} p idx w x:(MOVBstoreidx [i-1] {s} p idx (SRWconst [8] w) mem))
-  && x.Uses == 1
-  && clobber(x)
-  -> (MOVHstoreidx [i-1] {s} p idx w mem)
-(MOVBstoreidx [i] {s} p idx w0:(SRWconst [j] w) x:(MOVBstoreidx [i-1] {s} p idx (SRWconst [j+8] w) mem))
-  && x.Uses == 1
-  && clobber(x)
-  -> (MOVHstoreidx [i-1] {s} p idx w0 mem)
-(MOVHstoreidx [i] {s} p idx w x:(MOVHstoreidx [i-2] {s} p idx (SRDconst [16] w) mem))
-  && x.Uses == 1
-  && clobber(x)
-  -> (MOVWstoreidx [i-2] {s} p idx w mem)
-(MOVHstoreidx [i] {s} p idx w0:(SRDconst [j] w) x:(MOVHstoreidx [i-2] {s} p idx (SRDconst [j+16] w) mem))
-  && x.Uses == 1
-  && clobber(x)
-  -> (MOVWstoreidx [i-2] {s} p idx w0 mem)
-(MOVHstoreidx [i] {s} p idx w x:(MOVHstoreidx [i-2] {s} p idx (SRWconst [16] w) mem))
-  && x.Uses == 1
-  && clobber(x)
-  -> (MOVWstoreidx [i-2] {s} p idx w mem)
-(MOVHstoreidx [i] {s} p idx w0:(SRWconst [j] w) x:(MOVHstoreidx [i-2] {s} p idx (SRWconst [j+16] w) mem))
-  && x.Uses == 1
-  && clobber(x)
-  -> (MOVWstoreidx [i-2] {s} p idx w0 mem)
-(MOVWstoreidx [i] {s} p idx w x:(MOVWstoreidx [i-4] {s} p idx (SRDconst [32] w) mem))
-  && x.Uses == 1
-  && clobber(x)
-  -> (MOVDstoreidx [i-4] {s} p idx w mem)
-(MOVWstoreidx [i] {s} p idx w0:(SRDconst [j] w) x:(MOVWstoreidx [i-4] {s} p idx (SRDconst [j+32] w) mem))
-  && x.Uses == 1
-  && clobber(x)
-  -> (MOVDstoreidx [i-4] {s} p idx w0 mem)
+  => (MOVDstore [i-4] {s} p w0 mem)
 
 // Combine stores into larger (unaligned) stores with the bytes reversed (little endian).
 // Store-with-bytes-reversed instructions do not support relative memory addresses,
@@ -1423,87 +1278,46 @@
   && p.Op != OpSB
   && x.Uses == 1
   && clobber(x)
-  -> (MOVHBRstore [i-1] {s} p w mem)
+  => (MOVHBRstore [i-1] {s} p w mem)
 (MOVBstore [i] {s} p (SRDconst [j] w) x:(MOVBstore [i-1] {s} p w0:(SRDconst [j-8] w) mem))
   && p.Op != OpSB
   && x.Uses == 1
   && clobber(x)
-  -> (MOVHBRstore [i-1] {s} p w0 mem)
+  => (MOVHBRstore [i-1] {s} p w0 mem)
 (MOVBstore [i] {s} p (SRWconst [8] w) x:(MOVBstore [i-1] {s} p w mem))
   && p.Op != OpSB
   && x.Uses == 1
   && clobber(x)
-  -> (MOVHBRstore [i-1] {s} p w mem)
+  => (MOVHBRstore [i-1] {s} p w mem)
 (MOVBstore [i] {s} p (SRWconst [j] w) x:(MOVBstore [i-1] {s} p w0:(SRWconst [j-8] w) mem))
   && p.Op != OpSB
   && x.Uses == 1
   && clobber(x)
-  -> (MOVHBRstore [i-1] {s} p w0 mem)
+  => (MOVHBRstore [i-1] {s} p w0 mem)
 (MOVHBRstore [i] {s} p (SRDconst [16] w) x:(MOVHBRstore [i-2] {s} p w mem))
   && x.Uses == 1
   && clobber(x)
-  -> (MOVWBRstore [i-2] {s} p w mem)
+  => (MOVWBRstore [i-2] {s} p w mem)
 (MOVHBRstore [i] {s} p (SRDconst [j] w) x:(MOVHBRstore [i-2] {s} p w0:(SRDconst [j-16] w) mem))
   && x.Uses == 1
   && clobber(x)
-  -> (MOVWBRstore [i-2] {s} p w0 mem)
+  => (MOVWBRstore [i-2] {s} p w0 mem)
 (MOVHBRstore [i] {s} p (SRWconst [16] w) x:(MOVHBRstore [i-2] {s} p w mem))
   && x.Uses == 1
   && clobber(x)
-  -> (MOVWBRstore [i-2] {s} p w mem)
+  => (MOVWBRstore [i-2] {s} p w mem)
 (MOVHBRstore [i] {s} p (SRWconst [j] w) x:(MOVHBRstore [i-2] {s} p w0:(SRWconst [j-16] w) mem))
   && x.Uses == 1
   && clobber(x)
-  -> (MOVWBRstore [i-2] {s} p w0 mem)
+  => (MOVWBRstore [i-2] {s} p w0 mem)
 (MOVWBRstore [i] {s} p (SRDconst [32] w) x:(MOVWBRstore [i-4] {s} p w mem))
   && x.Uses == 1
   && clobber(x)
-  -> (MOVDBRstore [i-4] {s} p w mem)
+  => (MOVDBRstore [i-4] {s} p w mem)
 (MOVWBRstore [i] {s} p (SRDconst [j] w) x:(MOVWBRstore [i-4] {s} p w0:(SRDconst [j-32] w) mem))
   && x.Uses == 1
   && clobber(x)
-  -> (MOVDBRstore [i-4] {s} p w0 mem)
-
-(MOVBstoreidx [i] {s} p idx (SRDconst [8] w) x:(MOVBstoreidx [i-1] {s} p idx w mem))
-  && x.Uses == 1
-  && clobber(x)
-  -> (MOVHBRstoreidx [i-1] {s} p idx w mem)
-(MOVBstoreidx [i] {s} p idx (SRDconst [j] w) x:(MOVBstoreidx [i-1] {s} p idx w0:(SRDconst [j-8] w) mem))
-  && x.Uses == 1
-  && clobber(x)
-  -> (MOVHBRstoreidx [i-1] {s} p idx w0 mem)
-(MOVBstoreidx [i] {s} p idx (SRWconst [8] w) x:(MOVBstoreidx [i-1] {s} p idx w mem))
-  && x.Uses == 1
-  && clobber(x)
-  -> (MOVHBRstoreidx [i-1] {s} p idx w mem)
-(MOVBstoreidx [i] {s} p idx (SRWconst [j] w) x:(MOVBstoreidx [i-1] {s} p idx w0:(SRWconst [j-8] w) mem))
-  && x.Uses == 1
-  && clobber(x)
-  -> (MOVHBRstoreidx [i-1] {s} p idx w0 mem)
-(MOVHBRstoreidx [i] {s} p idx (SRDconst [16] w) x:(MOVHBRstoreidx [i-2] {s} p idx w mem))
-  && x.Uses == 1
-  && clobber(x)
-  -> (MOVWBRstoreidx [i-2] {s} p idx w mem)
-(MOVHBRstoreidx [i] {s} p idx (SRDconst [j] w) x:(MOVHBRstoreidx [i-2] {s} p idx w0:(SRDconst [j-16] w) mem))
-  && x.Uses == 1
-  && clobber(x)
-  -> (MOVWBRstoreidx [i-2] {s} p idx w0 mem)
-(MOVHBRstoreidx [i] {s} p idx (SRWconst [16] w) x:(MOVHBRstoreidx [i-2] {s} p idx w mem))
-  && x.Uses == 1
-  && clobber(x)
-  -> (MOVWBRstoreidx [i-2] {s} p idx w mem)
-(MOVHBRstoreidx [i] {s} p idx (SRWconst [j] w) x:(MOVHBRstoreidx [i-2] {s} p idx w0:(SRWconst [j-16] w) mem))
-  && x.Uses == 1
-  && clobber(x)
-  -> (MOVWBRstoreidx [i-2] {s} p idx w0 mem)
-(MOVWBRstoreidx [i] {s} p idx (SRDconst [32] w) x:(MOVWBRstoreidx [i-4] {s} p idx w mem))
-  && x.Uses == 1
-  && clobber(x)
-  -> (MOVDBRstoreidx [i-4] {s} p idx w mem)
-(MOVWBRstoreidx [i] {s} p idx (SRDconst [j] w) x:(MOVWBRstoreidx [i-4] {s} p idx w0:(SRDconst [j-32] w) mem))
-  && x.Uses == 1
-  && clobber(x)
-  -> (MOVDBRstoreidx [i-4] {s} p idx w0 mem)
+  => (MOVDBRstore [i-4] {s} p w0 mem)
 
 // Combining byte loads into larger (unaligned) loads.
 
@@ -1518,7 +1332,7 @@
   && sh.Uses == 1
   && mergePoint(b,x0,x1) != nil
   && clobber(x0, x1, sh)
-  -> @mergePoint(b,x0,x1) (MOVHZload [i0] {s} p mem)
+  => @mergePoint(b,x0,x1) (MOVHZload [i0] {s} p mem)
 
 (OR                  x1:(MOVBZload [i1] {s} p mem)
     sh:(SLDconst [8] x0:(MOVBZload [i0] {s} p mem)))
@@ -1529,7 +1343,7 @@
   && sh.Uses == 1
   && mergePoint(b,x0,x1) != nil
   && clobber(x0, x1, sh)
-  -> @mergePoint(b,x0,x1) (MOVHZload [i0] {s} p mem)
+  => @mergePoint(b,x0,x1) (MOVHZload [i0] {s} p mem)
 
 (ORW                  x1:(MOVHZload [i1] {s} p mem)
     sh:(SLWconst [16] x0:(MOVHZload [i0] {s} p mem)))
@@ -1540,7 +1354,7 @@
   && sh.Uses == 1
   && mergePoint(b,x0,x1) != nil
   && clobber(x0, x1, sh)
-  -> @mergePoint(b,x0,x1) (MOVWZload [i0] {s} p mem)
+  => @mergePoint(b,x0,x1) (MOVWZload [i0] {s} p mem)
 
 (OR                   x1:(MOVHZload [i1] {s} p mem)
     sh:(SLDconst [16] x0:(MOVHZload [i0] {s} p mem)))
@@ -1551,7 +1365,7 @@
   && sh.Uses == 1
   && mergePoint(b,x0,x1) != nil
   && clobber(x0, x1, sh)
-  -> @mergePoint(b,x0,x1) (MOVWZload [i0] {s} p mem)
+  => @mergePoint(b,x0,x1) (MOVWZload [i0] {s} p mem)
 
 (OR                   x1:(MOVWZload [i1] {s} p mem)
     sh:(SLDconst [32] x0:(MOVWZload [i0] {s} p mem)))
@@ -1562,7 +1376,7 @@
   && sh.Uses == 1
   && mergePoint(b,x0,x1) != nil
   && clobber(x0, x1, sh)
-  -> @mergePoint(b,x0,x1) (MOVDload [i0] {s} p mem)
+  => @mergePoint(b,x0,x1) (MOVDload [i0] {s} p mem)
 
 (ORW
     s0:(SLWconst [j0] x0:(MOVBZload [i0] {s} p mem))
@@ -1579,7 +1393,7 @@
   && or.Uses == 1
   && mergePoint(b,x0,x1,y) != nil
   && clobber(x0, x1, s0, s1, or)
-  -> @mergePoint(b,x0,x1,y) (ORW <v.Type> (SLWconst <v.Type> [j1] (MOVHZload [i0] {s} p mem)) y)
+  => @mergePoint(b,x0,x1,y) (ORW <v.Type> (SLWconst <v.Type> [j1] (MOVHZload [i0] {s} p mem)) y)
 
 (OR
     s0:(SLDconst [j0] x0:(MOVBZload [i0] {s} p mem))
@@ -1596,7 +1410,7 @@
   && or.Uses == 1
   && mergePoint(b,x0,x1,y) != nil
   && clobber(x0, x1, s0, s1, or)
-  -> @mergePoint(b,x0,x1,y) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVHZload [i0] {s} p mem)) y)
+  => @mergePoint(b,x0,x1,y) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVHZload [i0] {s} p mem)) y)
 
 (OR
     s0:(SLDconst [j0] x0:(MOVHZload [i0] {s} p mem))
@@ -1613,115 +1427,7 @@
   && or.Uses == 1
   && mergePoint(b,x0,x1,y) != nil
   && clobber(x0, x1, s0, s1, or)
-  -> @mergePoint(b,x0,x1,y) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVWZload [i0] {s} p mem)) y)
-
-// Big-endian indexed loads
-
-(ORW                 x1:(MOVBZloadidx [i1] {s} p idx mem)
-    sh:(SLWconst [8] x0:(MOVBZloadidx [i0] {s} p idx mem)))
-  && i1 == i0+1
-  && p.Op != OpSB
-  && x0.Uses == 1
-  && x1.Uses == 1
-  && sh.Uses == 1
-  && mergePoint(b,x0,x1) != nil
-  && clobber(x0, x1, sh)
-  -> @mergePoint(b,x0,x1) (MOVHZloadidx [i0] {s} p idx mem)
-
-(OR                  x1:(MOVBZloadidx [i1] {s} p idx mem)
-    sh:(SLDconst [8] x0:(MOVBZloadidx [i0] {s} p idx mem)))
-  && i1 == i0+1
-  && p.Op != OpSB
-  && x0.Uses == 1
-  && x1.Uses == 1
-  && sh.Uses == 1
-  && mergePoint(b,x0,x1) != nil
-  && clobber(x0, x1, sh)
-  -> @mergePoint(b,x0,x1) (MOVHZloadidx [i0] {s} p idx mem)
-
-(ORW                  x1:(MOVHZloadidx [i1] {s} p idx mem)
-    sh:(SLWconst [16] x0:(MOVHZloadidx [i0] {s} p idx mem)))
-  && i1 == i0+2
-  && p.Op != OpSB
-  && x0.Uses == 1
-  && x1.Uses == 1
-  && sh.Uses == 1
-  && mergePoint(b,x0,x1) != nil
-  && clobber(x0, x1, sh)
-  -> @mergePoint(b,x0,x1) (MOVWZloadidx [i0] {s} p idx mem)
-
-(OR                   x1:(MOVHZloadidx [i1] {s} p idx mem)
-    sh:(SLDconst [16] x0:(MOVHZloadidx [i0] {s} p idx mem)))
-  && i1 == i0+2
-  && p.Op != OpSB
-  && x0.Uses == 1
-  && x1.Uses == 1
-  && sh.Uses == 1
-  && mergePoint(b,x0,x1) != nil
-  && clobber(x0, x1, sh)
-  -> @mergePoint(b,x0,x1) (MOVWZloadidx [i0] {s} p idx mem)
-
-(OR                   x1:(MOVWZloadidx [i1] {s} p idx mem)
-    sh:(SLDconst [32] x0:(MOVWZloadidx [i0] {s} p idx mem)))
-  && i1 == i0+4
-  && p.Op != OpSB
-  && x0.Uses == 1
-  && x1.Uses == 1
-  && sh.Uses == 1
-  && mergePoint(b,x0,x1) != nil
-  && clobber(x0, x1, sh)
-  -> @mergePoint(b,x0,x1) (MOVDloadidx [i0] {s} p idx mem)
-
-(ORW
-    s0:(SLWconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem))
-    or:(ORW
-        s1:(SLWconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem))
-	y))
-  && i1 == i0+1
-  && j1 == j0-8
-  && j1 % 16 == 0
-  && x0.Uses == 1
-  && x1.Uses == 1
-  && s0.Uses == 1
-  && s1.Uses == 1
-  && or.Uses == 1
-  && mergePoint(b,x0,x1,y) != nil
-  && clobber(x0, x1, s0, s1, or)
-  -> @mergePoint(b,x0,x1,y) (ORW <v.Type> (SLWconst <v.Type> [j1] (MOVHZloadidx [i0] {s} p idx mem)) y)
-
-(OR
-    s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem))
-    or:(OR
-        s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem))
-	y))
-  && i1 == i0+1
-  && j1 == j0-8
-  && j1 % 16 == 0
-  && x0.Uses == 1
-  && x1.Uses == 1
-  && s0.Uses == 1
-  && s1.Uses == 1
-  && or.Uses == 1
-  && mergePoint(b,x0,x1,y) != nil
-  && clobber(x0, x1, s0, s1, or)
-  -> @mergePoint(b,x0,x1,y) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVHZloadidx [i0] {s} p idx mem)) y)
-
-(OR
-    s0:(SLDconst [j0] x0:(MOVHZloadidx [i0] {s} p idx mem))
-    or:(OR
-        s1:(SLDconst [j1] x1:(MOVHZloadidx [i1] {s} p idx mem))
-	y))
-  && i1 == i0+2
-  && j1 == j0-16
-  && j1 % 32 == 0
-  && x0.Uses == 1
-  && x1.Uses == 1
-  && s0.Uses == 1
-  && s1.Uses == 1
-  && or.Uses == 1
-  && mergePoint(b,x0,x1,y) != nil
-  && clobber(x0, x1, s0, s1, or)
-  -> @mergePoint(b,x0,x1,y) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVWZloadidx [i0] {s} p idx mem)) y)
+  => @mergePoint(b,x0,x1,y) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVWZload [i0] {s} p mem)) y)
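// Editor's note: mergePoint(b, x0, x1, ...) picks a block in which all the
// component loads are available, so the single widened load can be emitted
// there (the @block syntax); it returns nil, blocking the rewrite, when no
// such block exists. The i1 == i0+1 / i0+2 / i0+4 guards ensure the loads
// cover adjacent, correctly ordered bytes for a big-endian combine.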
 
 // Little-endian loads
 
@@ -1734,7 +1440,7 @@
   && sh.Uses == 1
   && mergePoint(b,x0,x1) != nil
   && clobber(x0, x1, sh)
-  -> @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRload [i0] {s} p mem))
+  => @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRload [i0] {s} p mem))
 
 (OR                  x0:(MOVBZload [i0] {s} p mem)
     sh:(SLDconst [8] x1:(MOVBZload [i1] {s} p mem)))
@@ -1745,7 +1451,7 @@
   && sh.Uses == 1
   && mergePoint(b,x0,x1) != nil
   && clobber(x0, x1, sh)
-  -> @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRload [i0] {s} p mem))
+  => @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRload [i0] {s} p mem))
 
 (ORW                  r0:(MOVHZreg x0:(MOVHBRload [i0] {s} p mem))
     sh:(SLWconst [16] r1:(MOVHZreg x1:(MOVHBRload [i1] {s} p mem))))
@@ -1757,7 +1463,7 @@
   && sh.Uses == 1
   && mergePoint(b,x0,x1) != nil
   && clobber(x0, x1, r0, r1, sh)
-  -> @mergePoint(b,x0,x1) (MOVWBRload [i0] {s} p mem)
+  => @mergePoint(b,x0,x1) (MOVWBRload [i0] {s} p mem)
 
 (OR                   r0:(MOVHZreg x0:(MOVHBRload [i0] {s} p mem))
     sh:(SLDconst [16] r1:(MOVHZreg x1:(MOVHBRload [i1] {s} p mem))))
@@ -1769,7 +1475,7 @@
   && sh.Uses == 1
   && mergePoint(b,x0,x1) != nil
   && clobber(x0, x1, r0, r1, sh)
-  -> @mergePoint(b,x0,x1) (MOVWZreg (MOVWBRload [i0] {s} p mem))
+  => @mergePoint(b,x0,x1) (MOVWZreg (MOVWBRload [i0] {s} p mem))
 
 (OR                   r0:(MOVWZreg x0:(MOVWBRload [i0] {s} p mem))
     sh:(SLDconst [32] r1:(MOVWZreg x1:(MOVWBRload [i1] {s} p mem))))
@@ -1781,7 +1487,7 @@
   && sh.Uses == 1
   && mergePoint(b,x0,x1) != nil
   && clobber(x0, x1, r0, r1, sh)
-  -> @mergePoint(b,x0,x1) (MOVDBRload [i0] {s} p mem)
+  => @mergePoint(b,x0,x1) (MOVDBRload [i0] {s} p mem)
 
 (ORW
     s1:(SLWconst [j1] x1:(MOVBZload [i1] {s} p mem))
@@ -1799,7 +1505,7 @@
   && or.Uses == 1
   && mergePoint(b,x0,x1,y) != nil
   && clobber(x0, x1, s0, s1, or)
-  -> @mergePoint(b,x0,x1,y) (ORW <v.Type> (SLWconst <v.Type> [j0] (MOVHZreg (MOVHBRload [i0] {s} p mem))) y)
+  => @mergePoint(b,x0,x1,y) (ORW <v.Type> (SLWconst <v.Type> [j0] (MOVHZreg (MOVHBRload [i0] {s} p mem))) y)
 
 (OR
     s1:(SLDconst [j1] x1:(MOVBZload [i1] {s} p mem))
@@ -1817,7 +1523,7 @@
   && or.Uses == 1
   && mergePoint(b,x0,x1,y) != nil
   && clobber(x0, x1, s0, s1, or)
-  -> @mergePoint(b,x0,x1,y) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVHZreg (MOVHBRload [i0] {s} p mem))) y)
+  => @mergePoint(b,x0,x1,y) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVHZreg (MOVHBRload [i0] {s} p mem))) y)
 
 (OR
     s1:(SLDconst [j1] r1:(MOVHZreg x1:(MOVHBRload [i1] {s} p mem)))
@@ -1836,168 +1542,53 @@
   && or.Uses == 1
   && mergePoint(b,x0,x1,y) != nil
   && clobber(x0, x1, r0, r1, s0, s1, or)
-  -> @mergePoint(b,x0,x1,y) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVWZreg (MOVWBRload [i0] {s} p mem))) y)
-
-// Little-endian indexed loads
-
-(ORW                 x0:(MOVBZloadidx [i0] {s} p idx mem)
-    sh:(SLWconst [8] x1:(MOVBZloadidx [i1] {s} p idx mem)))
-  && p.Op != OpSB
-  && i1 == i0+1
-  && x0.Uses == 1
-  && x1.Uses == 1
-  && sh.Uses == 1
-  && mergePoint(b,x0,x1) != nil
-  && clobber(x0, x1, sh)
-  -> @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))
-
-(OR                  x0:(MOVBZloadidx [i0] {s} p idx mem)
-    sh:(SLDconst [8] x1:(MOVBZloadidx [i1] {s} p idx mem)))
-  && p.Op != OpSB
-  && i1 == i0+1
-  && x0.Uses == 1
-  && x1.Uses == 1
-  && sh.Uses == 1
-  && mergePoint(b,x0,x1) != nil
-  && clobber(x0, x1, sh)
-  -> @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))
-
-(ORW                  r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} p idx mem))
-    sh:(SLWconst [16] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} p idx mem))))
-  && i1 == i0+2
-  && x0.Uses == 1
-  && x1.Uses == 1
-  && r0.Uses == 1
-  && r1.Uses == 1
-  && sh.Uses == 1
-  && mergePoint(b,x0,x1) != nil
-  && clobber(x0, x1, r0, r1, sh)
-  -> @mergePoint(b,x0,x1) (MOVWBRloadidx [i0] {s} p idx mem)
-
-(OR                   r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} p idx mem))
-    sh:(SLDconst [16] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} p idx mem))))
-  && i1 == i0+2
-  && x0.Uses == 1
-  && x1.Uses == 1
-  && r0.Uses == 1
-  && r1.Uses == 1
-  && sh.Uses == 1
-  && mergePoint(b,x0,x1) != nil
-  && clobber(x0, x1, r0, r1, sh)
-  -> @mergePoint(b,x0,x1) (MOVWZreg (MOVWBRloadidx [i0] {s} p idx mem))
-
-(OR                   r0:(MOVWZreg x0:(MOVWBRloadidx [i0] {s} p idx mem))
-    sh:(SLDconst [32] r1:(MOVWZreg x1:(MOVWBRloadidx [i1] {s} p idx mem))))
-  && i1 == i0+4
-  && x0.Uses == 1
-  && x1.Uses == 1
-  && r0.Uses == 1
-  && r1.Uses == 1
-  && sh.Uses == 1
-  && mergePoint(b,x0,x1) != nil
-  && clobber(x0, x1, r0, r1, sh)
-  -> @mergePoint(b,x0,x1) (MOVDBRloadidx [i0] {s} p idx mem)
-
-(ORW
-    s1:(SLWconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem))
-    or:(ORW
-        s0:(SLWconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem))
-	y))
-  && p.Op != OpSB
-  && i1 == i0+1
-  && j1 == j0+8
-  && j0 % 16 == 0
-  && x0.Uses == 1
-  && x1.Uses == 1
-  && s0.Uses == 1
-  && s1.Uses == 1
-  && or.Uses == 1
-  && mergePoint(b,x0,x1,y) != nil
-  && clobber(x0, x1, s0, s1, or)
-  -> @mergePoint(b,x0,x1,y) (ORW <v.Type> (SLWconst <v.Type> [j0] (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))) y)
-
-(OR
-    s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem))
-    or:(OR
-        s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem))
-	y))
-  && p.Op != OpSB
-  && i1 == i0+1
-  && j1 == j0+8
-  && j0 % 16 == 0
-  && x0.Uses == 1
-  && x1.Uses == 1
-  && s0.Uses == 1
-  && s1.Uses == 1
-  && or.Uses == 1
-  && mergePoint(b,x0,x1,y) != nil
-  && clobber(x0, x1, s0, s1, or)
-  -> @mergePoint(b,x0,x1,y) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))) y)
-
-(OR
-    s1:(SLDconst [j1] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} p idx mem)))
-    or:(OR
-        s0:(SLDconst [j0] r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} p idx mem)))
-	y))
-  && i1 == i0+2
-  && j1 == j0+16
-  && j0 % 32 == 0
-  && x0.Uses == 1
-  && x1.Uses == 1
-  && r0.Uses == 1
-  && r1.Uses == 1
-  && s0.Uses == 1
-  && s1.Uses == 1
-  && or.Uses == 1
-  && mergePoint(b,x0,x1,y) != nil
-  && clobber(x0, x1, r0, r1, s0, s1, or)
-  -> @mergePoint(b,x0,x1,y) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVWZreg (MOVWBRloadidx [i0] {s} p idx mem))) y)
+  => @mergePoint(b,x0,x1,y) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVWZreg (MOVWBRload [i0] {s} p mem))) y)
 
 // Combine stores into store multiples.
 // 32-bit
 (MOVWstore [i] {s} p w1 x:(MOVWstore [i-4] {s} p w0 mem))
   && p.Op != OpSB
   && x.Uses == 1
-  && is20Bit(i-4)
+  && is20Bit(int64(i)-4)
   && clobber(x)
-  -> (STM2 [i-4] {s} p w0 w1 mem)
+  => (STM2 [i-4] {s} p w0 w1 mem)
 (MOVWstore [i] {s} p w2 x:(STM2 [i-8] {s} p w0 w1 mem))
   && x.Uses == 1
-  && is20Bit(i-8)
+  && is20Bit(int64(i)-8)
   && clobber(x)
-  -> (STM3 [i-8] {s} p w0 w1 w2 mem)
+  => (STM3 [i-8] {s} p w0 w1 w2 mem)
 (MOVWstore [i] {s} p w3 x:(STM3 [i-12] {s} p w0 w1 w2 mem))
   && x.Uses == 1
-  && is20Bit(i-12)
+  && is20Bit(int64(i)-12)
   && clobber(x)
-  -> (STM4 [i-12] {s} p w0 w1 w2 w3 mem)
+  => (STM4 [i-12] {s} p w0 w1 w2 w3 mem)
 (STM2 [i] {s} p w2 w3 x:(STM2 [i-8] {s} p w0 w1 mem))
   && x.Uses == 1
-  && is20Bit(i-8)
+  && is20Bit(int64(i)-8)
   && clobber(x)
-  -> (STM4 [i-8] {s} p w0 w1 w2 w3 mem)
+  => (STM4 [i-8] {s} p w0 w1 w2 w3 mem)
 // 64-bit
 (MOVDstore [i] {s} p w1 x:(MOVDstore [i-8] {s} p w0 mem))
   && p.Op != OpSB
   && x.Uses == 1
-  && is20Bit(i-8)
+  && is20Bit(int64(i)-8)
   && clobber(x)
-  -> (STMG2 [i-8] {s} p w0 w1 mem)
+  => (STMG2 [i-8] {s} p w0 w1 mem)
 (MOVDstore [i] {s} p w2 x:(STMG2 [i-16] {s} p w0 w1 mem))
   && x.Uses == 1
-  && is20Bit(i-16)
+  && is20Bit(int64(i)-16)
   && clobber(x)
-  -> (STMG3 [i-16] {s} p w0 w1 w2 mem)
+  => (STMG3 [i-16] {s} p w0 w1 w2 mem)
 (MOVDstore [i] {s} p w3 x:(STMG3 [i-24] {s} p w0 w1 w2 mem))
   && x.Uses == 1
-  && is20Bit(i-24)
+  && is20Bit(int64(i)-24)
   && clobber(x)
-  -> (STMG4 [i-24] {s} p w0 w1 w2 w3 mem)
+  => (STMG4 [i-24] {s} p w0 w1 w2 w3 mem)
 (STMG2 [i] {s} p w2 w3 x:(STMG2 [i-16] {s} p w0 w1 mem))
   && x.Uses == 1
-  && is20Bit(i-16)
+  && is20Bit(int64(i)-16)
   && clobber(x)
-  -> (STMG4 [i-16] {s} p w0 w1 w2 w3 mem)
+  => (STMG4 [i-16] {s} p w0 w1 w2 w3 mem)
 
 // Convert 32-bit store multiples into 64-bit stores.
-(STM2 [i] {s} p (SRDconst [32] x) x mem) -> (MOVDstore [i] {s} p x mem)
+(STM2 [i] {s} p (SRDconst [32] x) x mem) => (MOVDstore [i] {s} p x mem)
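The arrow change running through these rules files, from "->" to "=>", marks rules migrated to rulegen's strongly typed aux and auxint fields; typed rules force explicit conversions in conditions and results, such as is20Bit(int64(i)-4) above. A minimal sketch of the conversion style, using toy stand-ins for the helpers defined in rewrite.go (the names match the real ones, the bodies are illustrative):

    package main

    import "fmt"

    // Toy stand-ins for the typed-aux helpers in rewrite.go. For "=>" rules,
    // rulegen emits these conversions instead of reading v.AuxInt raw.
    func auxIntToInt32(i int64) int32 { return int32(i) }
    func int32ToAuxInt(i int32) int64 { return int64(i) }

    func main() {
        var auxInt int64 = -4               // as stored on a Value
        off := auxIntToInt32(auxInt)        // typed read
        fmt.Println(int32ToAuxInt(off + 8)) // typed write: prints 4
    }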
diff --git a/src/cmd/compile/internal/ssa/gen/dec.rules b/src/cmd/compile/internal/ssa/gen/dec.rules
index 3fd2be4..4c677f8 100644
--- a/src/cmd/compile/internal/ssa/gen/dec.rules
+++ b/src/cmd/compile/internal/ssa/gen/dec.rules
@@ -66,14 +66,14 @@
     (Load <typ.Int>
       (OffPtr <typ.IntPtr> [2*config.PtrSize] ptr)
       mem))
-(Store dst (SliceMake ptr len cap) mem) =>
+(Store {t} dst (SliceMake ptr len cap) mem) =>
   (Store {typ.Int}
     (OffPtr <typ.IntPtr> [2*config.PtrSize] dst)
     cap
     (Store {typ.Int}
       (OffPtr <typ.IntPtr> [config.PtrSize] dst)
       len
-      (Store {typ.BytePtr} dst ptr mem)))
+      (Store {t.Elem().PtrTo()} dst ptr mem)))
 
 // interface ops
 (ITab (IMake itab _)) => itab
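The Store rule fix above threads the slice type t through so that the pointer-field store is typed t.Elem().PtrTo() instead of the catch-all typ.BytePtr, keeping the element type of the stored pointer visible to later passes. As a hedged illustration of the header layout the rule decomposes (ptr at offset 0, len at PtrSize, cap at 2*PtrSize; this is not compiler code):

    package main

    import (
        "fmt"
        "unsafe"
    )

    // sliceHeader mirrors the ptr/len/cap layout that dec.rules splits a
    // slice store into; after the fix the ptr field carries an
    // element-pointer type rather than *byte.
    type sliceHeader struct {
        ptr unsafe.Pointer
        len int
        cap int
    }

    func main() {
        s := make([]int64, 3, 5)
        h := (*sliceHeader)(unsafe.Pointer(&s))
        fmt.Println(h.len, h.cap) // 3 5
    }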
diff --git a/src/cmd/compile/internal/ssa/gen/rulegen.go b/src/cmd/compile/internal/ssa/gen/rulegen.go
index e520503..9e2e112 100644
--- a/src/cmd/compile/internal/ssa/gen/rulegen.go
+++ b/src/cmd/compile/internal/ssa/gen/rulegen.go
@@ -1423,7 +1423,8 @@
 
 func opHasAuxInt(op opData) bool {
 	switch op.aux {
-	case "Bool", "Int8", "Int16", "Int32", "Int64", "Int128", "Float32", "Float64", "SymOff", "SymValAndOff", "TypSize", "ARM64BitField", "FlagConstant":
+	case "Bool", "Int8", "Int16", "Int32", "Int64", "Int128", "Float32", "Float64",
+		"SymOff", "SymValAndOff", "TypSize", "ARM64BitField", "FlagConstant", "CCop":
 		return true
 	}
 	return false
@@ -1431,7 +1432,7 @@
 
 func opHasAux(op opData) bool {
 	switch op.aux {
-	case "String", "Sym", "SymOff", "SymValAndOff", "Typ", "TypSize", "CCop",
+	case "String", "Sym", "SymOff", "SymValAndOff", "Typ", "TypSize",
 		"S390XCCMask", "S390XRotateParams":
 		return true
 	}
@@ -1784,8 +1785,6 @@
 		return "s390x.CCMask"
 	case "S390XRotateParams":
 		return "s390x.RotateParams"
-	case "CCop":
-		return "CCop"
 	default:
 		return "invalid"
 	}
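Together with the rewrite.go hunk below, this moves CCop from the interface-typed Aux field into the integer AuxInt field: opHasAuxInt now claims "CCop", opHasAux drops it, and the removed cCopToAux/auxToCCop pair is replaced by auxIntToOp/opToAuxInt. A toy round-trip, with an illustrative Op type standing in for the compiler's:

    package main

    import "fmt"

    type Op int64 // stand-in for the SSA Op type

    func auxIntToOp(cc int64) Op { return Op(cc) }
    func opToAuxInt(o Op) int64  { return int64(o) }

    func main() {
        const opExample Op = 42
        auxInt := opToAuxInt(opExample) // store a condition-code op in AuxInt
        fmt.Println(auxIntToOp(auxInt) == opExample) // true
    }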
diff --git a/src/cmd/compile/internal/ssa/lower.go b/src/cmd/compile/internal/ssa/lower.go
index ab0fa80..f332b2e 100644
--- a/src/cmd/compile/internal/ssa/lower.go
+++ b/src/cmd/compile/internal/ssa/lower.go
@@ -7,7 +7,7 @@
 // convert to machine-dependent ops
 func lower(f *Func) {
 	// repeat rewrites until we find no more rewrites
-	applyRewrite(f, f.Config.lowerBlock, f.Config.lowerValue)
+	applyRewrite(f, f.Config.lowerBlock, f.Config.lowerValue, removeDeadValues)
 }
 
 // checkLower checks for unlowered opcodes and fails if we find one.
diff --git a/src/cmd/compile/internal/ssa/nilcheck.go b/src/cmd/compile/internal/ssa/nilcheck.go
index 6b24371..d1bad52 100644
--- a/src/cmd/compile/internal/ssa/nilcheck.go
+++ b/src/cmd/compile/internal/ssa/nilcheck.go
@@ -235,7 +235,7 @@
 				continue
 			}
 			if v.Type.IsMemory() || v.Type.IsTuple() && v.Type.FieldType(1).IsMemory() {
-				if v.Op == OpVarKill || v.Op == OpVarLive || (v.Op == OpVarDef && !v.Aux.(GCNode).Typ().HasHeapPointer()) {
+				if v.Op == OpVarKill || v.Op == OpVarLive || (v.Op == OpVarDef && !v.Aux.(GCNode).Typ().HasPointers()) {
 					// These ops don't really change memory.
 					continue
 					// Note: OpVarDef requires that the defined variable not have pointers.
diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go
index 4cd7279..4540189 100644
--- a/src/cmd/compile/internal/ssa/opGen.go
+++ b/src/cmd/compile/internal/ssa/opGen.go
@@ -1828,6 +1828,7 @@
 	OpPPC64FADD
 	OpPPC64FADDS
 	OpPPC64SUB
+	OpPPC64SUBFCconst
 	OpPPC64FSUB
 	OpPPC64FSUBS
 	OpPPC64MULLD
@@ -1853,8 +1854,6 @@
 	OpPPC64ROTL
 	OpPPC64ROTLW
 	OpPPC64LoweredAdd64Carry
-	OpPPC64ADDconstForCarry
-	OpPPC64MaskIfNotCarry
 	OpPPC64SRADconst
 	OpPPC64SRAWconst
 	OpPPC64SRDconst
@@ -2027,8 +2026,6 @@
 	OpPPC64FlagEQ
 	OpPPC64FlagLT
 	OpPPC64FlagGT
-	OpPPC64FlagCarrySet
-	OpPPC64FlagCarryClear
 
 	OpRISCV64ADD
 	OpRISCV64ADDI
@@ -24318,6 +24315,20 @@
 		},
 	},
 	{
+		name:    "SUBFCconst",
+		auxType: auxInt64,
+		argLen:  1,
+		asm:     ppc64.ASUBC,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
+			},
+			outputs: []outputInfo{
+				{0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
+			},
+		},
+	},
+	{
 		name:   "FSUB",
 		argLen: 2,
 		asm:    ppc64.AFSUB,
@@ -24684,28 +24695,6 @@
 		},
 	},
 	{
-		name:    "ADDconstForCarry",
-		auxType: auxInt16,
-		argLen:  1,
-		asm:     ppc64.AADDC,
-		reg: regInfo{
-			inputs: []inputInfo{
-				{0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
-			},
-			clobbers: 2147483648, // R31
-		},
-	},
-	{
-		name:   "MaskIfNotCarry",
-		argLen: 1,
-		asm:    ppc64.AADDME,
-		reg: regInfo{
-			outputs: []outputInfo{
-				{0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
-			},
-		},
-	},
-	{
 		name:    "SRADconst",
 		auxType: auxInt64,
 		argLen:  1,
@@ -26964,16 +26953,6 @@
 		argLen: 0,
 		reg:    regInfo{},
 	},
-	{
-		name:   "FlagCarrySet",
-		argLen: 0,
-		reg:    regInfo{},
-	},
-	{
-		name:   "FlagCarryClear",
-		argLen: 0,
-		reg:    regInfo{},
-	},
 
 	{
 		name:        "ADD",
diff --git a/src/cmd/compile/internal/ssa/opt.go b/src/cmd/compile/internal/ssa/opt.go
index 6e91fd7..128e614 100644
--- a/src/cmd/compile/internal/ssa/opt.go
+++ b/src/cmd/compile/internal/ssa/opt.go
@@ -6,5 +6,5 @@
 
 // machine-independent optimization
 func opt(f *Func) {
-	applyRewrite(f, rewriteBlockgeneric, rewriteValuegeneric)
+	applyRewrite(f, rewriteBlockgeneric, rewriteValuegeneric, removeDeadValues)
 }
diff --git a/src/cmd/compile/internal/ssa/regalloc.go b/src/cmd/compile/internal/ssa/regalloc.go
index a2be7bb..64c6aed 100644
--- a/src/cmd/compile/internal/ssa/regalloc.go
+++ b/src/cmd/compile/internal/ssa/regalloc.go
@@ -588,7 +588,7 @@
 	if s.f.Config.hasGReg {
 		s.allocatable &^= 1 << s.GReg
 	}
-	if s.f.Config.ctxt.Framepointer_enabled && s.f.Config.FPReg >= 0 {
+	if objabi.Framepointer_enabled && s.f.Config.FPReg >= 0 {
 		s.allocatable &^= 1 << uint(s.f.Config.FPReg)
 	}
 	if s.f.Config.LinkReg != -1 {
diff --git a/src/cmd/compile/internal/ssa/rewrite.go b/src/cmd/compile/internal/ssa/rewrite.go
index fb35691..09f94ef 100644
--- a/src/cmd/compile/internal/ssa/rewrite.go
+++ b/src/cmd/compile/internal/ssa/rewrite.go
@@ -20,7 +20,15 @@
 	"path/filepath"
 )
 
-func applyRewrite(f *Func, rb blockRewriter, rv valueRewriter) {
+type deadValueChoice bool
+
+const (
+	leaveDeadValues  deadValueChoice = false
+	removeDeadValues                 = true
+)
+
+// deadcode indicates whether the rewrite pass should try to remove any values that become dead.
+func applyRewrite(f *Func, rb blockRewriter, rv valueRewriter, deadcode deadValueChoice) {
 	// repeat rewrites until we find no more rewrites
 	pendingLines := f.cachedLineStarts // Holds statement boundaries that need to be moved to a new value/block
 	pendingLines.clear()
@@ -56,6 +64,18 @@
 					*v0 = *v
 					v0.Args = append([]*Value{}, v.Args...) // make a new copy, not aliasing
 				}
+				if v.Uses == 0 && v.removeable() {
+					if v.Op != OpInvalid && deadcode == removeDeadValues {
+						// Reset any value that is now unused, so that we decrement
+						// the use counts of its arguments.
+						// Not quite a deadcode pass, because it does not handle cycles.
+						// But it should help Uses==1 rules to fire.
+						v.reset(OpInvalid)
+						change = true
+					}
+					// No point rewriting values that aren't used.
+					continue
+				}
 
 				vchange := phielimValue(v)
 				if vchange && debug > 1 {
@@ -631,6 +651,10 @@
 	return flagConstant(x)
 }
 
+func auxIntToOp(cc int64) Op {
+	return Op(cc)
+}
+
 func boolToAuxInt(b bool) int64 {
 	if b {
 		return 1
@@ -674,6 +698,10 @@
 	return int64(x)
 }
 
+func opToAuxInt(o Op) int64 {
+	return int64(o)
+}
+
 func auxToString(i interface{}) string {
 	return i.(string)
 }
@@ -707,13 +735,6 @@
 func s390xRotateParamsToAux(r s390x.RotateParams) interface{} {
 	return r
 }
-func cCopToAux(o Op) interface{} {
-	return o
-}
-
-func auxToCCop(cc interface{}) Op {
-	return cc.(Op)
-}
 
 // uaddOvf reports whether unsigned a+b would overflow.
 func uaddOvf(a, b int64) bool {
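The deadcode option matters because a rewrite can orphan the values it matched over; resetting an orphan right away decrements the use counts of its arguments, which can let a later Uses == 1 rule fire within the same pass instead of waiting for the next deadcode run. A toy model of that bookkeeping (hypothetical types, not the compiler's):

    package main

    import "fmt"

    type value struct {
        name string
        uses int
        args []*value
    }

    // reset models v.reset(OpInvalid): detach the args and decrement their
    // use counts.
    func reset(v *value) {
        for _, a := range v.args {
            a.uses--
        }
        v.args = nil
    }

    func main() {
        x := &value{name: "x", uses: 2} // used by "dead" and one live value
        dead := &value{name: "dead", args: []*value{x}}
        reset(dead)
        fmt.Println(x.uses) // 1: a Uses==1 rule on x can now fire
    }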
diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go
index cda9df5..89d6405 100644
--- a/src/cmd/compile/internal/ssa/rewriteAMD64.go
+++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go
@@ -1245,9 +1245,9 @@
 			if v_1.Op != OpAMD64MOVLconst {
 				continue
 			}
-			c := v_1.AuxInt
+			c := auxIntToInt32(v_1.AuxInt)
 			v.reset(OpAMD64ADDLconst)
-			v.AuxInt = c
+			v.AuxInt = int32ToAuxInt(c)
 			v.AddArg(x)
 			return true
 		}
@@ -1261,17 +1261,17 @@
 			if v_0.Op != OpAMD64SHLLconst {
 				continue
 			}
-			c := v_0.AuxInt
+			c := auxIntToInt8(v_0.AuxInt)
 			x := v_0.Args[0]
 			if v_1.Op != OpAMD64SHRLconst {
 				continue
 			}
-			d := v_1.AuxInt
+			d := auxIntToInt8(v_1.AuxInt)
 			if x != v_1.Args[0] || !(d == 32-c) {
 				continue
 			}
 			v.reset(OpAMD64ROLLconst)
-			v.AuxInt = c
+			v.AuxInt = int8ToAuxInt(c)
 			v.AddArg(x)
 			return true
 		}
@@ -1286,17 +1286,17 @@
 			if v_0.Op != OpAMD64SHLLconst {
 				continue
 			}
-			c := v_0.AuxInt
+			c := auxIntToInt8(v_0.AuxInt)
 			x := v_0.Args[0]
 			if v_1.Op != OpAMD64SHRWconst {
 				continue
 			}
-			d := v_1.AuxInt
+			d := auxIntToInt8(v_1.AuxInt)
 			if x != v_1.Args[0] || !(d == 16-c && c < 16 && t.Size() == 2) {
 				continue
 			}
 			v.reset(OpAMD64ROLWconst)
-			v.AuxInt = c
+			v.AuxInt = int8ToAuxInt(c)
 			v.AddArg(x)
 			return true
 		}
@@ -1311,17 +1311,17 @@
 			if v_0.Op != OpAMD64SHLLconst {
 				continue
 			}
-			c := v_0.AuxInt
+			c := auxIntToInt8(v_0.AuxInt)
 			x := v_0.Args[0]
 			if v_1.Op != OpAMD64SHRBconst {
 				continue
 			}
-			d := v_1.AuxInt
+			d := auxIntToInt8(v_1.AuxInt)
 			if x != v_1.Args[0] || !(d == 8-c && c < 8 && t.Size() == 1) {
 				continue
 			}
 			v.reset(OpAMD64ROLBconst)
-			v.AuxInt = c
+			v.AuxInt = int8ToAuxInt(c)
 			v.AddArg(x)
 			return true
 		}
@@ -1332,7 +1332,7 @@
 	for {
 		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
 			x := v_0
-			if v_1.Op != OpAMD64SHLLconst || v_1.AuxInt != 3 {
+			if v_1.Op != OpAMD64SHLLconst || auxIntToInt8(v_1.AuxInt) != 3 {
 				continue
 			}
 			y := v_1.Args[0]
@@ -1347,7 +1347,7 @@
 	for {
 		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
 			x := v_0
-			if v_1.Op != OpAMD64SHLLconst || v_1.AuxInt != 2 {
+			if v_1.Op != OpAMD64SHLLconst || auxIntToInt8(v_1.AuxInt) != 2 {
 				continue
 			}
 			y := v_1.Args[0]
@@ -1362,7 +1362,7 @@
 	for {
 		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
 			x := v_0
-			if v_1.Op != OpAMD64SHLLconst || v_1.AuxInt != 1 {
+			if v_1.Op != OpAMD64SHLLconst || auxIntToInt8(v_1.AuxInt) != 1 {
 				continue
 			}
 			y := v_1.Args[0]
@@ -1420,11 +1420,11 @@
 			if v_0.Op != OpAMD64ADDLconst {
 				continue
 			}
-			c := v_0.AuxInt
+			c := auxIntToInt32(v_0.AuxInt)
 			x := v_0.Args[0]
 			y := v_1
 			v.reset(OpAMD64LEAL1)
-			v.AuxInt = c
+			v.AuxInt = int32ToAuxInt(c)
 			v.AddArg2(x, y)
 			return true
 		}
@@ -1439,15 +1439,15 @@
 			if v_1.Op != OpAMD64LEAL {
 				continue
 			}
-			c := v_1.AuxInt
-			s := v_1.Aux
+			c := auxIntToInt32(v_1.AuxInt)
+			s := auxToSym(v_1.Aux)
 			y := v_1.Args[0]
 			if !(x.Op != OpSB && y.Op != OpSB) {
 				continue
 			}
 			v.reset(OpAMD64LEAL1)
-			v.AuxInt = c
-			v.Aux = s
+			v.AuxInt = int32ToAuxInt(c)
+			v.Aux = symToAux(s)
 			v.AddArg2(x, y)
 			return true
 		}
@@ -1500,131 +1500,131 @@
 	// match: (ADDLconst [c] (ADDL x y))
 	// result: (LEAL1 [c] x y)
 	for {
-		c := v.AuxInt
+		c := auxIntToInt32(v.AuxInt)
 		if v_0.Op != OpAMD64ADDL {
 			break
 		}
 		y := v_0.Args[1]
 		x := v_0.Args[0]
 		v.reset(OpAMD64LEAL1)
-		v.AuxInt = c
+		v.AuxInt = int32ToAuxInt(c)
 		v.AddArg2(x, y)
 		return true
 	}
 	// match: (ADDLconst [c] (SHLLconst [1] x))
 	// result: (LEAL1 [c] x x)
 	for {
-		c := v.AuxInt
-		if v_0.Op != OpAMD64SHLLconst || v_0.AuxInt != 1 {
+		c := auxIntToInt32(v.AuxInt)
+		if v_0.Op != OpAMD64SHLLconst || auxIntToInt8(v_0.AuxInt) != 1 {
 			break
 		}
 		x := v_0.Args[0]
 		v.reset(OpAMD64LEAL1)
-		v.AuxInt = c
+		v.AuxInt = int32ToAuxInt(c)
 		v.AddArg2(x, x)
 		return true
 	}
 	// match: (ADDLconst [c] (LEAL [d] {s} x))
-	// cond: is32Bit(c+d)
+	// cond: is32Bit(int64(c)+int64(d))
 	// result: (LEAL [c+d] {s} x)
 	for {
-		c := v.AuxInt
+		c := auxIntToInt32(v.AuxInt)
 		if v_0.Op != OpAMD64LEAL {
 			break
 		}
-		d := v_0.AuxInt
-		s := v_0.Aux
+		d := auxIntToInt32(v_0.AuxInt)
+		s := auxToSym(v_0.Aux)
 		x := v_0.Args[0]
-		if !(is32Bit(c + d)) {
+		if !(is32Bit(int64(c) + int64(d))) {
 			break
 		}
 		v.reset(OpAMD64LEAL)
-		v.AuxInt = c + d
-		v.Aux = s
+		v.AuxInt = int32ToAuxInt(c + d)
+		v.Aux = symToAux(s)
 		v.AddArg(x)
 		return true
 	}
 	// match: (ADDLconst [c] (LEAL1 [d] {s} x y))
-	// cond: is32Bit(c+d)
+	// cond: is32Bit(int64(c)+int64(d))
 	// result: (LEAL1 [c+d] {s} x y)
 	for {
-		c := v.AuxInt
+		c := auxIntToInt32(v.AuxInt)
 		if v_0.Op != OpAMD64LEAL1 {
 			break
 		}
-		d := v_0.AuxInt
-		s := v_0.Aux
+		d := auxIntToInt32(v_0.AuxInt)
+		s := auxToSym(v_0.Aux)
 		y := v_0.Args[1]
 		x := v_0.Args[0]
-		if !(is32Bit(c + d)) {
+		if !(is32Bit(int64(c) + int64(d))) {
 			break
 		}
 		v.reset(OpAMD64LEAL1)
-		v.AuxInt = c + d
-		v.Aux = s
+		v.AuxInt = int32ToAuxInt(c + d)
+		v.Aux = symToAux(s)
 		v.AddArg2(x, y)
 		return true
 	}
 	// match: (ADDLconst [c] (LEAL2 [d] {s} x y))
-	// cond: is32Bit(c+d)
+	// cond: is32Bit(int64(c)+int64(d))
 	// result: (LEAL2 [c+d] {s} x y)
 	for {
-		c := v.AuxInt
+		c := auxIntToInt32(v.AuxInt)
 		if v_0.Op != OpAMD64LEAL2 {
 			break
 		}
-		d := v_0.AuxInt
-		s := v_0.Aux
+		d := auxIntToInt32(v_0.AuxInt)
+		s := auxToSym(v_0.Aux)
 		y := v_0.Args[1]
 		x := v_0.Args[0]
-		if !(is32Bit(c + d)) {
+		if !(is32Bit(int64(c) + int64(d))) {
 			break
 		}
 		v.reset(OpAMD64LEAL2)
-		v.AuxInt = c + d
-		v.Aux = s
+		v.AuxInt = int32ToAuxInt(c + d)
+		v.Aux = symToAux(s)
 		v.AddArg2(x, y)
 		return true
 	}
 	// match: (ADDLconst [c] (LEAL4 [d] {s} x y))
-	// cond: is32Bit(c+d)
+	// cond: is32Bit(int64(c)+int64(d))
 	// result: (LEAL4 [c+d] {s} x y)
 	for {
-		c := v.AuxInt
+		c := auxIntToInt32(v.AuxInt)
 		if v_0.Op != OpAMD64LEAL4 {
 			break
 		}
-		d := v_0.AuxInt
-		s := v_0.Aux
+		d := auxIntToInt32(v_0.AuxInt)
+		s := auxToSym(v_0.Aux)
 		y := v_0.Args[1]
 		x := v_0.Args[0]
-		if !(is32Bit(c + d)) {
+		if !(is32Bit(int64(c) + int64(d))) {
 			break
 		}
 		v.reset(OpAMD64LEAL4)
-		v.AuxInt = c + d
-		v.Aux = s
+		v.AuxInt = int32ToAuxInt(c + d)
+		v.Aux = symToAux(s)
 		v.AddArg2(x, y)
 		return true
 	}
 	// match: (ADDLconst [c] (LEAL8 [d] {s} x y))
-	// cond: is32Bit(c+d)
+	// cond: is32Bit(int64(c)+int64(d))
 	// result: (LEAL8 [c+d] {s} x y)
 	for {
-		c := v.AuxInt
+		c := auxIntToInt32(v.AuxInt)
 		if v_0.Op != OpAMD64LEAL8 {
 			break
 		}
-		d := v_0.AuxInt
-		s := v_0.Aux
+		d := auxIntToInt32(v_0.AuxInt)
+		s := auxToSym(v_0.Aux)
 		y := v_0.Args[1]
 		x := v_0.Args[0]
-		if !(is32Bit(c + d)) {
+		if !(is32Bit(int64(c) + int64(d))) {
 			break
 		}
 		v.reset(OpAMD64LEAL8)
-		v.AuxInt = c + d
-		v.Aux = s
+		v.AuxInt = int32ToAuxInt(c + d)
+		v.Aux = symToAux(s)
 		v.AddArg2(x, y)
 		return true
 	}
@@ -1685,23 +1685,23 @@
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	// match: (ADDLconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem)
-	// cond: ValAndOff(valoff1).canAdd(off2)
-	// result: (ADDLconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem)
+	// cond: ValAndOff(valoff1).canAdd32(off2)
+	// result: (ADDLconstmodify [ValAndOff(valoff1).addOffset32(off2)] {sym} base mem)
 	for {
-		valoff1 := v.AuxInt
-		sym := v.Aux
+		valoff1 := auxIntToValAndOff(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		if v_0.Op != OpAMD64ADDQconst {
 			break
 		}
-		off2 := v_0.AuxInt
+		off2 := auxIntToInt32(v_0.AuxInt)
 		base := v_0.Args[0]
 		mem := v_1
-		if !(ValAndOff(valoff1).canAdd(off2)) {
+		if !(ValAndOff(valoff1).canAdd32(off2)) {
 			break
 		}
 		v.reset(OpAMD64ADDLconstmodify)
-		v.AuxInt = ValAndOff(valoff1).add(off2)
-		v.Aux = sym
+		v.AuxInt = valAndOffToAuxInt(ValAndOff(valoff1).addOffset32(off2))
+		v.Aux = symToAux(sym)
 		v.AddArg2(base, mem)
 		return true
 	}
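The canAdd/add pair on ValAndOff becomes canAdd32/addOffset32 throughout, matching the int32 offsets of the typed rules. A toy model of the packing involved, assuming the value-in-high-32/offset-in-low-32 layout (illustrative, not the compiler's definitions):

    package main

    import "fmt"

    // valAndOff packs a 32-bit value and a 32-bit offset into one int64,
    // value in the high half, offset in the low half.
    type valAndOff int64

    func makeValAndOff(val, off int32) valAndOff {
        return valAndOff(int64(val)<<32 | int64(uint32(off)))
    }
    func (x valAndOff) val() int32 { return int32(int64(x) >> 32) }
    func (x valAndOff) off() int32 { return int32(int64(x)) }

    // canAdd32 checks that the new offset still fits in 32 bits before folding.
    func (x valAndOff) canAdd32(off int32) bool {
        n := int64(x.off()) + int64(off)
        return n == int64(int32(n))
    }
    func (x valAndOff) addOffset32(off int32) valAndOff {
        return makeValAndOff(x.val(), x.off()+off)
    }

    func main() {
        v := makeValAndOff(7, 100)
        if v.canAdd32(28) {
            w := v.addOffset32(28)
            fmt.Println(w.val(), w.off()) // 7 128
        }
    }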
@@ -1736,24 +1736,24 @@
 	b := v.Block
 	typ := &b.Func.Config.Types
 	// match: (ADDLload [off1] {sym} val (ADDQconst [off2] base) mem)
-	// cond: is32Bit(off1+off2)
+	// cond: is32Bit(int64(off1)+int64(off2))
 	// result: (ADDLload [off1+off2] {sym} val base mem)
 	for {
-		off1 := v.AuxInt
-		sym := v.Aux
+		off1 := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		val := v_0
 		if v_1.Op != OpAMD64ADDQconst {
 			break
 		}
-		off2 := v_1.AuxInt
+		off2 := auxIntToInt32(v_1.AuxInt)
 		base := v_1.Args[0]
 		mem := v_2
-		if !(is32Bit(off1 + off2)) {
+		if !(is32Bit(int64(off1) + int64(off2))) {
 			break
 		}
 		v.reset(OpAMD64ADDLload)
-		v.AuxInt = off1 + off2
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off1 + off2)
+		v.Aux = symToAux(sym)
 		v.AddArg3(val, base, mem)
 		return true
 	}
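Conditions like is32Bit(int64(off1)+int64(off2)) widen before adding on purpose: with the offsets now typed int32, summing them in 32 bits first could wrap and defeat the range check. A small demonstration, using the standard fits-in-int32 definition:

    package main

    import "fmt"

    // is32Bit reports whether n fits in a signed 32-bit integer.
    func is32Bit(n int64) bool { return n == int64(int32(n)) }

    func main() {
        var off1, off2 int32 = 2_000_000_000, 2_000_000_000
        fmt.Println(is32Bit(int64(off1) + int64(off2))) // false: 4e9 overflows
        fmt.Println(off1 + off2)                        // wraps to -294967296
    }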
@@ -1807,24 +1807,24 @@
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	// match: (ADDLmodify [off1] {sym} (ADDQconst [off2] base) val mem)
-	// cond: is32Bit(off1+off2)
+	// cond: is32Bit(int64(off1)+int64(off2))
 	// result: (ADDLmodify [off1+off2] {sym} base val mem)
 	for {
-		off1 := v.AuxInt
-		sym := v.Aux
+		off1 := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		if v_0.Op != OpAMD64ADDQconst {
 			break
 		}
-		off2 := v_0.AuxInt
+		off2 := auxIntToInt32(v_0.AuxInt)
 		base := v_0.Args[0]
 		val := v_1
 		mem := v_2
-		if !(is32Bit(off1 + off2)) {
+		if !(is32Bit(int64(off1) + int64(off2))) {
 			break
 		}
 		v.reset(OpAMD64ADDLmodify)
-		v.AuxInt = off1 + off2
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off1 + off2)
+		v.Aux = symToAux(sym)
 		v.AddArg3(base, val, mem)
 		return true
 	}
@@ -1858,39 +1858,35 @@
 	v_0 := v.Args[0]
 	// match: (ADDQ x (MOVQconst [c]))
 	// cond: is32Bit(c)
-	// result: (ADDQconst [c] x)
+	// result: (ADDQconst [int32(c)] x)
 	for {
 		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
 			x := v_0
 			if v_1.Op != OpAMD64MOVQconst {
 				continue
 			}
-			c := v_1.AuxInt
+			c := auxIntToInt64(v_1.AuxInt)
 			if !(is32Bit(c)) {
 				continue
 			}
 			v.reset(OpAMD64ADDQconst)
-			v.AuxInt = c
+			v.AuxInt = int32ToAuxInt(int32(c))
 			v.AddArg(x)
 			return true
 		}
 		break
 	}
 	// match: (ADDQ x (MOVLconst [c]))
-	// cond: is32Bit(c)
-	// result: (ADDQconst [int64(int32(c))] x)
+	// result: (ADDQconst [c] x)
 	for {
 		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
 			x := v_0
 			if v_1.Op != OpAMD64MOVLconst {
 				continue
 			}
-			c := v_1.AuxInt
-			if !(is32Bit(c)) {
-				continue
-			}
+			c := auxIntToInt32(v_1.AuxInt)
 			v.reset(OpAMD64ADDQconst)
-			v.AuxInt = int64(int32(c))
+			v.AuxInt = int32ToAuxInt(c)
 			v.AddArg(x)
 			return true
 		}
@@ -1904,17 +1900,17 @@
 			if v_0.Op != OpAMD64SHLQconst {
 				continue
 			}
-			c := v_0.AuxInt
+			c := auxIntToInt8(v_0.AuxInt)
 			x := v_0.Args[0]
 			if v_1.Op != OpAMD64SHRQconst {
 				continue
 			}
-			d := v_1.AuxInt
+			d := auxIntToInt8(v_1.AuxInt)
 			if x != v_1.Args[0] || !(d == 64-c) {
 				continue
 			}
 			v.reset(OpAMD64ROLQconst)
-			v.AuxInt = c
+			v.AuxInt = int8ToAuxInt(c)
 			v.AddArg(x)
 			return true
 		}
@@ -1925,7 +1921,7 @@
 	for {
 		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
 			x := v_0
-			if v_1.Op != OpAMD64SHLQconst || v_1.AuxInt != 3 {
+			if v_1.Op != OpAMD64SHLQconst || auxIntToInt8(v_1.AuxInt) != 3 {
 				continue
 			}
 			y := v_1.Args[0]
@@ -1940,7 +1936,7 @@
 	for {
 		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
 			x := v_0
-			if v_1.Op != OpAMD64SHLQconst || v_1.AuxInt != 2 {
+			if v_1.Op != OpAMD64SHLQconst || auxIntToInt8(v_1.AuxInt) != 2 {
 				continue
 			}
 			y := v_1.Args[0]
@@ -1955,7 +1951,7 @@
 	for {
 		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
 			x := v_0
-			if v_1.Op != OpAMD64SHLQconst || v_1.AuxInt != 1 {
+			if v_1.Op != OpAMD64SHLQconst || auxIntToInt8(v_1.AuxInt) != 1 {
 				continue
 			}
 			y := v_1.Args[0]
@@ -2013,11 +2009,11 @@
 			if v_0.Op != OpAMD64ADDQconst {
 				continue
 			}
-			c := v_0.AuxInt
+			c := auxIntToInt32(v_0.AuxInt)
 			x := v_0.Args[0]
 			y := v_1
 			v.reset(OpAMD64LEAQ1)
-			v.AuxInt = c
+			v.AuxInt = int32ToAuxInt(c)
 			v.AddArg2(x, y)
 			return true
 		}
@@ -2032,15 +2028,15 @@
 			if v_1.Op != OpAMD64LEAQ {
 				continue
 			}
-			c := v_1.AuxInt
-			s := v_1.Aux
+			c := auxIntToInt32(v_1.AuxInt)
+			s := auxToSym(v_1.Aux)
 			y := v_1.Args[0]
 			if !(x.Op != OpSB && y.Op != OpSB) {
 				continue
 			}
 			v.reset(OpAMD64LEAQ1)
-			v.AuxInt = c
-			v.Aux = s
+			v.AuxInt = int32ToAuxInt(c)
+			v.Aux = symToAux(s)
 			v.AddArg2(x, y)
 			return true
 		}
@@ -2118,131 +2114,131 @@
 	// match: (ADDQconst [c] (ADDQ x y))
 	// result: (LEAQ1 [c] x y)
 	for {
-		c := v.AuxInt
+		c := auxIntToInt32(v.AuxInt)
 		if v_0.Op != OpAMD64ADDQ {
 			break
 		}
 		y := v_0.Args[1]
 		x := v_0.Args[0]
 		v.reset(OpAMD64LEAQ1)
-		v.AuxInt = c
+		v.AuxInt = int32ToAuxInt(c)
 		v.AddArg2(x, y)
 		return true
 	}
 	// match: (ADDQconst [c] (SHLQconst [1] x))
 	// result: (LEAQ1 [c] x x)
 	for {
-		c := v.AuxInt
-		if v_0.Op != OpAMD64SHLQconst || v_0.AuxInt != 1 {
+		c := auxIntToInt32(v.AuxInt)
+		if v_0.Op != OpAMD64SHLQconst || auxIntToInt8(v_0.AuxInt) != 1 {
 			break
 		}
 		x := v_0.Args[0]
 		v.reset(OpAMD64LEAQ1)
-		v.AuxInt = c
+		v.AuxInt = int32ToAuxInt(c)
 		v.AddArg2(x, x)
 		return true
 	}
 	// match: (ADDQconst [c] (LEAQ [d] {s} x))
-	// cond: is32Bit(c+d)
+	// cond: is32Bit(int64(c)+int64(d))
 	// result: (LEAQ [c+d] {s} x)
 	for {
-		c := v.AuxInt
+		c := auxIntToInt32(v.AuxInt)
 		if v_0.Op != OpAMD64LEAQ {
 			break
 		}
-		d := v_0.AuxInt
-		s := v_0.Aux
+		d := auxIntToInt32(v_0.AuxInt)
+		s := auxToSym(v_0.Aux)
 		x := v_0.Args[0]
-		if !(is32Bit(c + d)) {
+		if !(is32Bit(int64(c) + int64(d))) {
 			break
 		}
 		v.reset(OpAMD64LEAQ)
-		v.AuxInt = c + d
-		v.Aux = s
+		v.AuxInt = int32ToAuxInt(c + d)
+		v.Aux = symToAux(s)
 		v.AddArg(x)
 		return true
 	}
 	// match: (ADDQconst [c] (LEAQ1 [d] {s} x y))
-	// cond: is32Bit(c+d)
+	// cond: is32Bit(int64(c)+int64(d))
 	// result: (LEAQ1 [c+d] {s} x y)
 	for {
-		c := v.AuxInt
+		c := auxIntToInt32(v.AuxInt)
 		if v_0.Op != OpAMD64LEAQ1 {
 			break
 		}
-		d := v_0.AuxInt
-		s := v_0.Aux
+		d := auxIntToInt32(v_0.AuxInt)
+		s := auxToSym(v_0.Aux)
 		y := v_0.Args[1]
 		x := v_0.Args[0]
-		if !(is32Bit(c + d)) {
+		if !(is32Bit(int64(c) + int64(d))) {
 			break
 		}
 		v.reset(OpAMD64LEAQ1)
-		v.AuxInt = c + d
-		v.Aux = s
+		v.AuxInt = int32ToAuxInt(c + d)
+		v.Aux = symToAux(s)
 		v.AddArg2(x, y)
 		return true
 	}
 	// match: (ADDQconst [c] (LEAQ2 [d] {s} x y))
-	// cond: is32Bit(c+d)
+	// cond: is32Bit(int64(c)+int64(d))
 	// result: (LEAQ2 [c+d] {s} x y)
 	for {
-		c := v.AuxInt
+		c := auxIntToInt32(v.AuxInt)
 		if v_0.Op != OpAMD64LEAQ2 {
 			break
 		}
-		d := v_0.AuxInt
-		s := v_0.Aux
+		d := auxIntToInt32(v_0.AuxInt)
+		s := auxToSym(v_0.Aux)
 		y := v_0.Args[1]
 		x := v_0.Args[0]
-		if !(is32Bit(c + d)) {
+		if !(is32Bit(int64(c) + int64(d))) {
 			break
 		}
 		v.reset(OpAMD64LEAQ2)
-		v.AuxInt = c + d
-		v.Aux = s
+		v.AuxInt = int32ToAuxInt(c + d)
+		v.Aux = symToAux(s)
 		v.AddArg2(x, y)
 		return true
 	}
 	// match: (ADDQconst [c] (LEAQ4 [d] {s} x y))
-	// cond: is32Bit(c+d)
+	// cond: is32Bit(int64(c)+int64(d))
 	// result: (LEAQ4 [c+d] {s} x y)
 	for {
-		c := v.AuxInt
+		c := auxIntToInt32(v.AuxInt)
 		if v_0.Op != OpAMD64LEAQ4 {
 			break
 		}
-		d := v_0.AuxInt
-		s := v_0.Aux
+		d := auxIntToInt32(v_0.AuxInt)
+		s := auxToSym(v_0.Aux)
 		y := v_0.Args[1]
 		x := v_0.Args[0]
-		if !(is32Bit(c + d)) {
+		if !(is32Bit(int64(c) + int64(d))) {
 			break
 		}
 		v.reset(OpAMD64LEAQ4)
-		v.AuxInt = c + d
-		v.Aux = s
+		v.AuxInt = int32ToAuxInt(c + d)
+		v.Aux = symToAux(s)
 		v.AddArg2(x, y)
 		return true
 	}
 	// match: (ADDQconst [c] (LEAQ8 [d] {s} x y))
-	// cond: is32Bit(c+d)
+	// cond: is32Bit(int64(c)+int64(d))
 	// result: (LEAQ8 [c+d] {s} x y)
 	for {
-		c := v.AuxInt
+		c := auxIntToInt32(v.AuxInt)
 		if v_0.Op != OpAMD64LEAQ8 {
 			break
 		}
-		d := v_0.AuxInt
-		s := v_0.Aux
+		d := auxIntToInt32(v_0.AuxInt)
+		s := auxToSym(v_0.Aux)
 		y := v_0.Args[1]
 		x := v_0.Args[0]
-		if !(is32Bit(c + d)) {
+		if !(is32Bit(int64(c) + int64(d))) {
 			break
 		}
 		v.reset(OpAMD64LEAQ8)
-		v.AuxInt = c + d
-		v.Aux = s
+		v.AuxInt = int32ToAuxInt(c + d)
+		v.Aux = symToAux(s)
 		v.AddArg2(x, y)
 		return true
 	}
@@ -2305,23 +2301,23 @@
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	// match: (ADDQconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem)
-	// cond: ValAndOff(valoff1).canAdd(off2)
-	// result: (ADDQconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem)
+	// cond: ValAndOff(valoff1).canAdd32(off2)
+	// result: (ADDQconstmodify [ValAndOff(valoff1).addOffset32(off2)] {sym} base mem)
 	for {
-		valoff1 := v.AuxInt
-		sym := v.Aux
+		valoff1 := auxIntToValAndOff(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		if v_0.Op != OpAMD64ADDQconst {
 			break
 		}
-		off2 := v_0.AuxInt
+		off2 := auxIntToInt32(v_0.AuxInt)
 		base := v_0.Args[0]
 		mem := v_1
-		if !(ValAndOff(valoff1).canAdd(off2)) {
+		if !(ValAndOff(valoff1).canAdd32(off2)) {
 			break
 		}
 		v.reset(OpAMD64ADDQconstmodify)
-		v.AuxInt = ValAndOff(valoff1).add(off2)
-		v.Aux = sym
+		v.AuxInt = valAndOffToAuxInt(ValAndOff(valoff1).addOffset32(off2))
+		v.Aux = symToAux(sym)
 		v.AddArg2(base, mem)
 		return true
 	}
@@ -2356,24 +2352,24 @@
 	b := v.Block
 	typ := &b.Func.Config.Types
 	// match: (ADDQload [off1] {sym} val (ADDQconst [off2] base) mem)
-	// cond: is32Bit(off1+off2)
+	// cond: is32Bit(int64(off1)+int64(off2))
 	// result: (ADDQload [off1+off2] {sym} val base mem)
 	for {
-		off1 := v.AuxInt
-		sym := v.Aux
+		off1 := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		val := v_0
 		if v_1.Op != OpAMD64ADDQconst {
 			break
 		}
-		off2 := v_1.AuxInt
+		off2 := auxIntToInt32(v_1.AuxInt)
 		base := v_1.Args[0]
 		mem := v_2
-		if !(is32Bit(off1 + off2)) {
+		if !(is32Bit(int64(off1) + int64(off2))) {
 			break
 		}
 		v.reset(OpAMD64ADDQload)
-		v.AuxInt = off1 + off2
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off1 + off2)
+		v.Aux = symToAux(sym)
 		v.AddArg3(val, base, mem)
 		return true
 	}
@@ -2427,24 +2423,24 @@
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	// match: (ADDQmodify [off1] {sym} (ADDQconst [off2] base) val mem)
-	// cond: is32Bit(off1+off2)
+	// cond: is32Bit(int64(off1)+int64(off2))
 	// result: (ADDQmodify [off1+off2] {sym} base val mem)
 	for {
-		off1 := v.AuxInt
-		sym := v.Aux
+		off1 := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		if v_0.Op != OpAMD64ADDQconst {
 			break
 		}
-		off2 := v_0.AuxInt
+		off2 := auxIntToInt32(v_0.AuxInt)
 		base := v_0.Args[0]
 		val := v_1
 		mem := v_2
-		if !(is32Bit(off1 + off2)) {
+		if !(is32Bit(int64(off1) + int64(off2))) {
 			break
 		}
 		v.reset(OpAMD64ADDQmodify)
-		v.AuxInt = off1 + off2
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off1 + off2)
+		v.Aux = symToAux(sym)
 		v.AddArg3(base, val, mem)
 		return true
 	}
@@ -2510,24 +2506,24 @@
 	b := v.Block
 	typ := &b.Func.Config.Types
 	// match: (ADDSDload [off1] {sym} val (ADDQconst [off2] base) mem)
-	// cond: is32Bit(off1+off2)
+	// cond: is32Bit(int64(off1)+int64(off2))
 	// result: (ADDSDload [off1+off2] {sym} val base mem)
 	for {
-		off1 := v.AuxInt
-		sym := v.Aux
+		off1 := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		val := v_0
 		if v_1.Op != OpAMD64ADDQconst {
 			break
 		}
-		off2 := v_1.AuxInt
+		off2 := auxIntToInt32(v_1.AuxInt)
 		base := v_1.Args[0]
 		mem := v_2
-		if !(is32Bit(off1 + off2)) {
+		if !(is32Bit(int64(off1) + int64(off2))) {
 			break
 		}
 		v.reset(OpAMD64ADDSDload)
-		v.AuxInt = off1 + off2
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off1 + off2)
+		v.Aux = symToAux(sym)
 		v.AddArg3(val, base, mem)
 		return true
 	}
@@ -2613,24 +2609,24 @@
 	b := v.Block
 	typ := &b.Func.Config.Types
 	// match: (ADDSSload [off1] {sym} val (ADDQconst [off2] base) mem)
-	// cond: is32Bit(off1+off2)
+	// cond: is32Bit(int64(off1)+int64(off2))
 	// result: (ADDSSload [off1+off2] {sym} val base mem)
 	for {
-		off1 := v.AuxInt
-		sym := v.Aux
+		off1 := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		val := v_0
 		if v_1.Op != OpAMD64ADDQconst {
 			break
 		}
-		off2 := v_1.AuxInt
+		off2 := auxIntToInt32(v_1.AuxInt)
 		base := v_1.Args[0]
 		mem := v_2
-		if !(is32Bit(off1 + off2)) {
+		if !(is32Bit(int64(off1) + int64(off2))) {
 			break
 		}
 		v.reset(OpAMD64ADDSSload)
-		v.AuxInt = off1 + off2
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off1 + off2)
+		v.Aux = symToAux(sym)
 		v.AddArg3(val, base, mem)
 		return true
 	}
@@ -2695,7 +2691,7 @@
 			}
 			y := v_0_0.Args[1]
 			v_0_0_0 := v_0_0.Args[0]
-			if v_0_0_0.Op != OpAMD64MOVLconst || v_0_0_0.AuxInt != 1 {
+			if v_0_0_0.Op != OpAMD64MOVLconst || auxIntToInt32(v_0_0_0.AuxInt) != 1 {
 				continue
 			}
 			x := v_1
@@ -2706,20 +2702,20 @@
 		break
 	}
 	// match: (ANDL (MOVLconst [c]) x)
-	// cond: isUint32PowerOfTwo(^c) && uint64(^c) >= 128
-	// result: (BTRLconst [log2uint32(^c)] x)
+	// cond: isUint32PowerOfTwo(int64(^c)) && uint64(^c) >= 128
+	// result: (BTRLconst [int8(log32(^c))] x)
 	for {
 		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
 			if v_0.Op != OpAMD64MOVLconst {
 				continue
 			}
-			c := v_0.AuxInt
+			c := auxIntToInt32(v_0.AuxInt)
 			x := v_1
-			if !(isUint32PowerOfTwo(^c) && uint64(^c) >= 128) {
+			if !(isUint32PowerOfTwo(int64(^c)) && uint64(^c) >= 128) {
 				continue
 			}
 			v.reset(OpAMD64BTRLconst)
-			v.AuxInt = log2uint32(^c)
+			v.AuxInt = int8ToAuxInt(int8(log32(^c)))
 			v.AddArg(x)
 			return true
 		}
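The BT-instruction rewrites now read the mask as int32, widen only for the power-of-two test, and truncate the bit index into the op's int8 aux, hence int8(log32(^c)). Toy stand-ins for the two helpers (illustrative bodies; the real ones live in rewrite.go):

    package main

    import (
        "fmt"
        "math/bits"
    )

    // isUint32PowerOfTwo reports whether the low 32 bits of c hold exactly
    // one set bit; log32 is that bit's index.
    func isUint32PowerOfTwo(c int64) bool {
        n := uint32(c)
        return n != 0 && n&(n-1) == 0
    }
    func log32(c int32) int { return bits.Len32(uint32(c)) - 1 }

    func main() {
        var c int32 = ^int32(1 << 9) // ANDL mask that clears bit 9
        fmt.Println(isUint32PowerOfTwo(int64(^c)), log32(^c)) // true 9
    }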
@@ -2733,9 +2729,9 @@
 			if v_1.Op != OpAMD64MOVLconst {
 				continue
 			}
-			c := v_1.AuxInt
+			c := auxIntToInt32(v_1.AuxInt)
 			v.reset(OpAMD64ANDLconst)
-			v.AuxInt = c
+			v.AuxInt = int32ToAuxInt(c)
 			v.AddArg(x)
 			return true
 		}
@@ -2781,16 +2777,16 @@
 func rewriteValueAMD64_OpAMD64ANDLconst(v *Value) bool {
 	v_0 := v.Args[0]
 	// match: (ANDLconst [c] x)
-	// cond: isUint32PowerOfTwo(^c) && uint64(^c) >= 128
-	// result: (BTRLconst [log2uint32(^c)] x)
+	// cond: isUint32PowerOfTwo(int64(^c)) && uint64(^c) >= 128
+	// result: (BTRLconst [int8(log32(^c))] x)
 	for {
-		c := v.AuxInt
+		c := auxIntToInt32(v.AuxInt)
 		x := v_0
-		if !(isUint32PowerOfTwo(^c) && uint64(^c) >= 128) {
+		if !(isUint32PowerOfTwo(int64(^c)) && uint64(^c) >= 128) {
 			break
 		}
 		v.reset(OpAMD64BTRLconst)
-		v.AuxInt = log2uint32(^c)
+		v.AuxInt = int8ToAuxInt(int8(log32(^c)))
 		v.AddArg(x)
 		return true
 	}
@@ -2825,7 +2821,7 @@
 	// match: (ANDLconst [ 0xFF] x)
 	// result: (MOVBQZX x)
 	for {
-		if v.AuxInt != 0xFF {
+		if auxIntToInt32(v.AuxInt) != 0xFF {
 			break
 		}
 		x := v_0
@@ -2836,7 +2832,7 @@
 	// match: (ANDLconst [0xFFFF] x)
 	// result: (MOVWQZX x)
 	for {
-		if v.AuxInt != 0xFFFF {
+		if auxIntToInt32(v.AuxInt) != 0xFFFF {
 			break
 		}
 		x := v_0
@@ -2886,23 +2882,23 @@
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	// match: (ANDLconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem)
-	// cond: ValAndOff(valoff1).canAdd(off2)
-	// result: (ANDLconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem)
+	// cond: ValAndOff(valoff1).canAdd32(off2)
+	// result: (ANDLconstmodify [ValAndOff(valoff1).addOffset32(off2)] {sym} base mem)
 	for {
-		valoff1 := v.AuxInt
-		sym := v.Aux
+		valoff1 := auxIntToValAndOff(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		if v_0.Op != OpAMD64ADDQconst {
 			break
 		}
-		off2 := v_0.AuxInt
+		off2 := auxIntToInt32(v_0.AuxInt)
 		base := v_0.Args[0]
 		mem := v_1
-		if !(ValAndOff(valoff1).canAdd(off2)) {
+		if !(ValAndOff(valoff1).canAdd32(off2)) {
 			break
 		}
 		v.reset(OpAMD64ANDLconstmodify)
-		v.AuxInt = ValAndOff(valoff1).add(off2)
-		v.Aux = sym
+		v.AuxInt = valAndOffToAuxInt(ValAndOff(valoff1).addOffset32(off2))
+		v.Aux = symToAux(sym)
 		v.AddArg2(base, mem)
 		return true
 	}
@@ -2937,24 +2933,24 @@
 	b := v.Block
 	typ := &b.Func.Config.Types
 	// match: (ANDLload [off1] {sym} val (ADDQconst [off2] base) mem)
-	// cond: is32Bit(off1+off2)
+	// cond: is32Bit(int64(off1)+int64(off2))
 	// result: (ANDLload [off1+off2] {sym} val base mem)
 	for {
-		off1 := v.AuxInt
-		sym := v.Aux
+		off1 := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		val := v_0
 		if v_1.Op != OpAMD64ADDQconst {
 			break
 		}
-		off2 := v_1.AuxInt
+		off2 := auxIntToInt32(v_1.AuxInt)
 		base := v_1.Args[0]
 		mem := v_2
-		if !(is32Bit(off1 + off2)) {
+		if !(is32Bit(int64(off1) + int64(off2))) {
 			break
 		}
 		v.reset(OpAMD64ANDLload)
-		v.AuxInt = off1 + off2
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off1 + off2)
+		v.Aux = symToAux(sym)
 		v.AddArg3(val, base, mem)
 		return true
 	}
@@ -3008,24 +3004,24 @@
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	// match: (ANDLmodify [off1] {sym} (ADDQconst [off2] base) val mem)
-	// cond: is32Bit(off1+off2)
+	// cond: is32Bit(int64(off1)+int64(off2))
 	// result: (ANDLmodify [off1+off2] {sym} base val mem)
 	for {
-		off1 := v.AuxInt
-		sym := v.Aux
+		off1 := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		if v_0.Op != OpAMD64ADDQconst {
 			break
 		}
-		off2 := v_0.AuxInt
+		off2 := auxIntToInt32(v_0.AuxInt)
 		base := v_0.Args[0]
 		val := v_1
 		mem := v_2
-		if !(is32Bit(off1 + off2)) {
+		if !(is32Bit(int64(off1) + int64(off2))) {
 			break
 		}
 		v.reset(OpAMD64ANDLmodify)
-		v.AuxInt = off1 + off2
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off1 + off2)
+		v.Aux = symToAux(sym)
 		v.AddArg3(base, val, mem)
 		return true
 	}
@@ -3070,7 +3066,7 @@
 			}
 			y := v_0_0.Args[1]
 			v_0_0_0 := v_0_0.Args[0]
-			if v_0_0_0.Op != OpAMD64MOVQconst || v_0_0_0.AuxInt != 1 {
+			if v_0_0_0.Op != OpAMD64MOVQconst || auxIntToInt64(v_0_0_0.AuxInt) != 1 {
 				continue
 			}
 			x := v_1
@@ -3082,19 +3078,19 @@
 	}
 	// match: (ANDQ (MOVQconst [c]) x)
 	// cond: isUint64PowerOfTwo(^c) && uint64(^c) >= 128
-	// result: (BTRQconst [log2(^c)] x)
+	// result: (BTRQconst [int8(log2(^c))] x)
 	for {
 		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
 			if v_0.Op != OpAMD64MOVQconst {
 				continue
 			}
-			c := v_0.AuxInt
+			c := auxIntToInt64(v_0.AuxInt)
 			x := v_1
 			if !(isUint64PowerOfTwo(^c) && uint64(^c) >= 128) {
 				continue
 			}
 			v.reset(OpAMD64BTRQconst)
-			v.AuxInt = log2(^c)
+			v.AuxInt = int8ToAuxInt(int8(log2(^c)))
 			v.AddArg(x)
 			return true
 		}
@@ -3102,19 +3098,19 @@
 	}
 	// match: (ANDQ x (MOVQconst [c]))
 	// cond: is32Bit(c)
-	// result: (ANDQconst [c] x)
+	// result: (ANDQconst [int32(c)] x)
 	for {
 		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
 			x := v_0
 			if v_1.Op != OpAMD64MOVQconst {
 				continue
 			}
-			c := v_1.AuxInt
+			c := auxIntToInt64(v_1.AuxInt)
 			if !(is32Bit(c)) {
 				continue
 			}
 			v.reset(OpAMD64ANDQconst)
-			v.AuxInt = c
+			v.AuxInt = int32ToAuxInt(int32(c))
 			v.AddArg(x)
 			return true
 		}
@@ -3160,16 +3156,16 @@
 func rewriteValueAMD64_OpAMD64ANDQconst(v *Value) bool {
 	v_0 := v.Args[0]
 	// match: (ANDQconst [c] x)
-	// cond: isUint64PowerOfTwo(^c) && uint64(^c) >= 128
-	// result: (BTRQconst [log2(^c)] x)
+	// cond: isUint64PowerOfTwo(int64(^c)) && uint64(^c) >= 128
+	// result: (BTRQconst [int8(log32(^c))] x)
 	for {
-		c := v.AuxInt
+		c := auxIntToInt32(v.AuxInt)
 		x := v_0
-		if !(isUint64PowerOfTwo(^c) && uint64(^c) >= 128) {
+		if !(isUint64PowerOfTwo(int64(^c)) && uint64(^c) >= 128) {
 			break
 		}
 		v.reset(OpAMD64BTRQconst)
-		v.AuxInt = log2(^c)
+		v.AuxInt = int8ToAuxInt(int8(log32(^c)))
 		v.AddArg(x)
 		return true
 	}
@@ -3208,7 +3204,7 @@
 	// match: (ANDQconst [ 0xFF] x)
 	// result: (MOVBQZX x)
 	for {
-		if v.AuxInt != 0xFF {
+		if auxIntToInt32(v.AuxInt) != 0xFF {
 			break
 		}
 		x := v_0
@@ -3219,7 +3215,7 @@
 	// match: (ANDQconst [0xFFFF] x)
 	// result: (MOVWQZX x)
 	for {
-		if v.AuxInt != 0xFFFF {
+		if auxIntToInt32(v.AuxInt) != 0xFFFF {
 			break
 		}
 		x := v_0
@@ -3227,17 +3223,6 @@
 		v.AddArg(x)
 		return true
 	}
-	// match: (ANDQconst [0xFFFFFFFF] x)
-	// result: (MOVLQZX x)
-	for {
-		if v.AuxInt != 0xFFFFFFFF {
-			break
-		}
-		x := v_0
-		v.reset(OpAMD64MOVLQZX)
-		v.AddArg(x)
-		return true
-	}
 	// match: (ANDQconst [0] _)
 	// result: (MOVQconst [0])
 	for {
@@ -3276,23 +3261,23 @@
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	// match: (ANDQconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem)
-	// cond: ValAndOff(valoff1).canAdd(off2)
-	// result: (ANDQconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem)
+	// cond: ValAndOff(valoff1).canAdd32(off2)
+	// result: (ANDQconstmodify [ValAndOff(valoff1).addOffset32(off2)] {sym} base mem)
 	for {
-		valoff1 := v.AuxInt
-		sym := v.Aux
+		valoff1 := auxIntToValAndOff(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		if v_0.Op != OpAMD64ADDQconst {
 			break
 		}
-		off2 := v_0.AuxInt
+		off2 := auxIntToInt32(v_0.AuxInt)
 		base := v_0.Args[0]
 		mem := v_1
-		if !(ValAndOff(valoff1).canAdd(off2)) {
+		if !(ValAndOff(valoff1).canAdd32(off2)) {
 			break
 		}
 		v.reset(OpAMD64ANDQconstmodify)
-		v.AuxInt = ValAndOff(valoff1).add(off2)
-		v.Aux = sym
+		v.AuxInt = valAndOffToAuxInt(ValAndOff(valoff1).addOffset32(off2))
+		v.Aux = symToAux(sym)
 		v.AddArg2(base, mem)
 		return true
 	}
@@ -3327,24 +3312,24 @@
 	b := v.Block
 	typ := &b.Func.Config.Types
 	// match: (ANDQload [off1] {sym} val (ADDQconst [off2] base) mem)
-	// cond: is32Bit(off1+off2)
+	// cond: is32Bit(int64(off1)+int64(off2))
 	// result: (ANDQload [off1+off2] {sym} val base mem)
 	for {
-		off1 := v.AuxInt
-		sym := v.Aux
+		off1 := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		val := v_0
 		if v_1.Op != OpAMD64ADDQconst {
 			break
 		}
-		off2 := v_1.AuxInt
+		off2 := auxIntToInt32(v_1.AuxInt)
 		base := v_1.Args[0]
 		mem := v_2
-		if !(is32Bit(off1 + off2)) {
+		if !(is32Bit(int64(off1) + int64(off2))) {
 			break
 		}
 		v.reset(OpAMD64ANDQload)
-		v.AuxInt = off1 + off2
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off1 + off2)
+		v.Aux = symToAux(sym)
 		v.AddArg3(val, base, mem)
 		return true
 	}
@@ -3398,24 +3383,24 @@
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	// match: (ANDQmodify [off1] {sym} (ADDQconst [off2] base) val mem)
-	// cond: is32Bit(off1+off2)
+	// cond: is32Bit(int64(off1)+int64(off2))
 	// result: (ANDQmodify [off1+off2] {sym} base val mem)
 	for {
-		off1 := v.AuxInt
-		sym := v.Aux
+		off1 := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		if v_0.Op != OpAMD64ADDQconst {
 			break
 		}
-		off2 := v_0.AuxInt
+		off2 := auxIntToInt32(v_0.AuxInt)
 		base := v_0.Args[0]
 		val := v_1
 		mem := v_2
-		if !(is32Bit(off1 + off2)) {
+		if !(is32Bit(int64(off1) + int64(off2))) {
 			break
 		}
 		v.reset(OpAMD64ANDQmodify)
-		v.AuxInt = off1 + off2
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off1 + off2)
+		v.Aux = symToAux(sym)
 		v.AddArg3(base, val, mem)
 		return true
 	}
@@ -3541,23 +3526,23 @@
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	// match: (BTCLconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem)
-	// cond: ValAndOff(valoff1).canAdd(off2)
-	// result: (BTCLconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem)
+	// cond: ValAndOff(valoff1).canAdd32(off2)
+	// result: (BTCLconstmodify [ValAndOff(valoff1).addOffset32(off2)] {sym} base mem)
 	for {
-		valoff1 := v.AuxInt
-		sym := v.Aux
+		valoff1 := auxIntToValAndOff(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		if v_0.Op != OpAMD64ADDQconst {
 			break
 		}
-		off2 := v_0.AuxInt
+		off2 := auxIntToInt32(v_0.AuxInt)
 		base := v_0.Args[0]
 		mem := v_1
-		if !(ValAndOff(valoff1).canAdd(off2)) {
+		if !(ValAndOff(valoff1).canAdd32(off2)) {
 			break
 		}
 		v.reset(OpAMD64BTCLconstmodify)
-		v.AuxInt = ValAndOff(valoff1).add(off2)
-		v.Aux = sym
+		v.AuxInt = valAndOffToAuxInt(ValAndOff(valoff1).addOffset32(off2))
+		v.Aux = symToAux(sym)
 		v.AddArg2(base, mem)
 		return true
 	}
@@ -3590,24 +3575,24 @@
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	// match: (BTCLmodify [off1] {sym} (ADDQconst [off2] base) val mem)
-	// cond: is32Bit(off1+off2)
+	// cond: is32Bit(int64(off1)+int64(off2))
 	// result: (BTCLmodify [off1+off2] {sym} base val mem)
 	for {
-		off1 := v.AuxInt
-		sym := v.Aux
+		off1 := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		if v_0.Op != OpAMD64ADDQconst {
 			break
 		}
-		off2 := v_0.AuxInt
+		off2 := auxIntToInt32(v_0.AuxInt)
 		base := v_0.Args[0]
 		val := v_1
 		mem := v_2
-		if !(is32Bit(off1 + off2)) {
+		if !(is32Bit(int64(off1) + int64(off2))) {
 			break
 		}
 		v.reset(OpAMD64BTCLmodify)
-		v.AuxInt = off1 + off2
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off1 + off2)
+		v.Aux = symToAux(sym)
 		v.AddArg3(base, val, mem)
 		return true
 	}
@@ -3692,23 +3677,23 @@
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	// match: (BTCQconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem)
-	// cond: ValAndOff(valoff1).canAdd(off2)
-	// result: (BTCQconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem)
+	// cond: ValAndOff(valoff1).canAdd32(off2)
+	// result: (BTCQconstmodify [ValAndOff(valoff1).addOffset32(off2)] {sym} base mem)
 	for {
-		valoff1 := v.AuxInt
-		sym := v.Aux
+		valoff1 := auxIntToValAndOff(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		if v_0.Op != OpAMD64ADDQconst {
 			break
 		}
-		off2 := v_0.AuxInt
+		off2 := auxIntToInt32(v_0.AuxInt)
 		base := v_0.Args[0]
 		mem := v_1
-		if !(ValAndOff(valoff1).canAdd(off2)) {
+		if !(ValAndOff(valoff1).canAdd32(off2)) {
 			break
 		}
 		v.reset(OpAMD64BTCQconstmodify)
-		v.AuxInt = ValAndOff(valoff1).add(off2)
-		v.Aux = sym
+		v.AuxInt = valAndOffToAuxInt(ValAndOff(valoff1).addOffset32(off2))
+		v.Aux = symToAux(sym)
 		v.AddArg2(base, mem)
 		return true
 	}
@@ -3741,24 +3726,24 @@
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	// match: (BTCQmodify [off1] {sym} (ADDQconst [off2] base) val mem)
-	// cond: is32Bit(off1+off2)
+	// cond: is32Bit(int64(off1)+int64(off2))
 	// result: (BTCQmodify [off1+off2] {sym} base val mem)
 	for {
-		off1 := v.AuxInt
-		sym := v.Aux
+		off1 := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		if v_0.Op != OpAMD64ADDQconst {
 			break
 		}
-		off2 := v_0.AuxInt
+		off2 := auxIntToInt32(v_0.AuxInt)
 		base := v_0.Args[0]
 		val := v_1
 		mem := v_2
-		if !(is32Bit(off1 + off2)) {
+		if !(is32Bit(int64(off1) + int64(off2))) {
 			break
 		}
 		v.reset(OpAMD64BTCQmodify)
-		v.AuxInt = off1 + off2
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off1 + off2)
+		v.Aux = symToAux(sym)
 		v.AddArg3(base, val, mem)
 		return true
 	}
@@ -3793,17 +3778,17 @@
 	// cond: (c+d)<64
 	// result: (BTQconst [c+d] x)
 	for {
-		c := v.AuxInt
+		c := auxIntToInt8(v.AuxInt)
 		if v_0.Op != OpAMD64SHRQconst {
 			break
 		}
-		d := v_0.AuxInt
+		d := auxIntToInt8(v_0.AuxInt)
 		x := v_0.Args[0]
 		if !((c + d) < 64) {
 			break
 		}
 		v.reset(OpAMD64BTQconst)
-		v.AuxInt = c + d
+		v.AuxInt = int8ToAuxInt(c + d)
 		v.AddArg(x)
 		return true
 	}
@@ -3811,24 +3796,24 @@
 	// cond: c>d
 	// result: (BTLconst [c-d] x)
 	for {
-		c := v.AuxInt
+		c := auxIntToInt8(v.AuxInt)
 		if v_0.Op != OpAMD64SHLQconst {
 			break
 		}
-		d := v_0.AuxInt
+		d := auxIntToInt8(v_0.AuxInt)
 		x := v_0.Args[0]
 		if !(c > d) {
 			break
 		}
 		v.reset(OpAMD64BTLconst)
-		v.AuxInt = c - d
+		v.AuxInt = int8ToAuxInt(c - d)
 		v.AddArg(x)
 		return true
 	}
 	// match: (BTLconst [0] s:(SHRQ x y))
 	// result: (BTQ y x)
 	for {
-		if v.AuxInt != 0 {
+		if auxIntToInt8(v.AuxInt) != 0 {
 			break
 		}
 		s := v_0
@@ -3845,17 +3830,17 @@
 	// cond: (c+d)<32
 	// result: (BTLconst [c+d] x)
 	for {
-		c := v.AuxInt
+		c := auxIntToInt8(v.AuxInt)
 		if v_0.Op != OpAMD64SHRLconst {
 			break
 		}
-		d := v_0.AuxInt
+		d := auxIntToInt8(v_0.AuxInt)
 		x := v_0.Args[0]
 		if !((c + d) < 32) {
 			break
 		}
 		v.reset(OpAMD64BTLconst)
-		v.AuxInt = c + d
+		v.AuxInt = int8ToAuxInt(c + d)
 		v.AddArg(x)
 		return true
 	}
@@ -3863,24 +3848,24 @@
 	// cond: c>d
 	// result: (BTLconst [c-d] x)
 	for {
-		c := v.AuxInt
+		c := auxIntToInt8(v.AuxInt)
 		if v_0.Op != OpAMD64SHLLconst {
 			break
 		}
-		d := v_0.AuxInt
+		d := auxIntToInt8(v_0.AuxInt)
 		x := v_0.Args[0]
 		if !(c > d) {
 			break
 		}
 		v.reset(OpAMD64BTLconst)
-		v.AuxInt = c - d
+		v.AuxInt = int8ToAuxInt(c - d)
 		v.AddArg(x)
 		return true
 	}
 	// match: (BTLconst [0] s:(SHRL x y))
 	// result: (BTL y x)
 	for {
-		if v.AuxInt != 0 {
+		if auxIntToInt8(v.AuxInt) != 0 {
 			break
 		}
 		s := v_0
@@ -3901,17 +3886,17 @@
 	// cond: (c+d)<64
 	// result: (BTQconst [c+d] x)
 	for {
-		c := v.AuxInt
+		c := auxIntToInt8(v.AuxInt)
 		if v_0.Op != OpAMD64SHRQconst {
 			break
 		}
-		d := v_0.AuxInt
+		d := auxIntToInt8(v_0.AuxInt)
 		x := v_0.Args[0]
 		if !((c + d) < 64) {
 			break
 		}
 		v.reset(OpAMD64BTQconst)
-		v.AuxInt = c + d
+		v.AuxInt = int8ToAuxInt(c + d)
 		v.AddArg(x)
 		return true
 	}
@@ -3919,24 +3904,24 @@
 	// cond: c>d
 	// result: (BTQconst [c-d] x)
 	for {
-		c := v.AuxInt
+		c := auxIntToInt8(v.AuxInt)
 		if v_0.Op != OpAMD64SHLQconst {
 			break
 		}
-		d := v_0.AuxInt
+		d := auxIntToInt8(v_0.AuxInt)
 		x := v_0.Args[0]
 		if !(c > d) {
 			break
 		}
 		v.reset(OpAMD64BTQconst)
-		v.AuxInt = c - d
+		v.AuxInt = int8ToAuxInt(c - d)
 		v.AddArg(x)
 		return true
 	}
 	// match: (BTQconst [0] s:(SHRQ x y))
 	// result: (BTQ y x)
 	for {
-		if v.AuxInt != 0 {
+		if auxIntToInt8(v.AuxInt) != 0 {
 			break
 		}
 		s := v_0
@@ -3956,26 +3941,26 @@
 	// match: (BTRLconst [c] (BTSLconst [c] x))
 	// result: (BTRLconst [c] x)
 	for {
-		c := v.AuxInt
-		if v_0.Op != OpAMD64BTSLconst || v_0.AuxInt != c {
+		c := auxIntToInt8(v.AuxInt)
+		if v_0.Op != OpAMD64BTSLconst || auxIntToInt8(v_0.AuxInt) != c {
 			break
 		}
 		x := v_0.Args[0]
 		v.reset(OpAMD64BTRLconst)
-		v.AuxInt = c
+		v.AuxInt = int8ToAuxInt(c)
 		v.AddArg(x)
 		return true
 	}
 	// match: (BTRLconst [c] (BTCLconst [c] x))
 	// result: (BTRLconst [c] x)
 	for {
-		c := v.AuxInt
-		if v_0.Op != OpAMD64BTCLconst || v_0.AuxInt != c {
+		c := auxIntToInt8(v.AuxInt)
+		if v_0.Op != OpAMD64BTCLconst || auxIntToInt8(v_0.AuxInt) != c {
 			break
 		}
 		x := v_0.Args[0]
 		v.reset(OpAMD64BTRLconst)
-		v.AuxInt = c
+		v.AuxInt = int8ToAuxInt(c)
 		v.AddArg(x)
 		return true
 	}
@@ -4025,23 +4010,23 @@
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	// match: (BTRLconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem)
-	// cond: ValAndOff(valoff1).canAdd(off2)
-	// result: (BTRLconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem)
+	// cond: ValAndOff(valoff1).canAdd32(off2)
+	// result: (BTRLconstmodify [ValAndOff(valoff1).addOffset32(off2)] {sym} base mem)
 	for {
-		valoff1 := v.AuxInt
-		sym := v.Aux
+		valoff1 := auxIntToValAndOff(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		if v_0.Op != OpAMD64ADDQconst {
 			break
 		}
-		off2 := v_0.AuxInt
+		off2 := auxIntToInt32(v_0.AuxInt)
 		base := v_0.Args[0]
 		mem := v_1
-		if !(ValAndOff(valoff1).canAdd(off2)) {
+		if !(ValAndOff(valoff1).canAdd32(off2)) {
 			break
 		}
 		v.reset(OpAMD64BTRLconstmodify)
-		v.AuxInt = ValAndOff(valoff1).add(off2)
-		v.Aux = sym
+		v.AuxInt = valAndOffToAuxInt(ValAndOff(valoff1).addOffset32(off2))
+		v.Aux = symToAux(sym)
 		v.AddArg2(base, mem)
 		return true
 	}
@@ -4074,24 +4059,24 @@
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	// match: (BTRLmodify [off1] {sym} (ADDQconst [off2] base) val mem)
-	// cond: is32Bit(off1+off2)
+	// cond: is32Bit(int64(off1)+int64(off2))
 	// result: (BTRLmodify [off1+off2] {sym} base val mem)
 	for {
-		off1 := v.AuxInt
-		sym := v.Aux
+		off1 := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		if v_0.Op != OpAMD64ADDQconst {
 			break
 		}
-		off2 := v_0.AuxInt
+		off2 := auxIntToInt32(v_0.AuxInt)
 		base := v_0.Args[0]
 		val := v_1
 		mem := v_2
-		if !(is32Bit(off1 + off2)) {
+		if !(is32Bit(int64(off1) + int64(off2))) {
 			break
 		}
 		v.reset(OpAMD64BTRLmodify)
-		v.AuxInt = off1 + off2
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off1 + off2)
+		v.Aux = symToAux(sym)
 		v.AddArg3(base, val, mem)
 		return true
 	}
@@ -4125,26 +4110,26 @@
 	// match: (BTRQconst [c] (BTSQconst [c] x))
 	// result: (BTRQconst [c] x)
 	for {
-		c := v.AuxInt
-		if v_0.Op != OpAMD64BTSQconst || v_0.AuxInt != c {
+		c := auxIntToInt8(v.AuxInt)
+		if v_0.Op != OpAMD64BTSQconst || auxIntToInt8(v_0.AuxInt) != c {
 			break
 		}
 		x := v_0.Args[0]
 		v.reset(OpAMD64BTRQconst)
-		v.AuxInt = c
+		v.AuxInt = int8ToAuxInt(c)
 		v.AddArg(x)
 		return true
 	}
 	// match: (BTRQconst [c] (BTCQconst [c] x))
 	// result: (BTRQconst [c] x)
 	for {
-		c := v.AuxInt
-		if v_0.Op != OpAMD64BTCQconst || v_0.AuxInt != c {
+		c := auxIntToInt8(v.AuxInt)
+		if v_0.Op != OpAMD64BTCQconst || auxIntToInt8(v_0.AuxInt) != c {
 			break
 		}
 		x := v_0.Args[0]
 		v.reset(OpAMD64BTRQconst)
-		v.AuxInt = c
+		v.AuxInt = int8ToAuxInt(c)
 		v.AddArg(x)
 		return true
 	}
@@ -4202,23 +4187,23 @@
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	// match: (BTRQconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem)
-	// cond: ValAndOff(valoff1).canAdd(off2)
-	// result: (BTRQconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem)
+	// cond: ValAndOff(valoff1).canAdd32(off2)
+	// result: (BTRQconstmodify [ValAndOff(valoff1).addOffset32(off2)] {sym} base mem)
 	for {
-		valoff1 := v.AuxInt
-		sym := v.Aux
+		valoff1 := auxIntToValAndOff(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		if v_0.Op != OpAMD64ADDQconst {
 			break
 		}
-		off2 := v_0.AuxInt
+		off2 := auxIntToInt32(v_0.AuxInt)
 		base := v_0.Args[0]
 		mem := v_1
-		if !(ValAndOff(valoff1).canAdd(off2)) {
+		if !(ValAndOff(valoff1).canAdd32(off2)) {
 			break
 		}
 		v.reset(OpAMD64BTRQconstmodify)
-		v.AuxInt = ValAndOff(valoff1).add(off2)
-		v.Aux = sym
+		v.AuxInt = valAndOffToAuxInt(ValAndOff(valoff1).addOffset32(off2))
+		v.Aux = symToAux(sym)
 		v.AddArg2(base, mem)
 		return true
 	}
@@ -4251,24 +4236,24 @@
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	// match: (BTRQmodify [off1] {sym} (ADDQconst [off2] base) val mem)
-	// cond: is32Bit(off1+off2)
+	// cond: is32Bit(int64(off1)+int64(off2))
 	// result: (BTRQmodify [off1+off2] {sym} base val mem)
 	for {
-		off1 := v.AuxInt
-		sym := v.Aux
+		off1 := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		if v_0.Op != OpAMD64ADDQconst {
 			break
 		}
-		off2 := v_0.AuxInt
+		off2 := auxIntToInt32(v_0.AuxInt)
 		base := v_0.Args[0]
 		val := v_1
 		mem := v_2
-		if !(is32Bit(off1 + off2)) {
+		if !(is32Bit(int64(off1) + int64(off2))) {
 			break
 		}
 		v.reset(OpAMD64BTRQmodify)
-		v.AuxInt = off1 + off2
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off1 + off2)
+		v.Aux = symToAux(sym)
 		v.AddArg3(base, val, mem)
 		return true
 	}
@@ -4302,26 +4287,26 @@
 	// match: (BTSLconst [c] (BTRLconst [c] x))
 	// result: (BTSLconst [c] x)
 	for {
-		c := v.AuxInt
-		if v_0.Op != OpAMD64BTRLconst || v_0.AuxInt != c {
+		c := auxIntToInt8(v.AuxInt)
+		if v_0.Op != OpAMD64BTRLconst || auxIntToInt8(v_0.AuxInt) != c {
 			break
 		}
 		x := v_0.Args[0]
 		v.reset(OpAMD64BTSLconst)
-		v.AuxInt = c
+		v.AuxInt = int8ToAuxInt(c)
 		v.AddArg(x)
 		return true
 	}
 	// match: (BTSLconst [c] (BTCLconst [c] x))
 	// result: (BTSLconst [c] x)
 	for {
-		c := v.AuxInt
-		if v_0.Op != OpAMD64BTCLconst || v_0.AuxInt != c {
+		c := auxIntToInt8(v.AuxInt)
+		if v_0.Op != OpAMD64BTCLconst || auxIntToInt8(v_0.AuxInt) != c {
 			break
 		}
 		x := v_0.Args[0]
 		v.reset(OpAMD64BTSLconst)
-		v.AuxInt = c
+		v.AuxInt = int8ToAuxInt(c)
 		v.AddArg(x)
 		return true
 	}
@@ -4371,23 +4356,23 @@
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	// match: (BTSLconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem)
-	// cond: ValAndOff(valoff1).canAdd(off2)
-	// result: (BTSLconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem)
+	// cond: ValAndOff(valoff1).canAdd32(off2)
+	// result: (BTSLconstmodify [ValAndOff(valoff1).addOffset32(off2)] {sym} base mem)
 	for {
-		valoff1 := v.AuxInt
-		sym := v.Aux
+		valoff1 := auxIntToValAndOff(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		if v_0.Op != OpAMD64ADDQconst {
 			break
 		}
-		off2 := v_0.AuxInt
+		off2 := auxIntToInt32(v_0.AuxInt)
 		base := v_0.Args[0]
 		mem := v_1
-		if !(ValAndOff(valoff1).canAdd(off2)) {
+		if !(ValAndOff(valoff1).canAdd32(off2)) {
 			break
 		}
 		v.reset(OpAMD64BTSLconstmodify)
-		v.AuxInt = ValAndOff(valoff1).add(off2)
-		v.Aux = sym
+		v.AuxInt = valAndOffToAuxInt(ValAndOff(valoff1).addOffset32(off2))
+		v.Aux = symToAux(sym)
 		v.AddArg2(base, mem)
 		return true
 	}
@@ -4420,24 +4405,24 @@
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	// match: (BTSLmodify [off1] {sym} (ADDQconst [off2] base) val mem)
-	// cond: is32Bit(off1+off2)
+	// cond: is32Bit(int64(off1)+int64(off2))
 	// result: (BTSLmodify [off1+off2] {sym} base val mem)
 	for {
-		off1 := v.AuxInt
-		sym := v.Aux
+		off1 := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		if v_0.Op != OpAMD64ADDQconst {
 			break
 		}
-		off2 := v_0.AuxInt
+		off2 := auxIntToInt32(v_0.AuxInt)
 		base := v_0.Args[0]
 		val := v_1
 		mem := v_2
-		if !(is32Bit(off1 + off2)) {
+		if !(is32Bit(int64(off1) + int64(off2))) {
 			break
 		}
 		v.reset(OpAMD64BTSLmodify)
-		v.AuxInt = off1 + off2
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off1 + off2)
+		v.Aux = symToAux(sym)
 		v.AddArg3(base, val, mem)
 		return true
 	}
@@ -4471,26 +4456,26 @@
 	// match: (BTSQconst [c] (BTRQconst [c] x))
 	// result: (BTSQconst [c] x)
 	for {
-		c := v.AuxInt
-		if v_0.Op != OpAMD64BTRQconst || v_0.AuxInt != c {
+		c := auxIntToInt8(v.AuxInt)
+		if v_0.Op != OpAMD64BTRQconst || auxIntToInt8(v_0.AuxInt) != c {
 			break
 		}
 		x := v_0.Args[0]
 		v.reset(OpAMD64BTSQconst)
-		v.AuxInt = c
+		v.AuxInt = int8ToAuxInt(c)
 		v.AddArg(x)
 		return true
 	}
 	// match: (BTSQconst [c] (BTCQconst [c] x))
 	// result: (BTSQconst [c] x)
 	for {
-		c := v.AuxInt
-		if v_0.Op != OpAMD64BTCQconst || v_0.AuxInt != c {
+		c := auxIntToInt8(v.AuxInt)
+		if v_0.Op != OpAMD64BTCQconst || auxIntToInt8(v_0.AuxInt) != c {
 			break
 		}
 		x := v_0.Args[0]
 		v.reset(OpAMD64BTSQconst)
-		v.AuxInt = c
+		v.AuxInt = int8ToAuxInt(c)
 		v.AddArg(x)
 		return true
 	}
@@ -4548,23 +4533,23 @@
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	// match: (BTSQconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem)
-	// cond: ValAndOff(valoff1).canAdd(off2)
-	// result: (BTSQconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem)
+	// cond: ValAndOff(valoff1).canAdd32(off2)
+	// result: (BTSQconstmodify [ValAndOff(valoff1).addOffset32(off2)] {sym} base mem)
 	for {
-		valoff1 := v.AuxInt
-		sym := v.Aux
+		valoff1 := auxIntToValAndOff(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		if v_0.Op != OpAMD64ADDQconst {
 			break
 		}
-		off2 := v_0.AuxInt
+		off2 := auxIntToInt32(v_0.AuxInt)
 		base := v_0.Args[0]
 		mem := v_1
-		if !(ValAndOff(valoff1).canAdd(off2)) {
+		if !(ValAndOff(valoff1).canAdd32(off2)) {
 			break
 		}
 		v.reset(OpAMD64BTSQconstmodify)
-		v.AuxInt = ValAndOff(valoff1).add(off2)
-		v.Aux = sym
+		v.AuxInt = valAndOffToAuxInt(ValAndOff(valoff1).addOffset32(off2))
+		v.Aux = symToAux(sym)
 		v.AddArg2(base, mem)
 		return true
 	}
@@ -4597,24 +4582,24 @@
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	// match: (BTSQmodify [off1] {sym} (ADDQconst [off2] base) val mem)
-	// cond: is32Bit(off1+off2)
+	// cond: is32Bit(int64(off1)+int64(off2))
 	// result: (BTSQmodify [off1+off2] {sym} base val mem)
 	for {
-		off1 := v.AuxInt
-		sym := v.Aux
+		off1 := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		if v_0.Op != OpAMD64ADDQconst {
 			break
 		}
-		off2 := v_0.AuxInt
+		off2 := auxIntToInt32(v_0.AuxInt)
 		base := v_0.Args[0]
 		val := v_1
 		mem := v_2
-		if !(is32Bit(off1 + off2)) {
+		if !(is32Bit(int64(off1) + int64(off2))) {
 			break
 		}
 		v.reset(OpAMD64BTSQmodify)
-		v.AuxInt = off1 + off2
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off1 + off2)
+		v.Aux = symToAux(sym)
 		v.AddArg3(base, val, mem)
 		return true
 	}
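The overflow guards change shape for the same reason: off1 and off2 are now int32, so summing them directly could wrap before the range check ever ran. Widening each operand to int64 first means is32Bit sees the true mathematical sum. Its definition elsewhere in this package is the obvious one, sketched here for reference:

	// is32Bit reports whether n can be represented as a signed 32-bit integer.
	func is32Bit(n int64) bool { return n == int64(int32(n)) }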
@@ -6741,29 +6726,29 @@
 	v_0 := v.Args[0]
 	b := v.Block
 	// match: (CMPB x (MOVLconst [c]))
-	// result: (CMPBconst x [int64(int8(c))])
+	// result: (CMPBconst x [int8(c)])
 	for {
 		x := v_0
 		if v_1.Op != OpAMD64MOVLconst {
 			break
 		}
-		c := v_1.AuxInt
+		c := auxIntToInt32(v_1.AuxInt)
 		v.reset(OpAMD64CMPBconst)
-		v.AuxInt = int64(int8(c))
+		v.AuxInt = int8ToAuxInt(int8(c))
 		v.AddArg(x)
 		return true
 	}
 	// match: (CMPB (MOVLconst [c]) x)
-	// result: (InvertFlags (CMPBconst x [int64(int8(c))]))
+	// result: (InvertFlags (CMPBconst x [int8(c)]))
 	for {
 		if v_0.Op != OpAMD64MOVLconst {
 			break
 		}
-		c := v_0.AuxInt
+		c := auxIntToInt32(v_0.AuxInt)
 		x := v_1
 		v.reset(OpAMD64InvertFlags)
 		v0 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags)
-		v0.AuxInt = int64(int8(c))
+		v0.AuxInt = int8ToAuxInt(int8(c))
 		v0.AddArg(x)
 		v.AddArg(v0)
 		return true
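The narrowing rules are value-preserving: CMPBconst's AuxInt is now declared int8, so the result changes from [int64(int8(c))] to [int8(c)] and the double conversion collapses into the typed write. For any c the stored bits are identical:

	old: v.AuxInt = int64(int8(c))          // manual truncate, then widen
	new: v.AuxInt = int8ToAuxInt(int8(c))   // same bits, typed at int8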
@@ -7006,23 +6991,23 @@
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	// match: (CMPBconstload [valoff1] {sym} (ADDQconst [off2] base) mem)
-	// cond: ValAndOff(valoff1).canAdd(off2)
-	// result: (CMPBconstload [ValAndOff(valoff1).add(off2)] {sym} base mem)
+	// cond: ValAndOff(valoff1).canAdd32(off2)
+	// result: (CMPBconstload [ValAndOff(valoff1).addOffset32(off2)] {sym} base mem)
 	for {
-		valoff1 := v.AuxInt
-		sym := v.Aux
+		valoff1 := auxIntToValAndOff(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		if v_0.Op != OpAMD64ADDQconst {
 			break
 		}
-		off2 := v_0.AuxInt
+		off2 := auxIntToInt32(v_0.AuxInt)
 		base := v_0.Args[0]
 		mem := v_1
-		if !(ValAndOff(valoff1).canAdd(off2)) {
+		if !(ValAndOff(valoff1).canAdd32(off2)) {
 			break
 		}
 		v.reset(OpAMD64CMPBconstload)
-		v.AuxInt = ValAndOff(valoff1).add(off2)
-		v.Aux = sym
+		v.AuxInt = valAndOffToAuxInt(ValAndOff(valoff1).addOffset32(off2))
+		v.Aux = symToAux(sym)
 		v.AddArg2(base, mem)
 		return true
 	}
@@ -7055,24 +7040,24 @@
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	// match: (CMPBload [off1] {sym} (ADDQconst [off2] base) val mem)
-	// cond: is32Bit(off1+off2)
+	// cond: is32Bit(int64(off1)+int64(off2))
 	// result: (CMPBload [off1+off2] {sym} base val mem)
 	for {
-		off1 := v.AuxInt
-		sym := v.Aux
+		off1 := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		if v_0.Op != OpAMD64ADDQconst {
 			break
 		}
-		off2 := v_0.AuxInt
+		off2 := auxIntToInt32(v_0.AuxInt)
 		base := v_0.Args[0]
 		val := v_1
 		mem := v_2
-		if !(is32Bit(off1 + off2)) {
+		if !(is32Bit(int64(off1) + int64(off2))) {
 			break
 		}
 		v.reset(OpAMD64CMPBload)
-		v.AuxInt = off1 + off2
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off1 + off2)
+		v.Aux = symToAux(sym)
 		v.AddArg3(base, val, mem)
 		return true
 	}
@@ -7133,9 +7118,9 @@
 		if v_1.Op != OpAMD64MOVLconst {
 			break
 		}
-		c := v_1.AuxInt
+		c := auxIntToInt32(v_1.AuxInt)
 		v.reset(OpAMD64CMPLconst)
-		v.AuxInt = c
+		v.AuxInt = int32ToAuxInt(c)
 		v.AddArg(x)
 		return true
 	}
@@ -7145,11 +7130,11 @@
 		if v_0.Op != OpAMD64MOVLconst {
 			break
 		}
-		c := v_0.AuxInt
+		c := auxIntToInt32(v_0.AuxInt)
 		x := v_1
 		v.reset(OpAMD64InvertFlags)
 		v0 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags)
-		v0.AuxInt = c
+		v0.AuxInt = int32ToAuxInt(c)
 		v0.AddArg(x)
 		v.AddArg(v0)
 		return true
@@ -7407,23 +7392,23 @@
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	// match: (CMPLconstload [valoff1] {sym} (ADDQconst [off2] base) mem)
-	// cond: ValAndOff(valoff1).canAdd(off2)
-	// result: (CMPLconstload [ValAndOff(valoff1).add(off2)] {sym} base mem)
+	// cond: ValAndOff(valoff1).canAdd32(off2)
+	// result: (CMPLconstload [ValAndOff(valoff1).addOffset32(off2)] {sym} base mem)
 	for {
-		valoff1 := v.AuxInt
-		sym := v.Aux
+		valoff1 := auxIntToValAndOff(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		if v_0.Op != OpAMD64ADDQconst {
 			break
 		}
-		off2 := v_0.AuxInt
+		off2 := auxIntToInt32(v_0.AuxInt)
 		base := v_0.Args[0]
 		mem := v_1
-		if !(ValAndOff(valoff1).canAdd(off2)) {
+		if !(ValAndOff(valoff1).canAdd32(off2)) {
 			break
 		}
 		v.reset(OpAMD64CMPLconstload)
-		v.AuxInt = ValAndOff(valoff1).add(off2)
-		v.Aux = sym
+		v.AuxInt = valAndOffToAuxInt(ValAndOff(valoff1).addOffset32(off2))
+		v.Aux = symToAux(sym)
 		v.AddArg2(base, mem)
 		return true
 	}
@@ -7456,24 +7441,24 @@
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	// match: (CMPLload [off1] {sym} (ADDQconst [off2] base) val mem)
-	// cond: is32Bit(off1+off2)
+	// cond: is32Bit(int64(off1)+int64(off2))
 	// result: (CMPLload [off1+off2] {sym} base val mem)
 	for {
-		off1 := v.AuxInt
-		sym := v.Aux
+		off1 := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		if v_0.Op != OpAMD64ADDQconst {
 			break
 		}
-		off2 := v_0.AuxInt
+		off2 := auxIntToInt32(v_0.AuxInt)
 		base := v_0.Args[0]
 		val := v_1
 		mem := v_2
-		if !(is32Bit(off1 + off2)) {
+		if !(is32Bit(int64(off1) + int64(off2))) {
 			break
 		}
 		v.reset(OpAMD64CMPLload)
-		v.AuxInt = off1 + off2
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off1 + off2)
+		v.Aux = symToAux(sym)
 		v.AddArg3(base, val, mem)
 		return true
 	}
@@ -7529,36 +7514,36 @@
 	b := v.Block
 	// match: (CMPQ x (MOVQconst [c]))
 	// cond: is32Bit(c)
-	// result: (CMPQconst x [c])
+	// result: (CMPQconst x [int32(c)])
 	for {
 		x := v_0
 		if v_1.Op != OpAMD64MOVQconst {
 			break
 		}
-		c := v_1.AuxInt
+		c := auxIntToInt64(v_1.AuxInt)
 		if !(is32Bit(c)) {
 			break
 		}
 		v.reset(OpAMD64CMPQconst)
-		v.AuxInt = c
+		v.AuxInt = int32ToAuxInt(int32(c))
 		v.AddArg(x)
 		return true
 	}
 	// match: (CMPQ (MOVQconst [c]) x)
 	// cond: is32Bit(c)
-	// result: (InvertFlags (CMPQconst x [c]))
+	// result: (InvertFlags (CMPQconst x [int32(c)]))
 	for {
 		if v_0.Op != OpAMD64MOVQconst {
 			break
 		}
-		c := v_0.AuxInt
+		c := auxIntToInt64(v_0.AuxInt)
 		x := v_1
 		if !(is32Bit(c)) {
 			break
 		}
 		v.reset(OpAMD64InvertFlags)
 		v0 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags)
-		v0.AuxInt = c
+		v0.AuxInt = int32ToAuxInt(int32(c))
 		v0.AddArg(x)
 		v.AddArg(v0)
 		return true
@@ -7722,15 +7707,15 @@
 	// match: (CMPQconst (NEGQ (ADDQconst [-16] (ANDQconst [15] _))) [32])
 	// result: (FlagLT_ULT)
 	for {
-		if v.AuxInt != 32 || v_0.Op != OpAMD64NEGQ {
+		if auxIntToInt32(v.AuxInt) != 32 || v_0.Op != OpAMD64NEGQ {
 			break
 		}
 		v_0_0 := v_0.Args[0]
-		if v_0_0.Op != OpAMD64ADDQconst || v_0_0.AuxInt != -16 {
+		if v_0_0.Op != OpAMD64ADDQconst || auxIntToInt32(v_0_0.AuxInt) != -16 {
 			break
 		}
 		v_0_0_0 := v_0_0.Args[0]
-		if v_0_0_0.Op != OpAMD64ANDQconst || v_0_0_0.AuxInt != 15 {
+		if v_0_0_0.Op != OpAMD64ANDQconst || auxIntToInt32(v_0_0_0.AuxInt) != 15 {
 			break
 		}
 		v.reset(OpAMD64FlagLT_ULT)
@@ -7739,15 +7724,15 @@
 	// match: (CMPQconst (NEGQ (ADDQconst [ -8] (ANDQconst [7] _))) [32])
 	// result: (FlagLT_ULT)
 	for {
-		if v.AuxInt != 32 || v_0.Op != OpAMD64NEGQ {
+		if auxIntToInt32(v.AuxInt) != 32 || v_0.Op != OpAMD64NEGQ {
 			break
 		}
 		v_0_0 := v_0.Args[0]
-		if v_0_0.Op != OpAMD64ADDQconst || v_0_0.AuxInt != -8 {
+		if v_0_0.Op != OpAMD64ADDQconst || auxIntToInt32(v_0_0.AuxInt) != -8 {
 			break
 		}
 		v_0_0_0 := v_0_0.Args[0]
-		if v_0_0_0.Op != OpAMD64ANDQconst || v_0_0_0.AuxInt != 7 {
+		if v_0_0_0.Op != OpAMD64ANDQconst || auxIntToInt32(v_0_0_0.AuxInt) != 7 {
 			break
 		}
 		v.reset(OpAMD64FlagLT_ULT)
@@ -7988,23 +7973,23 @@
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	// match: (CMPQconstload [valoff1] {sym} (ADDQconst [off2] base) mem)
-	// cond: ValAndOff(valoff1).canAdd(off2)
-	// result: (CMPQconstload [ValAndOff(valoff1).add(off2)] {sym} base mem)
+	// cond: ValAndOff(valoff1).canAdd32(off2)
+	// result: (CMPQconstload [ValAndOff(valoff1).addOffset32(off2)] {sym} base mem)
 	for {
-		valoff1 := v.AuxInt
-		sym := v.Aux
+		valoff1 := auxIntToValAndOff(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		if v_0.Op != OpAMD64ADDQconst {
 			break
 		}
-		off2 := v_0.AuxInt
+		off2 := auxIntToInt32(v_0.AuxInt)
 		base := v_0.Args[0]
 		mem := v_1
-		if !(ValAndOff(valoff1).canAdd(off2)) {
+		if !(ValAndOff(valoff1).canAdd32(off2)) {
 			break
 		}
 		v.reset(OpAMD64CMPQconstload)
-		v.AuxInt = ValAndOff(valoff1).add(off2)
-		v.Aux = sym
+		v.AuxInt = valAndOffToAuxInt(ValAndOff(valoff1).addOffset32(off2))
+		v.Aux = symToAux(sym)
 		v.AddArg2(base, mem)
 		return true
 	}
@@ -8037,24 +8022,24 @@
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	// match: (CMPQload [off1] {sym} (ADDQconst [off2] base) val mem)
-	// cond: is32Bit(off1+off2)
+	// cond: is32Bit(int64(off1)+int64(off2))
 	// result: (CMPQload [off1+off2] {sym} base val mem)
 	for {
-		off1 := v.AuxInt
-		sym := v.Aux
+		off1 := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		if v_0.Op != OpAMD64ADDQconst {
 			break
 		}
-		off2 := v_0.AuxInt
+		off2 := auxIntToInt32(v_0.AuxInt)
 		base := v_0.Args[0]
 		val := v_1
 		mem := v_2
-		if !(is32Bit(off1 + off2)) {
+		if !(is32Bit(int64(off1) + int64(off2))) {
 			break
 		}
 		v.reset(OpAMD64CMPQload)
-		v.AuxInt = off1 + off2
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off1 + off2)
+		v.Aux = symToAux(sym)
 		v.AddArg3(base, val, mem)
 		return true
 	}
@@ -8109,29 +8094,29 @@
 	v_0 := v.Args[0]
 	b := v.Block
 	// match: (CMPW x (MOVLconst [c]))
-	// result: (CMPWconst x [int64(int16(c))])
+	// result: (CMPWconst x [int16(c)])
 	for {
 		x := v_0
 		if v_1.Op != OpAMD64MOVLconst {
 			break
 		}
-		c := v_1.AuxInt
+		c := auxIntToInt32(v_1.AuxInt)
 		v.reset(OpAMD64CMPWconst)
-		v.AuxInt = int64(int16(c))
+		v.AuxInt = int16ToAuxInt(int16(c))
 		v.AddArg(x)
 		return true
 	}
 	// match: (CMPW (MOVLconst [c]) x)
-	// result: (InvertFlags (CMPWconst x [int64(int16(c))]))
+	// result: (InvertFlags (CMPWconst x [int16(c)]))
 	for {
 		if v_0.Op != OpAMD64MOVLconst {
 			break
 		}
-		c := v_0.AuxInt
+		c := auxIntToInt32(v_0.AuxInt)
 		x := v_1
 		v.reset(OpAMD64InvertFlags)
 		v0 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags)
-		v0.AuxInt = int64(int16(c))
+		v0.AuxInt = int16ToAuxInt(int16(c))
 		v0.AddArg(x)
 		v.AddArg(v0)
 		return true
@@ -8374,23 +8359,23 @@
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	// match: (CMPWconstload [valoff1] {sym} (ADDQconst [off2] base) mem)
-	// cond: ValAndOff(valoff1).canAdd(off2)
-	// result: (CMPWconstload [ValAndOff(valoff1).add(off2)] {sym} base mem)
+	// cond: ValAndOff(valoff1).canAdd32(off2)
+	// result: (CMPWconstload [ValAndOff(valoff1).addOffset32(off2)] {sym} base mem)
 	for {
-		valoff1 := v.AuxInt
-		sym := v.Aux
+		valoff1 := auxIntToValAndOff(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		if v_0.Op != OpAMD64ADDQconst {
 			break
 		}
-		off2 := v_0.AuxInt
+		off2 := auxIntToInt32(v_0.AuxInt)
 		base := v_0.Args[0]
 		mem := v_1
-		if !(ValAndOff(valoff1).canAdd(off2)) {
+		if !(ValAndOff(valoff1).canAdd32(off2)) {
 			break
 		}
 		v.reset(OpAMD64CMPWconstload)
-		v.AuxInt = ValAndOff(valoff1).add(off2)
-		v.Aux = sym
+		v.AuxInt = valAndOffToAuxInt(ValAndOff(valoff1).addOffset32(off2))
+		v.Aux = symToAux(sym)
 		v.AddArg2(base, mem)
 		return true
 	}
@@ -8423,24 +8408,24 @@
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	// match: (CMPWload [off1] {sym} (ADDQconst [off2] base) val mem)
-	// cond: is32Bit(off1+off2)
+	// cond: is32Bit(int64(off1)+int64(off2))
 	// result: (CMPWload [off1+off2] {sym} base val mem)
 	for {
-		off1 := v.AuxInt
-		sym := v.Aux
+		off1 := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		if v_0.Op != OpAMD64ADDQconst {
 			break
 		}
-		off2 := v_0.AuxInt
+		off2 := auxIntToInt32(v_0.AuxInt)
 		base := v_0.Args[0]
 		val := v_1
 		mem := v_2
-		if !(is32Bit(off1 + off2)) {
+		if !(is32Bit(int64(off1) + int64(off2))) {
 			break
 		}
 		v.reset(OpAMD64CMPWload)
-		v.AuxInt = off1 + off2
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off1 + off2)
+		v.Aux = symToAux(sym)
 		v.AddArg3(base, val, mem)
 		return true
 	}
@@ -8582,24 +8567,24 @@
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	// match: (DIVSDload [off1] {sym} val (ADDQconst [off2] base) mem)
-	// cond: is32Bit(off1+off2)
+	// cond: is32Bit(int64(off1)+int64(off2))
 	// result: (DIVSDload [off1+off2] {sym} val base mem)
 	for {
-		off1 := v.AuxInt
-		sym := v.Aux
+		off1 := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		val := v_0
 		if v_1.Op != OpAMD64ADDQconst {
 			break
 		}
-		off2 := v_1.AuxInt
+		off2 := auxIntToInt32(v_1.AuxInt)
 		base := v_1.Args[0]
 		mem := v_2
-		if !(is32Bit(off1 + off2)) {
+		if !(is32Bit(int64(off1) + int64(off2))) {
 			break
 		}
 		v.reset(OpAMD64DIVSDload)
-		v.AuxInt = off1 + off2
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off1 + off2)
+		v.Aux = symToAux(sym)
 		v.AddArg3(val, base, mem)
 		return true
 	}
@@ -8660,24 +8645,24 @@
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	// match: (DIVSSload [off1] {sym} val (ADDQconst [off2] base) mem)
-	// cond: is32Bit(off1+off2)
+	// cond: is32Bit(int64(off1)+int64(off2))
 	// result: (DIVSSload [off1+off2] {sym} val base mem)
 	for {
-		off1 := v.AuxInt
-		sym := v.Aux
+		off1 := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		val := v_0
 		if v_1.Op != OpAMD64ADDQconst {
 			break
 		}
-		off2 := v_1.AuxInt
+		off2 := auxIntToInt32(v_1.AuxInt)
 		base := v_1.Args[0]
 		mem := v_2
-		if !(is32Bit(off1 + off2)) {
+		if !(is32Bit(int64(off1) + int64(off2))) {
 			break
 		}
 		v.reset(OpAMD64DIVSSload)
-		v.AuxInt = off1 + off2
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off1 + off2)
+		v.Aux = symToAux(sym)
 		v.AddArg3(val, base, mem)
 		return true
 	}
@@ -8781,22 +8766,22 @@
 func rewriteValueAMD64_OpAMD64LEAL(v *Value) bool {
 	v_0 := v.Args[0]
 	// match: (LEAL [c] {s} (ADDLconst [d] x))
-	// cond: is32Bit(c+d)
+	// cond: is32Bit(int64(c)+int64(d))
 	// result: (LEAL [c+d] {s} x)
 	for {
-		c := v.AuxInt
-		s := v.Aux
+		c := auxIntToInt32(v.AuxInt)
+		s := auxToSym(v.Aux)
 		if v_0.Op != OpAMD64ADDLconst {
 			break
 		}
-		d := v_0.AuxInt
+		d := auxIntToInt32(v_0.AuxInt)
 		x := v_0.Args[0]
-		if !(is32Bit(c + d)) {
+		if !(is32Bit(int64(c) + int64(d))) {
 			break
 		}
 		v.reset(OpAMD64LEAL)
-		v.AuxInt = c + d
-		v.Aux = s
+		v.AuxInt = int32ToAuxInt(c + d)
+		v.Aux = symToAux(s)
 		v.AddArg(x)
 		return true
 	}
@@ -8804,8 +8789,8 @@
 	// cond: x.Op != OpSB && y.Op != OpSB
 	// result: (LEAL1 [c] {s} x y)
 	for {
-		c := v.AuxInt
-		s := v.Aux
+		c := auxIntToInt32(v.AuxInt)
+		s := auxToSym(v.Aux)
 		if v_0.Op != OpAMD64ADDL {
 			break
 		}
@@ -8819,8 +8804,8 @@
 				continue
 			}
 			v.reset(OpAMD64LEAL1)
-			v.AuxInt = c
-			v.Aux = s
+			v.AuxInt = int32ToAuxInt(c)
+			v.Aux = symToAux(s)
 			v.AddArg2(x, y)
 			return true
 		}
@@ -8832,24 +8817,24 @@
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	// match: (LEAL1 [c] {s} (ADDLconst [d] x) y)
-	// cond: is32Bit(c+d) && x.Op != OpSB
+	// cond: is32Bit(int64(c)+int64(d)) && x.Op != OpSB
 	// result: (LEAL1 [c+d] {s} x y)
 	for {
-		c := v.AuxInt
-		s := v.Aux
+		c := auxIntToInt32(v.AuxInt)
+		s := auxToSym(v.Aux)
 		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
 			if v_0.Op != OpAMD64ADDLconst {
 				continue
 			}
-			d := v_0.AuxInt
+			d := auxIntToInt32(v_0.AuxInt)
 			x := v_0.Args[0]
 			y := v_1
-			if !(is32Bit(c+d) && x.Op != OpSB) {
+			if !(is32Bit(int64(c)+int64(d)) && x.Op != OpSB) {
 				continue
 			}
 			v.reset(OpAMD64LEAL1)
-			v.AuxInt = c + d
-			v.Aux = s
+			v.AuxInt = int32ToAuxInt(c + d)
+			v.Aux = symToAux(s)
 			v.AddArg2(x, y)
 			return true
 		}
@@ -8858,17 +8843,17 @@
 	// match: (LEAL1 [c] {s} x (SHLLconst [1] y))
 	// result: (LEAL2 [c] {s} x y)
 	for {
-		c := v.AuxInt
-		s := v.Aux
+		c := auxIntToInt32(v.AuxInt)
+		s := auxToSym(v.Aux)
 		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
 			x := v_0
-			if v_1.Op != OpAMD64SHLLconst || v_1.AuxInt != 1 {
+			if v_1.Op != OpAMD64SHLLconst || auxIntToInt8(v_1.AuxInt) != 1 {
 				continue
 			}
 			y := v_1.Args[0]
 			v.reset(OpAMD64LEAL2)
-			v.AuxInt = c
-			v.Aux = s
+			v.AuxInt = int32ToAuxInt(c)
+			v.Aux = symToAux(s)
 			v.AddArg2(x, y)
 			return true
 		}
@@ -8877,17 +8862,17 @@
 	// match: (LEAL1 [c] {s} x (SHLLconst [2] y))
 	// result: (LEAL4 [c] {s} x y)
 	for {
-		c := v.AuxInt
-		s := v.Aux
+		c := auxIntToInt32(v.AuxInt)
+		s := auxToSym(v.Aux)
 		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
 			x := v_0
-			if v_1.Op != OpAMD64SHLLconst || v_1.AuxInt != 2 {
+			if v_1.Op != OpAMD64SHLLconst || auxIntToInt8(v_1.AuxInt) != 2 {
 				continue
 			}
 			y := v_1.Args[0]
 			v.reset(OpAMD64LEAL4)
-			v.AuxInt = c
-			v.Aux = s
+			v.AuxInt = int32ToAuxInt(c)
+			v.Aux = symToAux(s)
 			v.AddArg2(x, y)
 			return true
 		}
@@ -8896,17 +8881,17 @@
 	// match: (LEAL1 [c] {s} x (SHLLconst [3] y))
 	// result: (LEAL8 [c] {s} x y)
 	for {
-		c := v.AuxInt
-		s := v.Aux
+		c := auxIntToInt32(v.AuxInt)
+		s := auxToSym(v.Aux)
 		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
 			x := v_0
-			if v_1.Op != OpAMD64SHLLconst || v_1.AuxInt != 3 {
+			if v_1.Op != OpAMD64SHLLconst || auxIntToInt8(v_1.AuxInt) != 3 {
 				continue
 			}
 			y := v_1.Args[0]
 			v.reset(OpAMD64LEAL8)
-			v.AuxInt = c
-			v.Aux = s
+			v.AuxInt = int32ToAuxInt(c)
+			v.Aux = symToAux(s)
 			v.AddArg2(x, y)
 			return true
 		}
@@ -8918,76 +8903,76 @@
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	// match: (LEAL2 [c] {s} (ADDLconst [d] x) y)
-	// cond: is32Bit(c+d) && x.Op != OpSB
+	// cond: is32Bit(int64(c)+int64(d)) && x.Op != OpSB
 	// result: (LEAL2 [c+d] {s} x y)
 	for {
-		c := v.AuxInt
-		s := v.Aux
+		c := auxIntToInt32(v.AuxInt)
+		s := auxToSym(v.Aux)
 		if v_0.Op != OpAMD64ADDLconst {
 			break
 		}
-		d := v_0.AuxInt
+		d := auxIntToInt32(v_0.AuxInt)
 		x := v_0.Args[0]
 		y := v_1
-		if !(is32Bit(c+d) && x.Op != OpSB) {
+		if !(is32Bit(int64(c)+int64(d)) && x.Op != OpSB) {
 			break
 		}
 		v.reset(OpAMD64LEAL2)
-		v.AuxInt = c + d
-		v.Aux = s
+		v.AuxInt = int32ToAuxInt(c + d)
+		v.Aux = symToAux(s)
 		v.AddArg2(x, y)
 		return true
 	}
 	// match: (LEAL2 [c] {s} x (ADDLconst [d] y))
-	// cond: is32Bit(c+2*d) && y.Op != OpSB
+	// cond: is32Bit(int64(c)+2*int64(d)) && y.Op != OpSB
 	// result: (LEAL2 [c+2*d] {s} x y)
 	for {
-		c := v.AuxInt
-		s := v.Aux
+		c := auxIntToInt32(v.AuxInt)
+		s := auxToSym(v.Aux)
 		x := v_0
 		if v_1.Op != OpAMD64ADDLconst {
 			break
 		}
-		d := v_1.AuxInt
+		d := auxIntToInt32(v_1.AuxInt)
 		y := v_1.Args[0]
-		if !(is32Bit(c+2*d) && y.Op != OpSB) {
+		if !(is32Bit(int64(c)+2*int64(d)) && y.Op != OpSB) {
 			break
 		}
 		v.reset(OpAMD64LEAL2)
-		v.AuxInt = c + 2*d
-		v.Aux = s
+		v.AuxInt = int32ToAuxInt(c + 2*d)
+		v.Aux = symToAux(s)
 		v.AddArg2(x, y)
 		return true
 	}
 	// match: (LEAL2 [c] {s} x (SHLLconst [1] y))
 	// result: (LEAL4 [c] {s} x y)
 	for {
-		c := v.AuxInt
-		s := v.Aux
+		c := auxIntToInt32(v.AuxInt)
+		s := auxToSym(v.Aux)
 		x := v_0
-		if v_1.Op != OpAMD64SHLLconst || v_1.AuxInt != 1 {
+		if v_1.Op != OpAMD64SHLLconst || auxIntToInt8(v_1.AuxInt) != 1 {
 			break
 		}
 		y := v_1.Args[0]
 		v.reset(OpAMD64LEAL4)
-		v.AuxInt = c
-		v.Aux = s
+		v.AuxInt = int32ToAuxInt(c)
+		v.Aux = symToAux(s)
 		v.AddArg2(x, y)
 		return true
 	}
 	// match: (LEAL2 [c] {s} x (SHLLconst [2] y))
 	// result: (LEAL8 [c] {s} x y)
 	for {
-		c := v.AuxInt
-		s := v.Aux
+		c := auxIntToInt32(v.AuxInt)
+		s := auxToSym(v.Aux)
 		x := v_0
-		if v_1.Op != OpAMD64SHLLconst || v_1.AuxInt != 2 {
+		if v_1.Op != OpAMD64SHLLconst || auxIntToInt8(v_1.AuxInt) != 2 {
 			break
 		}
 		y := v_1.Args[0]
 		v.reset(OpAMD64LEAL8)
-		v.AuxInt = c
-		v.Aux = s
+		v.AuxInt = int32ToAuxInt(c)
+		v.Aux = symToAux(s)
 		v.AddArg2(x, y)
 		return true
 	}
@@ -8997,60 +8982,60 @@
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	// match: (LEAL4 [c] {s} (ADDLconst [d] x) y)
-	// cond: is32Bit(c+d) && x.Op != OpSB
+	// cond: is32Bit(int64(c)+int64(d)) && x.Op != OpSB
 	// result: (LEAL4 [c+d] {s} x y)
 	for {
-		c := v.AuxInt
-		s := v.Aux
+		c := auxIntToInt32(v.AuxInt)
+		s := auxToSym(v.Aux)
 		if v_0.Op != OpAMD64ADDLconst {
 			break
 		}
-		d := v_0.AuxInt
+		d := auxIntToInt32(v_0.AuxInt)
 		x := v_0.Args[0]
 		y := v_1
-		if !(is32Bit(c+d) && x.Op != OpSB) {
+		if !(is32Bit(int64(c)+int64(d)) && x.Op != OpSB) {
 			break
 		}
 		v.reset(OpAMD64LEAL4)
-		v.AuxInt = c + d
-		v.Aux = s
+		v.AuxInt = int32ToAuxInt(c + d)
+		v.Aux = symToAux(s)
 		v.AddArg2(x, y)
 		return true
 	}
 	// match: (LEAL4 [c] {s} x (ADDLconst [d] y))
-	// cond: is32Bit(c+4*d) && y.Op != OpSB
+	// cond: is32Bit(int64(c)+4*int64(d)) && y.Op != OpSB
 	// result: (LEAL4 [c+4*d] {s} x y)
 	for {
-		c := v.AuxInt
-		s := v.Aux
+		c := auxIntToInt32(v.AuxInt)
+		s := auxToSym(v.Aux)
 		x := v_0
 		if v_1.Op != OpAMD64ADDLconst {
 			break
 		}
-		d := v_1.AuxInt
+		d := auxIntToInt32(v_1.AuxInt)
 		y := v_1.Args[0]
-		if !(is32Bit(c+4*d) && y.Op != OpSB) {
+		if !(is32Bit(int64(c)+4*int64(d)) && y.Op != OpSB) {
 			break
 		}
 		v.reset(OpAMD64LEAL4)
-		v.AuxInt = c + 4*d
-		v.Aux = s
+		v.AuxInt = int32ToAuxInt(c + 4*d)
+		v.Aux = symToAux(s)
 		v.AddArg2(x, y)
 		return true
 	}
 	// match: (LEAL4 [c] {s} x (SHLLconst [1] y))
 	// result: (LEAL8 [c] {s} x y)
 	for {
-		c := v.AuxInt
-		s := v.Aux
+		c := auxIntToInt32(v.AuxInt)
+		s := auxToSym(v.Aux)
 		x := v_0
-		if v_1.Op != OpAMD64SHLLconst || v_1.AuxInt != 1 {
+		if v_1.Op != OpAMD64SHLLconst || auxIntToInt8(v_1.AuxInt) != 1 {
 			break
 		}
 		y := v_1.Args[0]
 		v.reset(OpAMD64LEAL8)
-		v.AuxInt = c
-		v.Aux = s
+		v.AuxInt = int32ToAuxInt(c)
+		v.Aux = symToAux(s)
 		v.AddArg2(x, y)
 		return true
 	}
@@ -9060,44 +9045,44 @@
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	// match: (LEAL8 [c] {s} (ADDLconst [d] x) y)
-	// cond: is32Bit(c+d) && x.Op != OpSB
+	// cond: is32Bit(int64(c)+int64(d)) && x.Op != OpSB
 	// result: (LEAL8 [c+d] {s} x y)
 	for {
-		c := v.AuxInt
-		s := v.Aux
+		c := auxIntToInt32(v.AuxInt)
+		s := auxToSym(v.Aux)
 		if v_0.Op != OpAMD64ADDLconst {
 			break
 		}
-		d := v_0.AuxInt
+		d := auxIntToInt32(v_0.AuxInt)
 		x := v_0.Args[0]
 		y := v_1
-		if !(is32Bit(c+d) && x.Op != OpSB) {
+		if !(is32Bit(int64(c)+int64(d)) && x.Op != OpSB) {
 			break
 		}
 		v.reset(OpAMD64LEAL8)
-		v.AuxInt = c + d
-		v.Aux = s
+		v.AuxInt = int32ToAuxInt(c + d)
+		v.Aux = symToAux(s)
 		v.AddArg2(x, y)
 		return true
 	}
 	// match: (LEAL8 [c] {s} x (ADDLconst [d] y))
-	// cond: is32Bit(c+8*d) && y.Op != OpSB
+	// cond: is32Bit(int64(c)+8*int64(d)) && y.Op != OpSB
 	// result: (LEAL8 [c+8*d] {s} x y)
 	for {
-		c := v.AuxInt
-		s := v.Aux
+		c := auxIntToInt32(v.AuxInt)
+		s := auxToSym(v.Aux)
 		x := v_0
 		if v_1.Op != OpAMD64ADDLconst {
 			break
 		}
-		d := v_1.AuxInt
+		d := auxIntToInt32(v_1.AuxInt)
 		y := v_1.Args[0]
-		if !(is32Bit(c+8*d) && y.Op != OpSB) {
+		if !(is32Bit(int64(c)+8*int64(d)) && y.Op != OpSB) {
 			break
 		}
 		v.reset(OpAMD64LEAL8)
-		v.AuxInt = c + 8*d
-		v.Aux = s
+		v.AuxInt = int32ToAuxInt(c + 8*d)
+		v.Aux = symToAux(s)
 		v.AddArg2(x, y)
 		return true
 	}
@@ -9106,22 +9091,22 @@
 func rewriteValueAMD64_OpAMD64LEAQ(v *Value) bool {
 	v_0 := v.Args[0]
 	// match: (LEAQ [c] {s} (ADDQconst [d] x))
-	// cond: is32Bit(c+d)
+	// cond: is32Bit(int64(c)+int64(d))
 	// result: (LEAQ [c+d] {s} x)
 	for {
-		c := v.AuxInt
-		s := v.Aux
+		c := auxIntToInt32(v.AuxInt)
+		s := auxToSym(v.Aux)
 		if v_0.Op != OpAMD64ADDQconst {
 			break
 		}
-		d := v_0.AuxInt
+		d := auxIntToInt32(v_0.AuxInt)
 		x := v_0.Args[0]
-		if !(is32Bit(c + d)) {
+		if !(is32Bit(int64(c) + int64(d))) {
 			break
 		}
 		v.reset(OpAMD64LEAQ)
-		v.AuxInt = c + d
-		v.Aux = s
+		v.AuxInt = int32ToAuxInt(c + d)
+		v.Aux = symToAux(s)
 		v.AddArg(x)
 		return true
 	}
@@ -9129,8 +9114,8 @@
 	// cond: x.Op != OpSB && y.Op != OpSB
 	// result: (LEAQ1 [c] {s} x y)
 	for {
-		c := v.AuxInt
-		s := v.Aux
+		c := auxIntToInt32(v.AuxInt)
+		s := auxToSym(v.Aux)
 		if v_0.Op != OpAMD64ADDQ {
 			break
 		}
@@ -9144,8 +9129,8 @@
 				continue
 			}
 			v.reset(OpAMD64LEAQ1)
-			v.AuxInt = c
-			v.Aux = s
+			v.AuxInt = int32ToAuxInt(c)
+			v.Aux = symToAux(s)
 			v.AddArg2(x, y)
 			return true
 		}
@@ -9266,24 +9251,24 @@
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	// match: (LEAQ1 [c] {s} (ADDQconst [d] x) y)
-	// cond: is32Bit(c+d) && x.Op != OpSB
+	// cond: is32Bit(int64(c)+int64(d)) && x.Op != OpSB
 	// result: (LEAQ1 [c+d] {s} x y)
 	for {
-		c := v.AuxInt
-		s := v.Aux
+		c := auxIntToInt32(v.AuxInt)
+		s := auxToSym(v.Aux)
 		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
 			if v_0.Op != OpAMD64ADDQconst {
 				continue
 			}
-			d := v_0.AuxInt
+			d := auxIntToInt32(v_0.AuxInt)
 			x := v_0.Args[0]
 			y := v_1
-			if !(is32Bit(c+d) && x.Op != OpSB) {
+			if !(is32Bit(int64(c)+int64(d)) && x.Op != OpSB) {
 				continue
 			}
 			v.reset(OpAMD64LEAQ1)
-			v.AuxInt = c + d
-			v.Aux = s
+			v.AuxInt = int32ToAuxInt(c + d)
+			v.Aux = symToAux(s)
 			v.AddArg2(x, y)
 			return true
 		}
@@ -9292,17 +9277,17 @@
 	// match: (LEAQ1 [c] {s} x (SHLQconst [1] y))
 	// result: (LEAQ2 [c] {s} x y)
 	for {
-		c := v.AuxInt
-		s := v.Aux
+		c := auxIntToInt32(v.AuxInt)
+		s := auxToSym(v.Aux)
 		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
 			x := v_0
-			if v_1.Op != OpAMD64SHLQconst || v_1.AuxInt != 1 {
+			if v_1.Op != OpAMD64SHLQconst || auxIntToInt8(v_1.AuxInt) != 1 {
 				continue
 			}
 			y := v_1.Args[0]
 			v.reset(OpAMD64LEAQ2)
-			v.AuxInt = c
-			v.Aux = s
+			v.AuxInt = int32ToAuxInt(c)
+			v.Aux = symToAux(s)
 			v.AddArg2(x, y)
 			return true
 		}
@@ -9311,17 +9296,17 @@
 	// match: (LEAQ1 [c] {s} x (SHLQconst [2] y))
 	// result: (LEAQ4 [c] {s} x y)
 	for {
-		c := v.AuxInt
-		s := v.Aux
+		c := auxIntToInt32(v.AuxInt)
+		s := auxToSym(v.Aux)
 		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
 			x := v_0
-			if v_1.Op != OpAMD64SHLQconst || v_1.AuxInt != 2 {
+			if v_1.Op != OpAMD64SHLQconst || auxIntToInt8(v_1.AuxInt) != 2 {
 				continue
 			}
 			y := v_1.Args[0]
 			v.reset(OpAMD64LEAQ4)
-			v.AuxInt = c
-			v.Aux = s
+			v.AuxInt = int32ToAuxInt(c)
+			v.Aux = symToAux(s)
 			v.AddArg2(x, y)
 			return true
 		}
@@ -9330,17 +9315,17 @@
 	// match: (LEAQ1 [c] {s} x (SHLQconst [3] y))
 	// result: (LEAQ8 [c] {s} x y)
 	for {
-		c := v.AuxInt
-		s := v.Aux
+		c := auxIntToInt32(v.AuxInt)
+		s := auxToSym(v.Aux)
 		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
 			x := v_0
-			if v_1.Op != OpAMD64SHLQconst || v_1.AuxInt != 3 {
+			if v_1.Op != OpAMD64SHLQconst || auxIntToInt8(v_1.AuxInt) != 3 {
 				continue
 			}
 			y := v_1.Args[0]
 			v.reset(OpAMD64LEAQ8)
-			v.AuxInt = c
-			v.Aux = s
+			v.AuxInt = int32ToAuxInt(c)
+			v.Aux = symToAux(s)
 			v.AddArg2(x, y)
 			return true
 		}
@@ -9451,76 +9436,76 @@
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	// match: (LEAQ2 [c] {s} (ADDQconst [d] x) y)
-	// cond: is32Bit(c+d) && x.Op != OpSB
+	// cond: is32Bit(int64(c)+int64(d)) && x.Op != OpSB
 	// result: (LEAQ2 [c+d] {s} x y)
 	for {
-		c := v.AuxInt
-		s := v.Aux
+		c := auxIntToInt32(v.AuxInt)
+		s := auxToSym(v.Aux)
 		if v_0.Op != OpAMD64ADDQconst {
 			break
 		}
-		d := v_0.AuxInt
+		d := auxIntToInt32(v_0.AuxInt)
 		x := v_0.Args[0]
 		y := v_1
-		if !(is32Bit(c+d) && x.Op != OpSB) {
+		if !(is32Bit(int64(c)+int64(d)) && x.Op != OpSB) {
 			break
 		}
 		v.reset(OpAMD64LEAQ2)
-		v.AuxInt = c + d
-		v.Aux = s
+		v.AuxInt = int32ToAuxInt(c + d)
+		v.Aux = symToAux(s)
 		v.AddArg2(x, y)
 		return true
 	}
 	// match: (LEAQ2 [c] {s} x (ADDQconst [d] y))
-	// cond: is32Bit(c+2*d) && y.Op != OpSB
+	// cond: is32Bit(int64(c)+2*int64(d)) && y.Op != OpSB
 	// result: (LEAQ2 [c+2*d] {s} x y)
 	for {
-		c := v.AuxInt
-		s := v.Aux
+		c := auxIntToInt32(v.AuxInt)
+		s := auxToSym(v.Aux)
 		x := v_0
 		if v_1.Op != OpAMD64ADDQconst {
 			break
 		}
-		d := v_1.AuxInt
+		d := auxIntToInt32(v_1.AuxInt)
 		y := v_1.Args[0]
-		if !(is32Bit(c+2*d) && y.Op != OpSB) {
+		if !(is32Bit(int64(c)+2*int64(d)) && y.Op != OpSB) {
 			break
 		}
 		v.reset(OpAMD64LEAQ2)
-		v.AuxInt = c + 2*d
-		v.Aux = s
+		v.AuxInt = int32ToAuxInt(c + 2*d)
+		v.Aux = symToAux(s)
 		v.AddArg2(x, y)
 		return true
 	}
 	// match: (LEAQ2 [c] {s} x (SHLQconst [1] y))
 	// result: (LEAQ4 [c] {s} x y)
 	for {
-		c := v.AuxInt
-		s := v.Aux
+		c := auxIntToInt32(v.AuxInt)
+		s := auxToSym(v.Aux)
 		x := v_0
-		if v_1.Op != OpAMD64SHLQconst || v_1.AuxInt != 1 {
+		if v_1.Op != OpAMD64SHLQconst || auxIntToInt8(v_1.AuxInt) != 1 {
 			break
 		}
 		y := v_1.Args[0]
 		v.reset(OpAMD64LEAQ4)
-		v.AuxInt = c
-		v.Aux = s
+		v.AuxInt = int32ToAuxInt(c)
+		v.Aux = symToAux(s)
 		v.AddArg2(x, y)
 		return true
 	}
 	// match: (LEAQ2 [c] {s} x (SHLQconst [2] y))
 	// result: (LEAQ8 [c] {s} x y)
 	for {
-		c := v.AuxInt
-		s := v.Aux
+		c := auxIntToInt32(v.AuxInt)
+		s := auxToSym(v.Aux)
 		x := v_0
-		if v_1.Op != OpAMD64SHLQconst || v_1.AuxInt != 2 {
+		if v_1.Op != OpAMD64SHLQconst || auxIntToInt8(v_1.AuxInt) != 2 {
 			break
 		}
 		y := v_1.Args[0]
 		v.reset(OpAMD64LEAQ8)
-		v.AuxInt = c
-		v.Aux = s
+		v.AuxInt = int32ToAuxInt(c)
+		v.Aux = symToAux(s)
 		v.AddArg2(x, y)
 		return true
 	}
@@ -9614,60 +9599,60 @@
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	// match: (LEAQ4 [c] {s} (ADDQconst [d] x) y)
-	// cond: is32Bit(c+d) && x.Op != OpSB
+	// cond: is32Bit(int64(c)+int64(d)) && x.Op != OpSB
 	// result: (LEAQ4 [c+d] {s} x y)
 	for {
-		c := v.AuxInt
-		s := v.Aux
+		c := auxIntToInt32(v.AuxInt)
+		s := auxToSym(v.Aux)
 		if v_0.Op != OpAMD64ADDQconst {
 			break
 		}
-		d := v_0.AuxInt
+		d := auxIntToInt32(v_0.AuxInt)
 		x := v_0.Args[0]
 		y := v_1
-		if !(is32Bit(c+d) && x.Op != OpSB) {
+		if !(is32Bit(int64(c)+int64(d)) && x.Op != OpSB) {
 			break
 		}
 		v.reset(OpAMD64LEAQ4)
-		v.AuxInt = c + d
-		v.Aux = s
+		v.AuxInt = int32ToAuxInt(c + d)
+		v.Aux = symToAux(s)
 		v.AddArg2(x, y)
 		return true
 	}
 	// match: (LEAQ4 [c] {s} x (ADDQconst [d] y))
-	// cond: is32Bit(c+4*d) && y.Op != OpSB
+	// cond: is32Bit(int64(c)+4*int64(d)) && y.Op != OpSB
 	// result: (LEAQ4 [c+4*d] {s} x y)
 	for {
-		c := v.AuxInt
-		s := v.Aux
+		c := auxIntToInt32(v.AuxInt)
+		s := auxToSym(v.Aux)
 		x := v_0
 		if v_1.Op != OpAMD64ADDQconst {
 			break
 		}
-		d := v_1.AuxInt
+		d := auxIntToInt32(v_1.AuxInt)
 		y := v_1.Args[0]
-		if !(is32Bit(c+4*d) && y.Op != OpSB) {
+		if !(is32Bit(int64(c)+4*int64(d)) && y.Op != OpSB) {
 			break
 		}
 		v.reset(OpAMD64LEAQ4)
-		v.AuxInt = c + 4*d
-		v.Aux = s
+		v.AuxInt = int32ToAuxInt(c + 4*d)
+		v.Aux = symToAux(s)
 		v.AddArg2(x, y)
 		return true
 	}
 	// match: (LEAQ4 [c] {s} x (SHLQconst [1] y))
 	// result: (LEAQ8 [c] {s} x y)
 	for {
-		c := v.AuxInt
-		s := v.Aux
+		c := auxIntToInt32(v.AuxInt)
+		s := auxToSym(v.Aux)
 		x := v_0
-		if v_1.Op != OpAMD64SHLQconst || v_1.AuxInt != 1 {
+		if v_1.Op != OpAMD64SHLQconst || auxIntToInt8(v_1.AuxInt) != 1 {
 			break
 		}
 		y := v_1.Args[0]
 		v.reset(OpAMD64LEAQ8)
-		v.AuxInt = c
-		v.Aux = s
+		v.AuxInt = int32ToAuxInt(c)
+		v.Aux = symToAux(s)
 		v.AddArg2(x, y)
 		return true
 	}
@@ -9761,44 +9746,44 @@
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	// match: (LEAQ8 [c] {s} (ADDQconst [d] x) y)
-	// cond: is32Bit(c+d) && x.Op != OpSB
+	// cond: is32Bit(int64(c)+int64(d)) && x.Op != OpSB
 	// result: (LEAQ8 [c+d] {s} x y)
 	for {
-		c := v.AuxInt
-		s := v.Aux
+		c := auxIntToInt32(v.AuxInt)
+		s := auxToSym(v.Aux)
 		if v_0.Op != OpAMD64ADDQconst {
 			break
 		}
-		d := v_0.AuxInt
+		d := auxIntToInt32(v_0.AuxInt)
 		x := v_0.Args[0]
 		y := v_1
-		if !(is32Bit(c+d) && x.Op != OpSB) {
+		if !(is32Bit(int64(c)+int64(d)) && x.Op != OpSB) {
 			break
 		}
 		v.reset(OpAMD64LEAQ8)
-		v.AuxInt = c + d
-		v.Aux = s
+		v.AuxInt = int32ToAuxInt(c + d)
+		v.Aux = symToAux(s)
 		v.AddArg2(x, y)
 		return true
 	}
 	// match: (LEAQ8 [c] {s} x (ADDQconst [d] y))
-	// cond: is32Bit(c+8*d) && y.Op != OpSB
+	// cond: is32Bit(int64(c)+8*int64(d)) && y.Op != OpSB
 	// result: (LEAQ8 [c+8*d] {s} x y)
 	for {
-		c := v.AuxInt
-		s := v.Aux
+		c := auxIntToInt32(v.AuxInt)
+		s := auxToSym(v.Aux)
 		x := v_0
 		if v_1.Op != OpAMD64ADDQconst {
 			break
 		}
-		d := v_1.AuxInt
+		d := auxIntToInt32(v_1.AuxInt)
 		y := v_1.Args[0]
-		if !(is32Bit(c+8*d) && y.Op != OpSB) {
+		if !(is32Bit(int64(c)+8*int64(d)) && y.Op != OpSB) {
 			break
 		}
 		v.reset(OpAMD64LEAQ8)
-		v.AuxInt = c + 8*d
-		v.Aux = s
+		v.AuxInt = int32ToAuxInt(c + 8*d)
+		v.Aux = symToAux(s)
 		v.AddArg2(x, y)
 		return true
 	}
@@ -9877,8 +9862,8 @@
 		if x.Op != OpAMD64MOVBload {
 			break
 		}
-		off := x.AuxInt
-		sym := x.Aux
+		off := auxIntToInt32(x.AuxInt)
+		sym := auxToSym(x.Aux)
 		mem := x.Args[1]
 		ptr := x.Args[0]
 		if !(x.Uses == 1 && clobber(x)) {
@@ -9887,8 +9872,8 @@
 		b = x.Block
 		v0 := b.NewValue0(x.Pos, OpAMD64MOVBQSXload, v.Type)
 		v.copyOf(v0)
-		v0.AuxInt = off
-		v0.Aux = sym
+		v0.AuxInt = int32ToAuxInt(off)
+		v0.Aux = symToAux(sym)
 		v0.AddArg2(ptr, mem)
 		return true
 	}
@@ -9900,8 +9885,8 @@
 		if x.Op != OpAMD64MOVWload {
 			break
 		}
-		off := x.AuxInt
-		sym := x.Aux
+		off := auxIntToInt32(x.AuxInt)
+		sym := auxToSym(x.Aux)
 		mem := x.Args[1]
 		ptr := x.Args[0]
 		if !(x.Uses == 1 && clobber(x)) {
@@ -9910,8 +9895,8 @@
 		b = x.Block
 		v0 := b.NewValue0(x.Pos, OpAMD64MOVBQSXload, v.Type)
 		v.copyOf(v0)
-		v0.AuxInt = off
-		v0.Aux = sym
+		v0.AuxInt = int32ToAuxInt(off)
+		v0.Aux = symToAux(sym)
 		v0.AddArg2(ptr, mem)
 		return true
 	}
@@ -9923,8 +9908,8 @@
 		if x.Op != OpAMD64MOVLload {
 			break
 		}
-		off := x.AuxInt
-		sym := x.Aux
+		off := auxIntToInt32(x.AuxInt)
+		sym := auxToSym(x.Aux)
 		mem := x.Args[1]
 		ptr := x.Args[0]
 		if !(x.Uses == 1 && clobber(x)) {
@@ -9933,8 +9918,8 @@
 		b = x.Block
 		v0 := b.NewValue0(x.Pos, OpAMD64MOVBQSXload, v.Type)
 		v.copyOf(v0)
-		v0.AuxInt = off
-		v0.Aux = sym
+		v0.AuxInt = int32ToAuxInt(off)
+		v0.Aux = symToAux(sym)
 		v0.AddArg2(ptr, mem)
 		return true
 	}
@@ -9946,8 +9931,8 @@
 		if x.Op != OpAMD64MOVQload {
 			break
 		}
-		off := x.AuxInt
-		sym := x.Aux
+		off := auxIntToInt32(x.AuxInt)
+		sym := auxToSym(x.Aux)
 		mem := x.Args[1]
 		ptr := x.Args[0]
 		if !(x.Uses == 1 && clobber(x)) {
@@ -9956,8 +9941,8 @@
 		b = x.Block
 		v0 := b.NewValue0(x.Pos, OpAMD64MOVBQSXload, v.Type)
 		v.copyOf(v0)
-		v0.AuxInt = off
-		v0.Aux = sym
+		v0.AuxInt = int32ToAuxInt(off)
+		v0.Aux = symToAux(sym)
 		v0.AddArg2(ptr, mem)
 		return true
 	}
@@ -9968,13 +9953,13 @@
 		if v_0.Op != OpAMD64ANDLconst {
 			break
 		}
-		c := v_0.AuxInt
+		c := auxIntToInt32(v_0.AuxInt)
 		x := v_0.Args[0]
 		if !(c&0x80 == 0) {
 			break
 		}
 		v.reset(OpAMD64ANDLconst)
-		v.AuxInt = c & 0x7f
+		v.AuxInt = int32ToAuxInt(c & 0x7f)
 		v.AddArg(x)
 		return true
 	}
@@ -9998,14 +9983,14 @@
 	// cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
 	// result: (MOVBQSX x)
 	for {
-		off := v.AuxInt
-		sym := v.Aux
+		off := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		ptr := v_0
 		if v_1.Op != OpAMD64MOVBstore {
 			break
 		}
-		off2 := v_1.AuxInt
-		sym2 := v_1.Aux
+		off2 := auxIntToInt32(v_1.AuxInt)
+		sym2 := auxToSym(v_1.Aux)
 		x := v_1.Args[1]
 		ptr2 := v_1.Args[0]
 		if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
@@ -10050,8 +10035,8 @@
 		if x.Op != OpAMD64MOVBload {
 			break
 		}
-		off := x.AuxInt
-		sym := x.Aux
+		off := auxIntToInt32(x.AuxInt)
+		sym := auxToSym(x.Aux)
 		mem := x.Args[1]
 		ptr := x.Args[0]
 		if !(x.Uses == 1 && clobber(x)) {
@@ -10060,8 +10045,8 @@
 		b = x.Block
 		v0 := b.NewValue0(x.Pos, OpAMD64MOVBload, v.Type)
 		v.copyOf(v0)
-		v0.AuxInt = off
-		v0.Aux = sym
+		v0.AuxInt = int32ToAuxInt(off)
+		v0.Aux = symToAux(sym)
 		v0.AddArg2(ptr, mem)
 		return true
 	}
@@ -10073,8 +10058,8 @@
 		if x.Op != OpAMD64MOVWload {
 			break
 		}
-		off := x.AuxInt
-		sym := x.Aux
+		off := auxIntToInt32(x.AuxInt)
+		sym := auxToSym(x.Aux)
 		mem := x.Args[1]
 		ptr := x.Args[0]
 		if !(x.Uses == 1 && clobber(x)) {
@@ -10083,8 +10068,8 @@
 		b = x.Block
 		v0 := b.NewValue0(x.Pos, OpAMD64MOVBload, v.Type)
 		v.copyOf(v0)
-		v0.AuxInt = off
-		v0.Aux = sym
+		v0.AuxInt = int32ToAuxInt(off)
+		v0.Aux = symToAux(sym)
 		v0.AddArg2(ptr, mem)
 		return true
 	}
@@ -10096,8 +10081,8 @@
 		if x.Op != OpAMD64MOVLload {
 			break
 		}
-		off := x.AuxInt
-		sym := x.Aux
+		off := auxIntToInt32(x.AuxInt)
+		sym := auxToSym(x.Aux)
 		mem := x.Args[1]
 		ptr := x.Args[0]
 		if !(x.Uses == 1 && clobber(x)) {
@@ -10106,8 +10091,8 @@
 		b = x.Block
 		v0 := b.NewValue0(x.Pos, OpAMD64MOVBload, v.Type)
 		v.copyOf(v0)
-		v0.AuxInt = off
-		v0.Aux = sym
+		v0.AuxInt = int32ToAuxInt(off)
+		v0.Aux = symToAux(sym)
 		v0.AddArg2(ptr, mem)
 		return true
 	}
@@ -10119,8 +10104,8 @@
 		if x.Op != OpAMD64MOVQload {
 			break
 		}
-		off := x.AuxInt
-		sym := x.Aux
+		off := auxIntToInt32(x.AuxInt)
+		sym := auxToSym(x.Aux)
 		mem := x.Args[1]
 		ptr := x.Args[0]
 		if !(x.Uses == 1 && clobber(x)) {
@@ -10129,8 +10114,8 @@
 		b = x.Block
 		v0 := b.NewValue0(x.Pos, OpAMD64MOVBload, v.Type)
 		v.copyOf(v0)
-		v0.AuxInt = off
-		v0.Aux = sym
+		v0.AuxInt = int32ToAuxInt(off)
+		v0.Aux = symToAux(sym)
 		v0.AddArg2(ptr, mem)
 		return true
 	}
@@ -10151,10 +10136,10 @@
 		if v_0.Op != OpAMD64ANDLconst {
 			break
 		}
-		c := v_0.AuxInt
+		c := auxIntToInt32(v_0.AuxInt)
 		x := v_0.Args[0]
 		v.reset(OpAMD64ANDLconst)
-		v.AuxInt = c & 0xff
+		v.AuxInt = int32ToAuxInt(c & 0xff)
 		v.AddArg(x)
 		return true
 	}
@@ -10226,14 +10211,14 @@
 	// cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
 	// result: (MOVBQZX x)
 	for {
-		off := v.AuxInt
-		sym := v.Aux
+		off := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		ptr := v_0
 		if v_1.Op != OpAMD64MOVBstore {
 			break
 		}
-		off2 := v_1.AuxInt
-		sym2 := v_1.Aux
+		off2 := auxIntToInt32(v_1.AuxInt)
+		sym2 := auxToSym(v_1.Aux)
 		x := v_1.Args[1]
 		ptr2 := v_1.Args[0]
 		if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
@@ -10244,23 +10229,23 @@
 		return true
 	}
 	// match: (MOVBload [off1] {sym} (ADDQconst [off2] ptr) mem)
-	// cond: is32Bit(off1+off2)
+	// cond: is32Bit(int64(off1)+int64(off2))
 	// result: (MOVBload [off1+off2] {sym} ptr mem)
 	for {
-		off1 := v.AuxInt
-		sym := v.Aux
+		off1 := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		if v_0.Op != OpAMD64ADDQconst {
 			break
 		}
-		off2 := v_0.AuxInt
+		off2 := auxIntToInt32(v_0.AuxInt)
 		ptr := v_0.Args[0]
 		mem := v_1
-		if !(is32Bit(off1 + off2)) {
+		if !(is32Bit(int64(off1) + int64(off2))) {
 			break
 		}
 		v.reset(OpAMD64MOVBload)
-		v.AuxInt = off1 + off2
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off1 + off2)
+		v.Aux = symToAux(sym)
 		v.AddArg2(ptr, mem)
 		return true
 	}
@@ -10354,8 +10339,8 @@
 	// cond: y.Uses == 1
 	// result: (SETLstore [off] {sym} ptr x mem)
 	for {
-		off := v.AuxInt
-		sym := v.Aux
+		off := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		ptr := v_0
 		y := v_1
 		if y.Op != OpAMD64SETL {
@@ -10367,8 +10352,8 @@
 			break
 		}
 		v.reset(OpAMD64SETLstore)
-		v.AuxInt = off
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off)
+		v.Aux = symToAux(sym)
 		v.AddArg3(ptr, x, mem)
 		return true
 	}
@@ -10376,8 +10361,8 @@
 	// cond: y.Uses == 1
 	// result: (SETLEstore [off] {sym} ptr x mem)
 	for {
-		off := v.AuxInt
-		sym := v.Aux
+		off := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		ptr := v_0
 		y := v_1
 		if y.Op != OpAMD64SETLE {
@@ -10389,8 +10374,8 @@
 			break
 		}
 		v.reset(OpAMD64SETLEstore)
-		v.AuxInt = off
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off)
+		v.Aux = symToAux(sym)
 		v.AddArg3(ptr, x, mem)
 		return true
 	}
@@ -10398,8 +10383,8 @@
 	// cond: y.Uses == 1
 	// result: (SETGstore [off] {sym} ptr x mem)
 	for {
-		off := v.AuxInt
-		sym := v.Aux
+		off := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		ptr := v_0
 		y := v_1
 		if y.Op != OpAMD64SETG {
@@ -10411,8 +10396,8 @@
 			break
 		}
 		v.reset(OpAMD64SETGstore)
-		v.AuxInt = off
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off)
+		v.Aux = symToAux(sym)
 		v.AddArg3(ptr, x, mem)
 		return true
 	}
@@ -10420,8 +10405,8 @@
 	// cond: y.Uses == 1
 	// result: (SETGEstore [off] {sym} ptr x mem)
 	for {
-		off := v.AuxInt
-		sym := v.Aux
+		off := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		ptr := v_0
 		y := v_1
 		if y.Op != OpAMD64SETGE {
@@ -10433,8 +10418,8 @@
 			break
 		}
 		v.reset(OpAMD64SETGEstore)
-		v.AuxInt = off
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off)
+		v.Aux = symToAux(sym)
 		v.AddArg3(ptr, x, mem)
 		return true
 	}
@@ -10442,8 +10427,8 @@
 	// cond: y.Uses == 1
 	// result: (SETEQstore [off] {sym} ptr x mem)
 	for {
-		off := v.AuxInt
-		sym := v.Aux
+		off := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		ptr := v_0
 		y := v_1
 		if y.Op != OpAMD64SETEQ {
@@ -10455,8 +10440,8 @@
 			break
 		}
 		v.reset(OpAMD64SETEQstore)
-		v.AuxInt = off
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off)
+		v.Aux = symToAux(sym)
 		v.AddArg3(ptr, x, mem)
 		return true
 	}
@@ -10464,8 +10449,8 @@
 	// cond: y.Uses == 1
 	// result: (SETNEstore [off] {sym} ptr x mem)
 	for {
-		off := v.AuxInt
-		sym := v.Aux
+		off := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		ptr := v_0
 		y := v_1
 		if y.Op != OpAMD64SETNE {
@@ -10477,8 +10462,8 @@
 			break
 		}
 		v.reset(OpAMD64SETNEstore)
-		v.AuxInt = off
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off)
+		v.Aux = symToAux(sym)
 		v.AddArg3(ptr, x, mem)
 		return true
 	}
@@ -10486,8 +10471,8 @@
 	// cond: y.Uses == 1
 	// result: (SETBstore [off] {sym} ptr x mem)
 	for {
-		off := v.AuxInt
-		sym := v.Aux
+		off := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		ptr := v_0
 		y := v_1
 		if y.Op != OpAMD64SETB {
@@ -10499,8 +10484,8 @@
 			break
 		}
 		v.reset(OpAMD64SETBstore)
-		v.AuxInt = off
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off)
+		v.Aux = symToAux(sym)
 		v.AddArg3(ptr, x, mem)
 		return true
 	}
@@ -10508,8 +10493,8 @@
 	// cond: y.Uses == 1
 	// result: (SETBEstore [off] {sym} ptr x mem)
 	for {
-		off := v.AuxInt
-		sym := v.Aux
+		off := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		ptr := v_0
 		y := v_1
 		if y.Op != OpAMD64SETBE {
@@ -10521,8 +10506,8 @@
 			break
 		}
 		v.reset(OpAMD64SETBEstore)
-		v.AuxInt = off
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off)
+		v.Aux = symToAux(sym)
 		v.AddArg3(ptr, x, mem)
 		return true
 	}
@@ -10530,8 +10515,8 @@
 	// cond: y.Uses == 1
 	// result: (SETAstore [off] {sym} ptr x mem)
 	for {
-		off := v.AuxInt
-		sym := v.Aux
+		off := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		ptr := v_0
 		y := v_1
 		if y.Op != OpAMD64SETA {
@@ -10543,8 +10528,8 @@
 			break
 		}
 		v.reset(OpAMD64SETAstore)
-		v.AuxInt = off
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off)
+		v.Aux = symToAux(sym)
 		v.AddArg3(ptr, x, mem)
 		return true
 	}
@@ -10552,8 +10537,8 @@
 	// cond: y.Uses == 1
 	// result: (SETAEstore [off] {sym} ptr x mem)
 	for {
-		off := v.AuxInt
-		sym := v.Aux
+		off := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		ptr := v_0
 		y := v_1
 		if y.Op != OpAMD64SETAE {
@@ -10565,16 +10550,16 @@
 			break
 		}
 		v.reset(OpAMD64SETAEstore)
-		v.AuxInt = off
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off)
+		v.Aux = symToAux(sym)
 		v.AddArg3(ptr, x, mem)
 		return true
 	}
 	// match: (MOVBstore [off] {sym} ptr (MOVBQSX x) mem)
 	// result: (MOVBstore [off] {sym} ptr x mem)
 	for {
-		off := v.AuxInt
-		sym := v.Aux
+		off := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		ptr := v_0
 		if v_1.Op != OpAMD64MOVBQSX {
 			break
@@ -10582,16 +10567,16 @@
 		x := v_1.Args[0]
 		mem := v_2
 		v.reset(OpAMD64MOVBstore)
-		v.AuxInt = off
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off)
+		v.Aux = symToAux(sym)
 		v.AddArg3(ptr, x, mem)
 		return true
 	}
 	// match: (MOVBstore [off] {sym} ptr (MOVBQZX x) mem)
 	// result: (MOVBstore [off] {sym} ptr x mem)
 	for {
-		off := v.AuxInt
-		sym := v.Aux
+		off := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		ptr := v_0
 		if v_1.Op != OpAMD64MOVBQZX {
 			break
@@ -10599,72 +10584,64 @@
 		x := v_1.Args[0]
 		mem := v_2
 		v.reset(OpAMD64MOVBstore)
-		v.AuxInt = off
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off)
+		v.Aux = symToAux(sym)
 		v.AddArg3(ptr, x, mem)
 		return true
 	}
 	// match: (MOVBstore [off1] {sym} (ADDQconst [off2] ptr) val mem)
-	// cond: is32Bit(off1+off2)
+	// cond: is32Bit(int64(off1)+int64(off2))
 	// result: (MOVBstore [off1+off2] {sym} ptr val mem)
 	for {
-		off1 := v.AuxInt
-		sym := v.Aux
+		off1 := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		if v_0.Op != OpAMD64ADDQconst {
 			break
 		}
-		off2 := v_0.AuxInt
+		off2 := auxIntToInt32(v_0.AuxInt)
 		ptr := v_0.Args[0]
 		val := v_1
 		mem := v_2
-		if !(is32Bit(off1 + off2)) {
+		if !(is32Bit(int64(off1) + int64(off2))) {
 			break
 		}
 		v.reset(OpAMD64MOVBstore)
-		v.AuxInt = off1 + off2
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off1 + off2)
+		v.Aux = symToAux(sym)
 		v.AddArg3(ptr, val, mem)
 		return true
 	}
 	// match: (MOVBstore [off] {sym} ptr (MOVLconst [c]) mem)
-	// cond: validOff(off)
-	// result: (MOVBstoreconst [makeValAndOff(int64(int8(c)),off)] {sym} ptr mem)
+	// result: (MOVBstoreconst [makeValAndOff32(int32(int8(c)),off)] {sym} ptr mem)
 	for {
-		off := v.AuxInt
-		sym := v.Aux
+		off := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		ptr := v_0
 		if v_1.Op != OpAMD64MOVLconst {
 			break
 		}
-		c := v_1.AuxInt
+		c := auxIntToInt32(v_1.AuxInt)
 		mem := v_2
-		if !(validOff(off)) {
-			break
-		}
 		v.reset(OpAMD64MOVBstoreconst)
-		v.AuxInt = makeValAndOff(int64(int8(c)), off)
-		v.Aux = sym
+		v.AuxInt = valAndOffToAuxInt(makeValAndOff32(int32(int8(c)), off))
+		v.Aux = symToAux(sym)
 		v.AddArg2(ptr, mem)
 		return true
 	}
 	// match: (MOVBstore [off] {sym} ptr (MOVQconst [c]) mem)
-	// cond: validOff(off)
-	// result: (MOVBstoreconst [makeValAndOff(int64(int8(c)),off)] {sym} ptr mem)
+	// result: (MOVBstoreconst [makeValAndOff32(int32(int8(c)),off)] {sym} ptr mem)
 	for {
-		off := v.AuxInt
-		sym := v.Aux
+		off := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		ptr := v_0
 		if v_1.Op != OpAMD64MOVQconst {
 			break
 		}
-		c := v_1.AuxInt
+		c := auxIntToInt64(v_1.AuxInt)
 		mem := v_2
-		if !(validOff(off)) {
-			break
-		}
 		v.reset(OpAMD64MOVBstoreconst)
-		v.AuxInt = makeValAndOff(int64(int8(c)), off)
-		v.Aux = sym
+		v.AuxInt = valAndOffToAuxInt(makeValAndOff32(int32(int8(c)), off))
+		v.Aux = symToAux(sym)
 		v.AddArg2(ptr, mem)
 		return true
 	}
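Where the old rules built a ValAndOff with makeValAndOff and guarded it with validOff(off), the new ones call makeValAndOff32 on int32 arguments and drop the guard entirely: a 32-bit value and a 32-bit offset are valid by construction. A sketch of the constructor, under the same packing assumption as above:

	// Sketch: both halves are 32 bits, so no validity check is needed.
	func makeValAndOff32(val, off int32) ValAndOff {
		return ValAndOff(int64(val)<<32 + int64(uint32(off)))
	}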
@@ -11542,23 +11519,23 @@
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	// match: (MOVBstoreconst [sc] {s} (ADDQconst [off] ptr) mem)
-	// cond: ValAndOff(sc).canAdd(off)
-	// result: (MOVBstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
+	// cond: ValAndOff(sc).canAdd32(off)
+	// result: (MOVBstoreconst [ValAndOff(sc).addOffset32(off)] {s} ptr mem)
 	for {
-		sc := v.AuxInt
-		s := v.Aux
+		sc := auxIntToValAndOff(v.AuxInt)
+		s := auxToSym(v.Aux)
 		if v_0.Op != OpAMD64ADDQconst {
 			break
 		}
-		off := v_0.AuxInt
+		off := auxIntToInt32(v_0.AuxInt)
 		ptr := v_0.Args[0]
 		mem := v_1
-		if !(ValAndOff(sc).canAdd(off)) {
+		if !(ValAndOff(sc).canAdd32(off)) {
 			break
 		}
 		v.reset(OpAMD64MOVBstoreconst)
-		v.AuxInt = ValAndOff(sc).add(off)
-		v.Aux = s
+		v.AuxInt = valAndOffToAuxInt(ValAndOff(sc).addOffset32(off))
+		v.Aux = symToAux(s)
 		v.AddArg2(ptr, mem)
 		return true
 	}
@@ -11690,8 +11667,8 @@
 		if x.Op != OpAMD64MOVLload {
 			break
 		}
-		off := x.AuxInt
-		sym := x.Aux
+		off := auxIntToInt32(x.AuxInt)
+		sym := auxToSym(x.Aux)
 		mem := x.Args[1]
 		ptr := x.Args[0]
 		if !(x.Uses == 1 && clobber(x)) {
@@ -11700,8 +11677,8 @@
 		b = x.Block
 		v0 := b.NewValue0(x.Pos, OpAMD64MOVLQSXload, v.Type)
 		v.copyOf(v0)
-		v0.AuxInt = off
-		v0.Aux = sym
+		v0.AuxInt = int32ToAuxInt(off)
+		v0.Aux = symToAux(sym)
 		v0.AddArg2(ptr, mem)
 		return true
 	}
@@ -11713,8 +11690,8 @@
 		if x.Op != OpAMD64MOVQload {
 			break
 		}
-		off := x.AuxInt
-		sym := x.Aux
+		off := auxIntToInt32(x.AuxInt)
+		sym := auxToSym(x.Aux)
 		mem := x.Args[1]
 		ptr := x.Args[0]
 		if !(x.Uses == 1 && clobber(x)) {
@@ -11723,25 +11700,25 @@
 		b = x.Block
 		v0 := b.NewValue0(x.Pos, OpAMD64MOVLQSXload, v.Type)
 		v.copyOf(v0)
-		v0.AuxInt = off
-		v0.Aux = sym
+		v0.AuxInt = int32ToAuxInt(off)
+		v0.Aux = symToAux(sym)
 		v0.AddArg2(ptr, mem)
 		return true
 	}
 	// match: (MOVLQSX (ANDLconst [c] x))
-	// cond: c & 0x80000000 == 0
+	// cond: uint32(c) & 0x80000000 == 0
 	// result: (ANDLconst [c & 0x7fffffff] x)
 	for {
 		if v_0.Op != OpAMD64ANDLconst {
 			break
 		}
-		c := v_0.AuxInt
+		c := auxIntToInt32(v_0.AuxInt)
 		x := v_0.Args[0]
-		if !(c&0x80000000 == 0) {
+		if !(uint32(c)&0x80000000 == 0) {
 			break
 		}
 		v.reset(OpAMD64ANDLconst)
-		v.AuxInt = c & 0x7fffffff
+		v.AuxInt = int32ToAuxInt(c & 0x7fffffff)
 		v.AddArg(x)
 		return true
 	}
@@ -11787,14 +11764,14 @@
 	// cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
 	// result: (MOVLQSX x)
 	for {
-		off := v.AuxInt
-		sym := v.Aux
+		off := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		ptr := v_0
 		if v_1.Op != OpAMD64MOVLstore {
 			break
 		}
-		off2 := v_1.AuxInt
-		sym2 := v_1.Aux
+		off2 := auxIntToInt32(v_1.AuxInt)
+		sym2 := auxToSym(v_1.Aux)
 		x := v_1.Args[1]
 		ptr2 := v_1.Args[0]
 		if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
@@ -11839,8 +11816,8 @@
 		if x.Op != OpAMD64MOVLload {
 			break
 		}
-		off := x.AuxInt
-		sym := x.Aux
+		off := auxIntToInt32(x.AuxInt)
+		sym := auxToSym(x.Aux)
 		mem := x.Args[1]
 		ptr := x.Args[0]
 		if !(x.Uses == 1 && clobber(x)) {
@@ -11849,8 +11826,8 @@
 		b = x.Block
 		v0 := b.NewValue0(x.Pos, OpAMD64MOVLload, v.Type)
 		v.copyOf(v0)
-		v0.AuxInt = off
-		v0.Aux = sym
+		v0.AuxInt = int32ToAuxInt(off)
+		v0.Aux = symToAux(sym)
 		v0.AddArg2(ptr, mem)
 		return true
 	}
@@ -11862,8 +11839,8 @@
 		if x.Op != OpAMD64MOVQload {
 			break
 		}
-		off := x.AuxInt
-		sym := x.Aux
+		off := auxIntToInt32(x.AuxInt)
+		sym := auxToSym(x.Aux)
 		mem := x.Args[1]
 		ptr := x.Args[0]
 		if !(x.Uses == 1 && clobber(x)) {
@@ -11872,8 +11849,8 @@
 		b = x.Block
 		v0 := b.NewValue0(x.Pos, OpAMD64MOVLload, v.Type)
 		v.copyOf(v0)
-		v0.AuxInt = off
-		v0.Aux = sym
+		v0.AuxInt = int32ToAuxInt(off)
+		v0.Aux = symToAux(sym)
 		v0.AddArg2(ptr, mem)
 		return true
 	}
@@ -11894,10 +11871,10 @@
 		if v_0.Op != OpAMD64ANDLconst {
 			break
 		}
-		c := v_0.AuxInt
+		c := auxIntToInt32(v_0.AuxInt)
 		x := v_0.Args[0]
 		v.reset(OpAMD64ANDLconst)
-		v.AuxInt = c
+		v.AuxInt = int32ToAuxInt(c)
 		v.AddArg(x)
 		return true
 	}
@@ -12045,14 +12022,14 @@
 	// cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
 	// result: (MOVLQZX x)
 	for {
-		off := v.AuxInt
-		sym := v.Aux
+		off := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		ptr := v_0
 		if v_1.Op != OpAMD64MOVLstore {
 			break
 		}
-		off2 := v_1.AuxInt
-		sym2 := v_1.Aux
+		off2 := auxIntToInt32(v_1.AuxInt)
+		sym2 := auxToSym(v_1.Aux)
 		x := v_1.Args[1]
 		ptr2 := v_1.Args[0]
 		if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
@@ -12063,23 +12040,23 @@
 		return true
 	}
 	// match: (MOVLload [off1] {sym} (ADDQconst [off2] ptr) mem)
-	// cond: is32Bit(off1+off2)
+	// cond: is32Bit(int64(off1)+int64(off2))
 	// result: (MOVLload [off1+off2] {sym} ptr mem)
 	for {
-		off1 := v.AuxInt
-		sym := v.Aux
+		off1 := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		if v_0.Op != OpAMD64ADDQconst {
 			break
 		}
-		off2 := v_0.AuxInt
+		off2 := auxIntToInt32(v_0.AuxInt)
 		ptr := v_0.Args[0]
 		mem := v_1
-		if !(is32Bit(off1 + off2)) {
+		if !(is32Bit(int64(off1) + int64(off2))) {
 			break
 		}
 		v.reset(OpAMD64MOVLload)
-		v.AuxInt = off1 + off2
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off1 + off2)
+		v.Aux = symToAux(sym)
 		v.AddArg2(ptr, mem)
 		return true
 	}
@@ -12189,8 +12166,8 @@
 	// match: (MOVLstore [off] {sym} ptr (MOVLQSX x) mem)
 	// result: (MOVLstore [off] {sym} ptr x mem)
 	for {
-		off := v.AuxInt
-		sym := v.Aux
+		off := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		ptr := v_0
 		if v_1.Op != OpAMD64MOVLQSX {
 			break
@@ -12198,16 +12175,16 @@
 		x := v_1.Args[0]
 		mem := v_2
 		v.reset(OpAMD64MOVLstore)
-		v.AuxInt = off
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off)
+		v.Aux = symToAux(sym)
 		v.AddArg3(ptr, x, mem)
 		return true
 	}
 	// match: (MOVLstore [off] {sym} ptr (MOVLQZX x) mem)
 	// result: (MOVLstore [off] {sym} ptr x mem)
 	for {
-		off := v.AuxInt
-		sym := v.Aux
+		off := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		ptr := v_0
 		if v_1.Op != OpAMD64MOVLQZX {
 			break
@@ -12215,72 +12192,64 @@
 		x := v_1.Args[0]
 		mem := v_2
 		v.reset(OpAMD64MOVLstore)
-		v.AuxInt = off
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off)
+		v.Aux = symToAux(sym)
 		v.AddArg3(ptr, x, mem)
 		return true
 	}
 	// match: (MOVLstore [off1] {sym} (ADDQconst [off2] ptr) val mem)
-	// cond: is32Bit(off1+off2)
+	// cond: is32Bit(int64(off1)+int64(off2))
 	// result: (MOVLstore [off1+off2] {sym} ptr val mem)
 	for {
-		off1 := v.AuxInt
-		sym := v.Aux
+		off1 := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		if v_0.Op != OpAMD64ADDQconst {
 			break
 		}
-		off2 := v_0.AuxInt
+		off2 := auxIntToInt32(v_0.AuxInt)
 		ptr := v_0.Args[0]
 		val := v_1
 		mem := v_2
-		if !(is32Bit(off1 + off2)) {
+		if !(is32Bit(int64(off1) + int64(off2))) {
 			break
 		}
 		v.reset(OpAMD64MOVLstore)
-		v.AuxInt = off1 + off2
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off1 + off2)
+		v.Aux = symToAux(sym)
 		v.AddArg3(ptr, val, mem)
 		return true
 	}
 	// match: (MOVLstore [off] {sym} ptr (MOVLconst [c]) mem)
-	// cond: validOff(off)
-	// result: (MOVLstoreconst [makeValAndOff(int64(int32(c)),off)] {sym} ptr mem)
+	// result: (MOVLstoreconst [makeValAndOff32(int32(c),off)] {sym} ptr mem)
 	for {
-		off := v.AuxInt
-		sym := v.Aux
+		off := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		ptr := v_0
 		if v_1.Op != OpAMD64MOVLconst {
 			break
 		}
-		c := v_1.AuxInt
+		c := auxIntToInt32(v_1.AuxInt)
 		mem := v_2
-		if !(validOff(off)) {
-			break
-		}
 		v.reset(OpAMD64MOVLstoreconst)
-		v.AuxInt = makeValAndOff(int64(int32(c)), off)
-		v.Aux = sym
+		v.AuxInt = valAndOffToAuxInt(makeValAndOff32(int32(c), off))
+		v.Aux = symToAux(sym)
 		v.AddArg2(ptr, mem)
 		return true
 	}
 	// match: (MOVLstore [off] {sym} ptr (MOVQconst [c]) mem)
-	// cond: validOff(off)
-	// result: (MOVLstoreconst [makeValAndOff(int64(int32(c)),off)] {sym} ptr mem)
+	// result: (MOVLstoreconst [makeValAndOff32(int32(c),off)] {sym} ptr mem)
 	for {
-		off := v.AuxInt
-		sym := v.Aux
+		off := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		ptr := v_0
 		if v_1.Op != OpAMD64MOVQconst {
 			break
 		}
-		c := v_1.AuxInt
+		c := auxIntToInt64(v_1.AuxInt)
 		mem := v_2
-		if !(validOff(off)) {
-			break
-		}
 		v.reset(OpAMD64MOVLstoreconst)
-		v.AuxInt = makeValAndOff(int64(int32(c)), off)
-		v.Aux = sym
+		v.AuxInt = valAndOffToAuxInt(makeValAndOff32(int32(c), off))
+		v.Aux = symToAux(sym)
 		v.AddArg2(ptr, mem)
 		return true
 	}
@@ -13048,23 +13017,23 @@
 	b := v.Block
 	typ := &b.Func.Config.Types
 	// match: (MOVLstoreconst [sc] {s} (ADDQconst [off] ptr) mem)
-	// cond: ValAndOff(sc).canAdd(off)
-	// result: (MOVLstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
+	// cond: ValAndOff(sc).canAdd32(off)
+	// result: (MOVLstoreconst [ValAndOff(sc).addOffset32(off)] {s} ptr mem)
 	for {
-		sc := v.AuxInt
-		s := v.Aux
+		sc := auxIntToValAndOff(v.AuxInt)
+		s := auxToSym(v.Aux)
 		if v_0.Op != OpAMD64ADDQconst {
 			break
 		}
-		off := v_0.AuxInt
+		off := auxIntToInt32(v_0.AuxInt)
 		ptr := v_0.Args[0]
 		mem := v_1
-		if !(ValAndOff(sc).canAdd(off)) {
+		if !(ValAndOff(sc).canAdd32(off)) {
 			break
 		}
 		v.reset(OpAMD64MOVLstoreconst)
-		v.AuxInt = ValAndOff(sc).add(off)
-		v.Aux = s
+		v.AuxInt = valAndOffToAuxInt(ValAndOff(sc).addOffset32(off))
+		v.Aux = symToAux(s)
 		v.AddArg2(ptr, mem)
 		return true
 	}
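
[Editor's note] MOVLstoreconst carries both the stored value and the address offset in a single AuxInt, so its folding rule moves from the untyped canAdd/add pair to the 32-bit-checked canAdd32/addOffset32. A sketch of the packing, assuming a 32/32 split with the value in the high half:

	type ValAndOff int64

	func (x ValAndOff) Val() int32 { return int32(int64(x) >> 32) }
	func (x ValAndOff) Off() int32 { return int32(int64(x)) }

	func makeValAndOff32(val, off int32) ValAndOff {
		// The uint32 conversion keeps a negative offset from
		// sign-extending into the value half.
		return ValAndOff(int64(val)<<32 | int64(uint32(off)))
	}

	func (x ValAndOff) canAdd32(off int32) bool {
		newoff := int64(x.Off()) + int64(off)
		return newoff == int64(int32(newoff))
	}

	func (x ValAndOff) addOffset32(off int32) ValAndOff {
		if !x.canAdd32(off) {
			panic("invalid ValAndOff.addOffset32")
		}
		return makeValAndOff32(x.Val(), x.Off()+off)
	}
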
@@ -13193,23 +13162,23 @@
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	// match: (MOVOload [off1] {sym} (ADDQconst [off2] ptr) mem)
-	// cond: is32Bit(off1+off2)
+	// cond: is32Bit(int64(off1)+int64(off2))
 	// result: (MOVOload [off1+off2] {sym} ptr mem)
 	for {
-		off1 := v.AuxInt
-		sym := v.Aux
+		off1 := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		if v_0.Op != OpAMD64ADDQconst {
 			break
 		}
-		off2 := v_0.AuxInt
+		off2 := auxIntToInt32(v_0.AuxInt)
 		ptr := v_0.Args[0]
 		mem := v_1
-		if !(is32Bit(off1 + off2)) {
+		if !(is32Bit(int64(off1) + int64(off2))) {
 			break
 		}
 		v.reset(OpAMD64MOVOload)
-		v.AuxInt = off1 + off2
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off1 + off2)
+		v.Aux = symToAux(sym)
 		v.AddArg2(ptr, mem)
 		return true
 	}
@@ -13245,24 +13214,24 @@
 	config := b.Func.Config
 	typ := &b.Func.Config.Types
 	// match: (MOVOstore [off1] {sym} (ADDQconst [off2] ptr) val mem)
-	// cond: is32Bit(off1+off2)
+	// cond: is32Bit(int64(off1)+int64(off2))
 	// result: (MOVOstore [off1+off2] {sym} ptr val mem)
 	for {
-		off1 := v.AuxInt
-		sym := v.Aux
+		off1 := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		if v_0.Op != OpAMD64ADDQconst {
 			break
 		}
-		off2 := v_0.AuxInt
+		off2 := auxIntToInt32(v_0.AuxInt)
 		ptr := v_0.Args[0]
 		val := v_1
 		mem := v_2
-		if !(is32Bit(off1 + off2)) {
+		if !(is32Bit(int64(off1) + int64(off2))) {
 			break
 		}
 		v.reset(OpAMD64MOVOstore)
-		v.AuxInt = off1 + off2
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off1 + off2)
+		v.Aux = symToAux(sym)
 		v.AddArg3(ptr, val, mem)
 		return true
 	}
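
[Editor's note] Every offset-folding rule in this file picks up the same condition change: with off1 and off2 now typed as int32, writing is32Bit(off1+off2) would let the addition wrap in 32 bits before the check ever ran, so both operands are widened to int64 first. is32Bit itself is the usual representability test:

	func is32Bit(x int64) bool {
		return x == int64(int32(x))
	}

	// Why the widening matters:
	//   off1, off2 := int32(1<<31-1), int32(1)
	//   is32Bit(int64(off1+off2))          // int32 add wraps to -1<<31 first: true, wrongly
	//   is32Bit(int64(off1)+int64(off2))   // 1<<31 is not a 32-bit value: false
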
@@ -13434,14 +13403,14 @@
 	// cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
 	// result: x
 	for {
-		off := v.AuxInt
-		sym := v.Aux
+		off := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		ptr := v_0
 		if v_1.Op != OpAMD64MOVQstore {
 			break
 		}
-		off2 := v_1.AuxInt
-		sym2 := v_1.Aux
+		off2 := auxIntToInt32(v_1.AuxInt)
+		sym2 := auxToSym(v_1.Aux)
 		x := v_1.Args[1]
 		ptr2 := v_1.Args[0]
 		if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
@@ -13451,23 +13420,23 @@
 		return true
 	}
 	// match: (MOVQload [off1] {sym} (ADDQconst [off2] ptr) mem)
-	// cond: is32Bit(off1+off2)
+	// cond: is32Bit(int64(off1)+int64(off2))
 	// result: (MOVQload [off1+off2] {sym} ptr mem)
 	for {
-		off1 := v.AuxInt
-		sym := v.Aux
+		off1 := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		if v_0.Op != OpAMD64ADDQconst {
 			break
 		}
-		off2 := v_0.AuxInt
+		off2 := auxIntToInt32(v_0.AuxInt)
 		ptr := v_0.Args[0]
 		mem := v_1
-		if !(is32Bit(off1 + off2)) {
+		if !(is32Bit(int64(off1) + int64(off2))) {
 			break
 		}
 		v.reset(OpAMD64MOVQload)
-		v.AuxInt = off1 + off2
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off1 + off2)
+		v.Aux = symToAux(sym)
 		v.AddArg2(ptr, mem)
 		return true
 	}
@@ -13573,45 +13542,45 @@
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	// match: (MOVQstore [off1] {sym} (ADDQconst [off2] ptr) val mem)
-	// cond: is32Bit(off1+off2)
+	// cond: is32Bit(int64(off1)+int64(off2))
 	// result: (MOVQstore [off1+off2] {sym} ptr val mem)
 	for {
-		off1 := v.AuxInt
-		sym := v.Aux
+		off1 := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		if v_0.Op != OpAMD64ADDQconst {
 			break
 		}
-		off2 := v_0.AuxInt
+		off2 := auxIntToInt32(v_0.AuxInt)
 		ptr := v_0.Args[0]
 		val := v_1
 		mem := v_2
-		if !(is32Bit(off1 + off2)) {
+		if !(is32Bit(int64(off1) + int64(off2))) {
 			break
 		}
 		v.reset(OpAMD64MOVQstore)
-		v.AuxInt = off1 + off2
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off1 + off2)
+		v.Aux = symToAux(sym)
 		v.AddArg3(ptr, val, mem)
 		return true
 	}
 	// match: (MOVQstore [off] {sym} ptr (MOVQconst [c]) mem)
-	// cond: validValAndOff(c,off)
-	// result: (MOVQstoreconst [makeValAndOff(c,off)] {sym} ptr mem)
+	// cond: validVal(c)
+	// result: (MOVQstoreconst [makeValAndOff32(int32(c),off)] {sym} ptr mem)
 	for {
-		off := v.AuxInt
-		sym := v.Aux
+		off := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		ptr := v_0
 		if v_1.Op != OpAMD64MOVQconst {
 			break
 		}
-		c := v_1.AuxInt
+		c := auxIntToInt64(v_1.AuxInt)
 		mem := v_2
-		if !(validValAndOff(c, off)) {
+		if !(validVal(c)) {
 			break
 		}
 		v.reset(OpAMD64MOVQstoreconst)
-		v.AuxInt = makeValAndOff(c, off)
-		v.Aux = sym
+		v.AuxInt = valAndOffToAuxInt(makeValAndOff32(int32(c), off))
+		v.Aux = symToAux(sym)
 		v.AddArg2(ptr, mem)
 		return true
 	}
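
[Editor's note] For MOVQstore of a MOVQconst, the old validValAndOff(c, off) guard splits in two: the offset needs no check at all once it is an int32, but the value must still fit the sign-extended 32-bit immediate that MOVQstoreconst encodes, which is what the new validVal condition expresses:

	// validVal reports whether val fits in the sign-extended
	// 32-bit immediate of a quad store of a constant.
	func validVal(val int64) bool {
		return val == int64(int32(val))
	}
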
@@ -14229,23 +14198,23 @@
 	b := v.Block
 	config := b.Func.Config
 	// match: (MOVQstoreconst [sc] {s} (ADDQconst [off] ptr) mem)
-	// cond: ValAndOff(sc).canAdd(off)
-	// result: (MOVQstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
+	// cond: ValAndOff(sc).canAdd32(off)
+	// result: (MOVQstoreconst [ValAndOff(sc).addOffset32(off)] {s} ptr mem)
 	for {
-		sc := v.AuxInt
-		s := v.Aux
+		sc := auxIntToValAndOff(v.AuxInt)
+		s := auxToSym(v.Aux)
 		if v_0.Op != OpAMD64ADDQconst {
 			break
 		}
-		off := v_0.AuxInt
+		off := auxIntToInt32(v_0.AuxInt)
 		ptr := v_0.Args[0]
 		mem := v_1
-		if !(ValAndOff(sc).canAdd(off)) {
+		if !(ValAndOff(sc).canAdd32(off)) {
 			break
 		}
 		v.reset(OpAMD64MOVQstoreconst)
-		v.AuxInt = ValAndOff(sc).add(off)
-		v.Aux = s
+		v.AuxInt = valAndOffToAuxInt(ValAndOff(sc).addOffset32(off))
+		v.Aux = symToAux(s)
 		v.AddArg2(ptr, mem)
 		return true
 	}
@@ -14347,23 +14316,23 @@
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	// match: (MOVSDload [off1] {sym} (ADDQconst [off2] ptr) mem)
-	// cond: is32Bit(off1+off2)
+	// cond: is32Bit(int64(off1)+int64(off2))
 	// result: (MOVSDload [off1+off2] {sym} ptr mem)
 	for {
-		off1 := v.AuxInt
-		sym := v.Aux
+		off1 := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		if v_0.Op != OpAMD64ADDQconst {
 			break
 		}
-		off2 := v_0.AuxInt
+		off2 := auxIntToInt32(v_0.AuxInt)
 		ptr := v_0.Args[0]
 		mem := v_1
-		if !(is32Bit(off1 + off2)) {
+		if !(is32Bit(int64(off1) + int64(off2))) {
 			break
 		}
 		v.reset(OpAMD64MOVSDload)
-		v.AuxInt = off1 + off2
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off1 + off2)
+		v.Aux = symToAux(sym)
 		v.AddArg2(ptr, mem)
 		return true
 	}
@@ -14413,24 +14382,24 @@
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	// match: (MOVSDstore [off1] {sym} (ADDQconst [off2] ptr) val mem)
-	// cond: is32Bit(off1+off2)
+	// cond: is32Bit(int64(off1)+int64(off2))
 	// result: (MOVSDstore [off1+off2] {sym} ptr val mem)
 	for {
-		off1 := v.AuxInt
-		sym := v.Aux
+		off1 := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		if v_0.Op != OpAMD64ADDQconst {
 			break
 		}
-		off2 := v_0.AuxInt
+		off2 := auxIntToInt32(v_0.AuxInt)
 		ptr := v_0.Args[0]
 		val := v_1
 		mem := v_2
-		if !(is32Bit(off1 + off2)) {
+		if !(is32Bit(int64(off1) + int64(off2))) {
 			break
 		}
 		v.reset(OpAMD64MOVSDstore)
-		v.AuxInt = off1 + off2
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off1 + off2)
+		v.Aux = symToAux(sym)
 		v.AddArg3(ptr, val, mem)
 		return true
 	}
@@ -14480,23 +14449,23 @@
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	// match: (MOVSSload [off1] {sym} (ADDQconst [off2] ptr) mem)
-	// cond: is32Bit(off1+off2)
+	// cond: is32Bit(int64(off1)+int64(off2))
 	// result: (MOVSSload [off1+off2] {sym} ptr mem)
 	for {
-		off1 := v.AuxInt
-		sym := v.Aux
+		off1 := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		if v_0.Op != OpAMD64ADDQconst {
 			break
 		}
-		off2 := v_0.AuxInt
+		off2 := auxIntToInt32(v_0.AuxInt)
 		ptr := v_0.Args[0]
 		mem := v_1
-		if !(is32Bit(off1 + off2)) {
+		if !(is32Bit(int64(off1) + int64(off2))) {
 			break
 		}
 		v.reset(OpAMD64MOVSSload)
-		v.AuxInt = off1 + off2
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off1 + off2)
+		v.Aux = symToAux(sym)
 		v.AddArg2(ptr, mem)
 		return true
 	}
@@ -14546,24 +14515,24 @@
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	// match: (MOVSSstore [off1] {sym} (ADDQconst [off2] ptr) val mem)
-	// cond: is32Bit(off1+off2)
+	// cond: is32Bit(int64(off1)+int64(off2))
 	// result: (MOVSSstore [off1+off2] {sym} ptr val mem)
 	for {
-		off1 := v.AuxInt
-		sym := v.Aux
+		off1 := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		if v_0.Op != OpAMD64ADDQconst {
 			break
 		}
-		off2 := v_0.AuxInt
+		off2 := auxIntToInt32(v_0.AuxInt)
 		ptr := v_0.Args[0]
 		val := v_1
 		mem := v_2
-		if !(is32Bit(off1 + off2)) {
+		if !(is32Bit(int64(off1) + int64(off2))) {
 			break
 		}
 		v.reset(OpAMD64MOVSSstore)
-		v.AuxInt = off1 + off2
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off1 + off2)
+		v.Aux = symToAux(sym)
 		v.AddArg3(ptr, val, mem)
 		return true
 	}
@@ -14620,8 +14589,8 @@
 		if x.Op != OpAMD64MOVWload {
 			break
 		}
-		off := x.AuxInt
-		sym := x.Aux
+		off := auxIntToInt32(x.AuxInt)
+		sym := auxToSym(x.Aux)
 		mem := x.Args[1]
 		ptr := x.Args[0]
 		if !(x.Uses == 1 && clobber(x)) {
@@ -14630,8 +14599,8 @@
 		b = x.Block
 		v0 := b.NewValue0(x.Pos, OpAMD64MOVWQSXload, v.Type)
 		v.copyOf(v0)
-		v0.AuxInt = off
-		v0.Aux = sym
+		v0.AuxInt = int32ToAuxInt(off)
+		v0.Aux = symToAux(sym)
 		v0.AddArg2(ptr, mem)
 		return true
 	}
@@ -14643,8 +14612,8 @@
 		if x.Op != OpAMD64MOVLload {
 			break
 		}
-		off := x.AuxInt
-		sym := x.Aux
+		off := auxIntToInt32(x.AuxInt)
+		sym := auxToSym(x.Aux)
 		mem := x.Args[1]
 		ptr := x.Args[0]
 		if !(x.Uses == 1 && clobber(x)) {
@@ -14653,8 +14622,8 @@
 		b = x.Block
 		v0 := b.NewValue0(x.Pos, OpAMD64MOVWQSXload, v.Type)
 		v.copyOf(v0)
-		v0.AuxInt = off
-		v0.Aux = sym
+		v0.AuxInt = int32ToAuxInt(off)
+		v0.Aux = symToAux(sym)
 		v0.AddArg2(ptr, mem)
 		return true
 	}
@@ -14666,8 +14635,8 @@
 		if x.Op != OpAMD64MOVQload {
 			break
 		}
-		off := x.AuxInt
-		sym := x.Aux
+		off := auxIntToInt32(x.AuxInt)
+		sym := auxToSym(x.Aux)
 		mem := x.Args[1]
 		ptr := x.Args[0]
 		if !(x.Uses == 1 && clobber(x)) {
@@ -14676,8 +14645,8 @@
 		b = x.Block
 		v0 := b.NewValue0(x.Pos, OpAMD64MOVWQSXload, v.Type)
 		v.copyOf(v0)
-		v0.AuxInt = off
-		v0.Aux = sym
+		v0.AuxInt = int32ToAuxInt(off)
+		v0.Aux = symToAux(sym)
 		v0.AddArg2(ptr, mem)
 		return true
 	}
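
[Editor's note] The three MOVWQSX rules above fold the sign extension into the load itself (MOVWQSXload), but only when the load has no other uses: x.Uses == 1 guards the rewrite, and clobber(x) then invalidates the old load so dead-code elimination can drop it. A sketch of the helper, assuming the shape used in rewrite.go:

	// clobber invalidates the given values and always returns true,
	// so it can appear as the last clause of a rule condition.
	func clobber(vv ...*Value) bool {
		for _, v := range vv {
			v.reset(OpInvalid)
			// v.Block is left intact; it is still read afterwards.
		}
		return true
	}
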
@@ -14688,13 +14657,13 @@
 		if v_0.Op != OpAMD64ANDLconst {
 			break
 		}
-		c := v_0.AuxInt
+		c := auxIntToInt32(v_0.AuxInt)
 		x := v_0.Args[0]
 		if !(c&0x8000 == 0) {
 			break
 		}
 		v.reset(OpAMD64ANDLconst)
-		v.AuxInt = c & 0x7fff
+		v.AuxInt = int32ToAuxInt(c & 0x7fff)
 		v.AddArg(x)
 		return true
 	}
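
[Editor's note] The MOVWQSX (ANDLconst [c] x) rule drops the extension outright when c&0x8000 == 0: if bit 15 of the mask is clear, the masked value's 16-bit sign bit is zero, so sign-extending it changes nothing and a plain ANDLconst [c & 0x7fff] suffices. A quick check of that reasoning:

	package main

	import "fmt"

	func main() {
		x := uint32(0xffff1234)
		masked := x & 0x7abc // bit 15 of the mask is clear, so masked < 0x8000
		sext := uint32(int32(int16(masked)))
		fmt.Println(masked == sext) // true: the sign extension was a no-op
	}
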
@@ -14729,14 +14698,14 @@
 	// cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
 	// result: (MOVWQSX x)
 	for {
-		off := v.AuxInt
-		sym := v.Aux
+		off := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		ptr := v_0
 		if v_1.Op != OpAMD64MOVWstore {
 			break
 		}
-		off2 := v_1.AuxInt
-		sym2 := v_1.Aux
+		off2 := auxIntToInt32(v_1.AuxInt)
+		sym2 := auxToSym(v_1.Aux)
 		x := v_1.Args[1]
 		ptr2 := v_1.Args[0]
 		if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
@@ -14781,8 +14750,8 @@
 		if x.Op != OpAMD64MOVWload {
 			break
 		}
-		off := x.AuxInt
-		sym := x.Aux
+		off := auxIntToInt32(x.AuxInt)
+		sym := auxToSym(x.Aux)
 		mem := x.Args[1]
 		ptr := x.Args[0]
 		if !(x.Uses == 1 && clobber(x)) {
@@ -14791,8 +14760,8 @@
 		b = x.Block
 		v0 := b.NewValue0(x.Pos, OpAMD64MOVWload, v.Type)
 		v.copyOf(v0)
-		v0.AuxInt = off
-		v0.Aux = sym
+		v0.AuxInt = int32ToAuxInt(off)
+		v0.Aux = symToAux(sym)
 		v0.AddArg2(ptr, mem)
 		return true
 	}
@@ -14804,8 +14773,8 @@
 		if x.Op != OpAMD64MOVLload {
 			break
 		}
-		off := x.AuxInt
-		sym := x.Aux
+		off := auxIntToInt32(x.AuxInt)
+		sym := auxToSym(x.Aux)
 		mem := x.Args[1]
 		ptr := x.Args[0]
 		if !(x.Uses == 1 && clobber(x)) {
@@ -14814,8 +14783,8 @@
 		b = x.Block
 		v0 := b.NewValue0(x.Pos, OpAMD64MOVWload, v.Type)
 		v.copyOf(v0)
-		v0.AuxInt = off
-		v0.Aux = sym
+		v0.AuxInt = int32ToAuxInt(off)
+		v0.Aux = symToAux(sym)
 		v0.AddArg2(ptr, mem)
 		return true
 	}
@@ -14827,8 +14796,8 @@
 		if x.Op != OpAMD64MOVQload {
 			break
 		}
-		off := x.AuxInt
-		sym := x.Aux
+		off := auxIntToInt32(x.AuxInt)
+		sym := auxToSym(x.Aux)
 		mem := x.Args[1]
 		ptr := x.Args[0]
 		if !(x.Uses == 1 && clobber(x)) {
@@ -14837,8 +14806,8 @@
 		b = x.Block
 		v0 := b.NewValue0(x.Pos, OpAMD64MOVWload, v.Type)
 		v.copyOf(v0)
-		v0.AuxInt = off
-		v0.Aux = sym
+		v0.AuxInt = int32ToAuxInt(off)
+		v0.Aux = symToAux(sym)
 		v0.AddArg2(ptr, mem)
 		return true
 	}
@@ -14859,10 +14828,10 @@
 		if v_0.Op != OpAMD64ANDLconst {
 			break
 		}
-		c := v_0.AuxInt
+		c := auxIntToInt32(v_0.AuxInt)
 		x := v_0.Args[0]
 		v.reset(OpAMD64ANDLconst)
-		v.AuxInt = c & 0xffff
+		v.AuxInt = int32ToAuxInt(c & 0xffff)
 		v.AddArg(x)
 		return true
 	}
@@ -14899,14 +14868,14 @@
 	// cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
 	// result: (MOVWQZX x)
 	for {
-		off := v.AuxInt
-		sym := v.Aux
+		off := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		ptr := v_0
 		if v_1.Op != OpAMD64MOVWstore {
 			break
 		}
-		off2 := v_1.AuxInt
-		sym2 := v_1.Aux
+		off2 := auxIntToInt32(v_1.AuxInt)
+		sym2 := auxToSym(v_1.Aux)
 		x := v_1.Args[1]
 		ptr2 := v_1.Args[0]
 		if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
@@ -14917,23 +14886,23 @@
 		return true
 	}
 	// match: (MOVWload [off1] {sym} (ADDQconst [off2] ptr) mem)
-	// cond: is32Bit(off1+off2)
+	// cond: is32Bit(int64(off1)+int64(off2))
 	// result: (MOVWload [off1+off2] {sym} ptr mem)
 	for {
-		off1 := v.AuxInt
-		sym := v.Aux
+		off1 := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		if v_0.Op != OpAMD64ADDQconst {
 			break
 		}
-		off2 := v_0.AuxInt
+		off2 := auxIntToInt32(v_0.AuxInt)
 		ptr := v_0.Args[0]
 		mem := v_1
-		if !(is32Bit(off1 + off2)) {
+		if !(is32Bit(int64(off1) + int64(off2))) {
 			break
 		}
 		v.reset(OpAMD64MOVWload)
-		v.AuxInt = off1 + off2
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off1 + off2)
+		v.Aux = symToAux(sym)
 		v.AddArg2(ptr, mem)
 		return true
 	}
@@ -15026,8 +14995,8 @@
 	// match: (MOVWstore [off] {sym} ptr (MOVWQSX x) mem)
 	// result: (MOVWstore [off] {sym} ptr x mem)
 	for {
-		off := v.AuxInt
-		sym := v.Aux
+		off := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		ptr := v_0
 		if v_1.Op != OpAMD64MOVWQSX {
 			break
@@ -15035,16 +15004,16 @@
 		x := v_1.Args[0]
 		mem := v_2
 		v.reset(OpAMD64MOVWstore)
-		v.AuxInt = off
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off)
+		v.Aux = symToAux(sym)
 		v.AddArg3(ptr, x, mem)
 		return true
 	}
 	// match: (MOVWstore [off] {sym} ptr (MOVWQZX x) mem)
 	// result: (MOVWstore [off] {sym} ptr x mem)
 	for {
-		off := v.AuxInt
-		sym := v.Aux
+		off := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		ptr := v_0
 		if v_1.Op != OpAMD64MOVWQZX {
 			break
@@ -15052,72 +15021,64 @@
 		x := v_1.Args[0]
 		mem := v_2
 		v.reset(OpAMD64MOVWstore)
-		v.AuxInt = off
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off)
+		v.Aux = symToAux(sym)
 		v.AddArg3(ptr, x, mem)
 		return true
 	}
 	// match: (MOVWstore [off1] {sym} (ADDQconst [off2] ptr) val mem)
-	// cond: is32Bit(off1+off2)
+	// cond: is32Bit(int64(off1)+int64(off2))
 	// result: (MOVWstore [off1+off2] {sym} ptr val mem)
 	for {
-		off1 := v.AuxInt
-		sym := v.Aux
+		off1 := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		if v_0.Op != OpAMD64ADDQconst {
 			break
 		}
-		off2 := v_0.AuxInt
+		off2 := auxIntToInt32(v_0.AuxInt)
 		ptr := v_0.Args[0]
 		val := v_1
 		mem := v_2
-		if !(is32Bit(off1 + off2)) {
+		if !(is32Bit(int64(off1) + int64(off2))) {
 			break
 		}
 		v.reset(OpAMD64MOVWstore)
-		v.AuxInt = off1 + off2
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off1 + off2)
+		v.Aux = symToAux(sym)
 		v.AddArg3(ptr, val, mem)
 		return true
 	}
 	// match: (MOVWstore [off] {sym} ptr (MOVLconst [c]) mem)
-	// cond: validOff(off)
-	// result: (MOVWstoreconst [makeValAndOff(int64(int16(c)),off)] {sym} ptr mem)
+	// result: (MOVWstoreconst [makeValAndOff32(int32(int16(c)),off)] {sym} ptr mem)
 	for {
-		off := v.AuxInt
-		sym := v.Aux
+		off := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		ptr := v_0
 		if v_1.Op != OpAMD64MOVLconst {
 			break
 		}
-		c := v_1.AuxInt
+		c := auxIntToInt32(v_1.AuxInt)
 		mem := v_2
-		if !(validOff(off)) {
-			break
-		}
 		v.reset(OpAMD64MOVWstoreconst)
-		v.AuxInt = makeValAndOff(int64(int16(c)), off)
-		v.Aux = sym
+		v.AuxInt = valAndOffToAuxInt(makeValAndOff32(int32(int16(c)), off))
+		v.Aux = symToAux(sym)
 		v.AddArg2(ptr, mem)
 		return true
 	}
 	// match: (MOVWstore [off] {sym} ptr (MOVQconst [c]) mem)
-	// cond: validOff(off)
-	// result: (MOVWstoreconst [makeValAndOff(int64(int16(c)),off)] {sym} ptr mem)
+	// result: (MOVWstoreconst [makeValAndOff32(int32(int16(c)),off)] {sym} ptr mem)
 	for {
-		off := v.AuxInt
-		sym := v.Aux
+		off := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		ptr := v_0
 		if v_1.Op != OpAMD64MOVQconst {
 			break
 		}
-		c := v_1.AuxInt
+		c := auxIntToInt64(v_1.AuxInt)
 		mem := v_2
-		if !(validOff(off)) {
-			break
-		}
 		v.reset(OpAMD64MOVWstoreconst)
-		v.AuxInt = makeValAndOff(int64(int16(c)), off)
-		v.Aux = sym
+		v.AuxInt = valAndOffToAuxInt(makeValAndOff32(int32(int16(c)), off))
+		v.Aux = symToAux(sym)
 		v.AddArg2(ptr, mem)
 		return true
 	}
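
[Editor's note] The MOVWstore-of-constant rules lose their validOff guard for the same reason as the MOVL variants, and the packed value is now built as int32(int16(c)): a word store writes only the low 16 bits, so the constant is truncated to int16 first and then sign-extended into the 32-bit value half of the ValAndOff. The truncation in isolation:

	// low16 models the int32(int16(c)) step in the rules above.
	func low16(c int64) int32 { return int32(int16(c)) }

	// low16(0x12345678) == 0x5678
	// low16(0x1234ffff) == -1 (sign-extended; harmless, only 16 bits are stored)
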
@@ -15454,23 +15415,23 @@
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	// match: (MOVWstoreconst [sc] {s} (ADDQconst [off] ptr) mem)
-	// cond: ValAndOff(sc).canAdd(off)
-	// result: (MOVWstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
+	// cond: ValAndOff(sc).canAdd32(off)
+	// result: (MOVWstoreconst [ValAndOff(sc).addOffset32(off)] {s} ptr mem)
 	for {
-		sc := v.AuxInt
-		s := v.Aux
+		sc := auxIntToValAndOff(v.AuxInt)
+		s := auxToSym(v.Aux)
 		if v_0.Op != OpAMD64ADDQconst {
 			break
 		}
-		off := v_0.AuxInt
+		off := auxIntToInt32(v_0.AuxInt)
 		ptr := v_0.Args[0]
 		mem := v_1
-		if !(ValAndOff(sc).canAdd(off)) {
+		if !(ValAndOff(sc).canAdd32(off)) {
 			break
 		}
 		v.reset(OpAMD64MOVWstoreconst)
-		v.AuxInt = ValAndOff(sc).add(off)
-		v.Aux = s
+		v.AuxInt = valAndOffToAuxInt(ValAndOff(sc).addOffset32(off))
+		v.Aux = symToAux(s)
 		v.AddArg2(ptr, mem)
 		return true
 	}
@@ -15602,9 +15563,9 @@
 			if v_1.Op != OpAMD64MOVLconst {
 				continue
 			}
-			c := v_1.AuxInt
+			c := auxIntToInt32(v_1.AuxInt)
 			v.reset(OpAMD64MULLconst)
-			v.AuxInt = c
+			v.AuxInt = int32ToAuxInt(c)
 			v.AddArg(x)
 			return true
 		}
@@ -15616,23 +15577,23 @@
 	v_0 := v.Args[0]
 	b := v.Block
 	// match: (MULLconst [c] (MULLconst [d] x))
-	// result: (MULLconst [int64(int32(c * d))] x)
+	// result: (MULLconst [c * d] x)
 	for {
-		c := v.AuxInt
+		c := auxIntToInt32(v.AuxInt)
 		if v_0.Op != OpAMD64MULLconst {
 			break
 		}
-		d := v_0.AuxInt
+		d := auxIntToInt32(v_0.AuxInt)
 		x := v_0.Args[0]
 		v.reset(OpAMD64MULLconst)
-		v.AuxInt = int64(int32(c * d))
+		v.AuxInt = int32ToAuxInt(c * d)
 		v.AddArg(x)
 		return true
 	}
 	// match: (MULLconst [-9] x)
 	// result: (NEGL (LEAL8 <v.Type> x x))
 	for {
-		if v.AuxInt != -9 {
+		if auxIntToInt32(v.AuxInt) != -9 {
 			break
 		}
 		x := v_0
@@ -15645,7 +15606,7 @@
 	// match: (MULLconst [-5] x)
 	// result: (NEGL (LEAL4 <v.Type> x x))
 	for {
-		if v.AuxInt != -5 {
+		if auxIntToInt32(v.AuxInt) != -5 {
 			break
 		}
 		x := v_0
@@ -15658,7 +15619,7 @@
 	// match: (MULLconst [-3] x)
 	// result: (NEGL (LEAL2 <v.Type> x x))
 	for {
-		if v.AuxInt != -3 {
+		if auxIntToInt32(v.AuxInt) != -3 {
 			break
 		}
 		x := v_0
@@ -15671,7 +15632,7 @@
 	// match: (MULLconst [-1] x)
 	// result: (NEGL x)
 	for {
-		if v.AuxInt != -1 {
+		if auxIntToInt32(v.AuxInt) != -1 {
 			break
 		}
 		x := v_0
@@ -15682,17 +15643,17 @@
 	// match: (MULLconst [ 0] _)
 	// result: (MOVLconst [0])
 	for {
-		if v.AuxInt != 0 {
+		if auxIntToInt32(v.AuxInt) != 0 {
 			break
 		}
 		v.reset(OpAMD64MOVLconst)
-		v.AuxInt = 0
+		v.AuxInt = int32ToAuxInt(0)
 		return true
 	}
 	// match: (MULLconst [ 1] x)
 	// result: x
 	for {
-		if v.AuxInt != 1 {
+		if auxIntToInt32(v.AuxInt) != 1 {
 			break
 		}
 		x := v_0
@@ -15702,7 +15663,7 @@
 	// match: (MULLconst [ 3] x)
 	// result: (LEAL2 x x)
 	for {
-		if v.AuxInt != 3 {
+		if auxIntToInt32(v.AuxInt) != 3 {
 			break
 		}
 		x := v_0
@@ -15713,7 +15674,7 @@
 	// match: (MULLconst [ 5] x)
 	// result: (LEAL4 x x)
 	for {
-		if v.AuxInt != 5 {
+		if auxIntToInt32(v.AuxInt) != 5 {
 			break
 		}
 		x := v_0
@@ -15724,7 +15685,7 @@
 	// match: (MULLconst [ 7] x)
 	// result: (LEAL2 x (LEAL2 <v.Type> x x))
 	for {
-		if v.AuxInt != 7 {
+		if auxIntToInt32(v.AuxInt) != 7 {
 			break
 		}
 		x := v_0
@@ -15737,7 +15698,7 @@
 	// match: (MULLconst [ 9] x)
 	// result: (LEAL8 x x)
 	for {
-		if v.AuxInt != 9 {
+		if auxIntToInt32(v.AuxInt) != 9 {
 			break
 		}
 		x := v_0
@@ -15748,7 +15709,7 @@
 	// match: (MULLconst [11] x)
 	// result: (LEAL2 x (LEAL4 <v.Type> x x))
 	for {
-		if v.AuxInt != 11 {
+		if auxIntToInt32(v.AuxInt) != 11 {
 			break
 		}
 		x := v_0
@@ -15761,7 +15722,7 @@
 	// match: (MULLconst [13] x)
 	// result: (LEAL4 x (LEAL2 <v.Type> x x))
 	for {
-		if v.AuxInt != 13 {
+		if auxIntToInt32(v.AuxInt) != 13 {
 			break
 		}
 		x := v_0
@@ -15774,7 +15735,7 @@
 	// match: (MULLconst [19] x)
 	// result: (LEAL2 x (LEAL8 <v.Type> x x))
 	for {
-		if v.AuxInt != 19 {
+		if auxIntToInt32(v.AuxInt) != 19 {
 			break
 		}
 		x := v_0
@@ -15787,7 +15748,7 @@
 	// match: (MULLconst [21] x)
 	// result: (LEAL4 x (LEAL4 <v.Type> x x))
 	for {
-		if v.AuxInt != 21 {
+		if auxIntToInt32(v.AuxInt) != 21 {
 			break
 		}
 		x := v_0
@@ -15800,7 +15761,7 @@
 	// match: (MULLconst [25] x)
 	// result: (LEAL8 x (LEAL2 <v.Type> x x))
 	for {
-		if v.AuxInt != 25 {
+		if auxIntToInt32(v.AuxInt) != 25 {
 			break
 		}
 		x := v_0
@@ -15813,7 +15774,7 @@
 	// match: (MULLconst [27] x)
 	// result: (LEAL8 (LEAL2 <v.Type> x x) (LEAL2 <v.Type> x x))
 	for {
-		if v.AuxInt != 27 {
+		if auxIntToInt32(v.AuxInt) != 27 {
 			break
 		}
 		x := v_0
@@ -15826,7 +15787,7 @@
 	// match: (MULLconst [37] x)
 	// result: (LEAL4 x (LEAL8 <v.Type> x x))
 	for {
-		if v.AuxInt != 37 {
+		if auxIntToInt32(v.AuxInt) != 37 {
 			break
 		}
 		x := v_0
@@ -15839,7 +15800,7 @@
 	// match: (MULLconst [41] x)
 	// result: (LEAL8 x (LEAL4 <v.Type> x x))
 	for {
-		if v.AuxInt != 41 {
+		if auxIntToInt32(v.AuxInt) != 41 {
 			break
 		}
 		x := v_0
@@ -15852,7 +15813,7 @@
 	// match: (MULLconst [45] x)
 	// result: (LEAL8 (LEAL4 <v.Type> x x) (LEAL4 <v.Type> x x))
 	for {
-		if v.AuxInt != 45 {
+		if auxIntToInt32(v.AuxInt) != 45 {
 			break
 		}
 		x := v_0
@@ -15865,7 +15826,7 @@
 	// match: (MULLconst [73] x)
 	// result: (LEAL8 x (LEAL8 <v.Type> x x))
 	for {
-		if v.AuxInt != 73 {
+		if auxIntToInt32(v.AuxInt) != 73 {
 			break
 		}
 		x := v_0
@@ -15878,7 +15839,7 @@
 	// match: (MULLconst [81] x)
 	// result: (LEAL8 (LEAL8 <v.Type> x x) (LEAL8 <v.Type> x x))
 	for {
-		if v.AuxInt != 81 {
+		if auxIntToInt32(v.AuxInt) != 81 {
 			break
 		}
 		x := v_0
@@ -15889,128 +15850,128 @@
 		return true
 	}
 	// match: (MULLconst [c] x)
-	// cond: isPowerOfTwo(c+1) && c >= 15
-	// result: (SUBL (SHLLconst <v.Type> [log2(c+1)] x) x)
+	// cond: isPowerOfTwo(int64(c)+1) && c >= 15
+	// result: (SUBL (SHLLconst <v.Type> [int8(log2(int64(c)+1))] x) x)
 	for {
-		c := v.AuxInt
+		c := auxIntToInt32(v.AuxInt)
 		x := v_0
-		if !(isPowerOfTwo(c+1) && c >= 15) {
+		if !(isPowerOfTwo(int64(c)+1) && c >= 15) {
 			break
 		}
 		v.reset(OpAMD64SUBL)
 		v0 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
-		v0.AuxInt = log2(c + 1)
+		v0.AuxInt = int8ToAuxInt(int8(log2(int64(c) + 1)))
 		v0.AddArg(x)
 		v.AddArg2(v0, x)
 		return true
 	}
 	// match: (MULLconst [c] x)
-	// cond: isPowerOfTwo(c-1) && c >= 17
-	// result: (LEAL1 (SHLLconst <v.Type> [log2(c-1)] x) x)
+	// cond: isPowerOfTwo32(c-1) && c >= 17
+	// result: (LEAL1 (SHLLconst <v.Type> [int8(log32(c-1))] x) x)
 	for {
-		c := v.AuxInt
+		c := auxIntToInt32(v.AuxInt)
 		x := v_0
-		if !(isPowerOfTwo(c-1) && c >= 17) {
+		if !(isPowerOfTwo32(c-1) && c >= 17) {
 			break
 		}
 		v.reset(OpAMD64LEAL1)
 		v0 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
-		v0.AuxInt = log2(c - 1)
+		v0.AuxInt = int8ToAuxInt(int8(log32(c - 1)))
 		v0.AddArg(x)
 		v.AddArg2(v0, x)
 		return true
 	}
 	// match: (MULLconst [c] x)
-	// cond: isPowerOfTwo(c-2) && c >= 34
-	// result: (LEAL2 (SHLLconst <v.Type> [log2(c-2)] x) x)
+	// cond: isPowerOfTwo32(c-2) && c >= 34
+	// result: (LEAL2 (SHLLconst <v.Type> [int8(log32(c-2))] x) x)
 	for {
-		c := v.AuxInt
+		c := auxIntToInt32(v.AuxInt)
 		x := v_0
-		if !(isPowerOfTwo(c-2) && c >= 34) {
+		if !(isPowerOfTwo32(c-2) && c >= 34) {
 			break
 		}
 		v.reset(OpAMD64LEAL2)
 		v0 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
-		v0.AuxInt = log2(c - 2)
+		v0.AuxInt = int8ToAuxInt(int8(log32(c - 2)))
 		v0.AddArg(x)
 		v.AddArg2(v0, x)
 		return true
 	}
 	// match: (MULLconst [c] x)
-	// cond: isPowerOfTwo(c-4) && c >= 68
-	// result: (LEAL4 (SHLLconst <v.Type> [log2(c-4)] x) x)
+	// cond: isPowerOfTwo32(c-4) && c >= 68
+	// result: (LEAL4 (SHLLconst <v.Type> [int8(log32(c-4))] x) x)
 	for {
-		c := v.AuxInt
+		c := auxIntToInt32(v.AuxInt)
 		x := v_0
-		if !(isPowerOfTwo(c-4) && c >= 68) {
+		if !(isPowerOfTwo32(c-4) && c >= 68) {
 			break
 		}
 		v.reset(OpAMD64LEAL4)
 		v0 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
-		v0.AuxInt = log2(c - 4)
+		v0.AuxInt = int8ToAuxInt(int8(log32(c - 4)))
 		v0.AddArg(x)
 		v.AddArg2(v0, x)
 		return true
 	}
 	// match: (MULLconst [c] x)
-	// cond: isPowerOfTwo(c-8) && c >= 136
-	// result: (LEAL8 (SHLLconst <v.Type> [log2(c-8)] x) x)
+	// cond: isPowerOfTwo32(c-8) && c >= 136
+	// result: (LEAL8 (SHLLconst <v.Type> [int8(log32(c-8))] x) x)
 	for {
-		c := v.AuxInt
+		c := auxIntToInt32(v.AuxInt)
 		x := v_0
-		if !(isPowerOfTwo(c-8) && c >= 136) {
+		if !(isPowerOfTwo32(c-8) && c >= 136) {
 			break
 		}
 		v.reset(OpAMD64LEAL8)
 		v0 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
-		v0.AuxInt = log2(c - 8)
+		v0.AuxInt = int8ToAuxInt(int8(log32(c - 8)))
 		v0.AddArg(x)
 		v.AddArg2(v0, x)
 		return true
 	}
 	// match: (MULLconst [c] x)
-	// cond: c%3 == 0 && isPowerOfTwo(c/3)
-	// result: (SHLLconst [log2(c/3)] (LEAL2 <v.Type> x x))
+	// cond: c%3 == 0 && isPowerOfTwo32(c/3)
+	// result: (SHLLconst [int8(log32(c/3))] (LEAL2 <v.Type> x x))
 	for {
-		c := v.AuxInt
+		c := auxIntToInt32(v.AuxInt)
 		x := v_0
-		if !(c%3 == 0 && isPowerOfTwo(c/3)) {
+		if !(c%3 == 0 && isPowerOfTwo32(c/3)) {
 			break
 		}
 		v.reset(OpAMD64SHLLconst)
-		v.AuxInt = log2(c / 3)
+		v.AuxInt = int8ToAuxInt(int8(log32(c / 3)))
 		v0 := b.NewValue0(v.Pos, OpAMD64LEAL2, v.Type)
 		v0.AddArg2(x, x)
 		v.AddArg(v0)
 		return true
 	}
 	// match: (MULLconst [c] x)
-	// cond: c%5 == 0 && isPowerOfTwo(c/5)
-	// result: (SHLLconst [log2(c/5)] (LEAL4 <v.Type> x x))
+	// cond: c%5 == 0 && isPowerOfTwo32(c/5)
+	// result: (SHLLconst [int8(log32(c/5))] (LEAL4 <v.Type> x x))
 	for {
-		c := v.AuxInt
+		c := auxIntToInt32(v.AuxInt)
 		x := v_0
-		if !(c%5 == 0 && isPowerOfTwo(c/5)) {
+		if !(c%5 == 0 && isPowerOfTwo32(c/5)) {
 			break
 		}
 		v.reset(OpAMD64SHLLconst)
-		v.AuxInt = log2(c / 5)
+		v.AuxInt = int8ToAuxInt(int8(log32(c / 5)))
 		v0 := b.NewValue0(v.Pos, OpAMD64LEAL4, v.Type)
 		v0.AddArg2(x, x)
 		v.AddArg(v0)
 		return true
 	}
 	// match: (MULLconst [c] x)
-	// cond: c%9 == 0 && isPowerOfTwo(c/9)
-	// result: (SHLLconst [log2(c/9)] (LEAL8 <v.Type> x x))
+	// cond: c%9 == 0 && isPowerOfTwo32(c/9)
+	// result: (SHLLconst [int8(log32(c/9))] (LEAL8 <v.Type> x x))
 	for {
-		c := v.AuxInt
+		c := auxIntToInt32(v.AuxInt)
 		x := v_0
-		if !(c%9 == 0 && isPowerOfTwo(c/9)) {
+		if !(c%9 == 0 && isPowerOfTwo32(c/9)) {
 			break
 		}
 		v.reset(OpAMD64SHLLconst)
-		v.AuxInt = log2(c / 9)
+		v.AuxInt = int8ToAuxInt(int8(log32(c / 9)))
 		v0 := b.NewValue0(v.Pos, OpAMD64LEAL8, v.Type)
 		v0.AddArg2(x, x)
 		v.AddArg(v0)
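
[Editor's note] These MULLconst rules (and the MULQconst ones below) strength-reduce a multiply by a constant into LEA and shift sequences. The merge switches most of their conditions to 32-bit-aware helpers, isPowerOfTwo32 and log32, and casts the shift counts to int8, the declared AuxInt type of the shift ops; only the c+1 case keeps the int64 isPowerOfTwo via isPowerOfTwo(int64(c)+1), which sidesteps int32 overflow at c = 1<<31-1. Sketches of the two helpers, assuming bits-based definitions like those in rewrite.go:

	package ssa // fragment for illustration

	import "math/bits"

	func isPowerOfTwo32(n int32) bool {
		return n > 0 && n&(n-1) == 0
	}

	// log32 returns log2(n) for a positive power-of-two n.
	func log32(n int32) int64 {
		return int64(bits.Len32(uint32(n))) - 1
	}

As a worked case, c = 40 satisfies c%5 == 0 with 40/5 = 8 a power of two, so (MULLconst [40] x) becomes (SHLLconst [3] (LEAL4 <v.Type> x x)), computing x*40 as (x + 4*x) << 3 with no IMUL.
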
@@ -16035,19 +15996,19 @@
 	v_0 := v.Args[0]
 	// match: (MULQ x (MOVQconst [c]))
 	// cond: is32Bit(c)
-	// result: (MULQconst [c] x)
+	// result: (MULQconst [int32(c)] x)
 	for {
 		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
 			x := v_0
 			if v_1.Op != OpAMD64MOVQconst {
 				continue
 			}
-			c := v_1.AuxInt
+			c := auxIntToInt64(v_1.AuxInt)
 			if !(is32Bit(c)) {
 				continue
 			}
 			v.reset(OpAMD64MULQconst)
-			v.AuxInt = c
+			v.AuxInt = int32ToAuxInt(int32(c))
 			v.AddArg(x)
 			return true
 		}
@@ -16059,27 +16020,27 @@
 	v_0 := v.Args[0]
 	b := v.Block
 	// match: (MULQconst [c] (MULQconst [d] x))
-	// cond: is32Bit(c*d)
+	// cond: is32Bit(int64(c)*int64(d))
 	// result: (MULQconst [c * d] x)
 	for {
-		c := v.AuxInt
+		c := auxIntToInt32(v.AuxInt)
 		if v_0.Op != OpAMD64MULQconst {
 			break
 		}
-		d := v_0.AuxInt
+		d := auxIntToInt32(v_0.AuxInt)
 		x := v_0.Args[0]
-		if !(is32Bit(c * d)) {
+		if !(is32Bit(int64(c) * int64(d))) {
 			break
 		}
 		v.reset(OpAMD64MULQconst)
-		v.AuxInt = c * d
+		v.AuxInt = int32ToAuxInt(c * d)
 		v.AddArg(x)
 		return true
 	}
 	// match: (MULQconst [-9] x)
 	// result: (NEGQ (LEAQ8 <v.Type> x x))
 	for {
-		if v.AuxInt != -9 {
+		if auxIntToInt32(v.AuxInt) != -9 {
 			break
 		}
 		x := v_0
@@ -16092,7 +16053,7 @@
 	// match: (MULQconst [-5] x)
 	// result: (NEGQ (LEAQ4 <v.Type> x x))
 	for {
-		if v.AuxInt != -5 {
+		if auxIntToInt32(v.AuxInt) != -5 {
 			break
 		}
 		x := v_0
@@ -16105,7 +16066,7 @@
 	// match: (MULQconst [-3] x)
 	// result: (NEGQ (LEAQ2 <v.Type> x x))
 	for {
-		if v.AuxInt != -3 {
+		if auxIntToInt32(v.AuxInt) != -3 {
 			break
 		}
 		x := v_0
@@ -16118,7 +16079,7 @@
 	// match: (MULQconst [-1] x)
 	// result: (NEGQ x)
 	for {
-		if v.AuxInt != -1 {
+		if auxIntToInt32(v.AuxInt) != -1 {
 			break
 		}
 		x := v_0
@@ -16129,17 +16090,17 @@
 	// match: (MULQconst [ 0] _)
 	// result: (MOVQconst [0])
 	for {
-		if v.AuxInt != 0 {
+		if auxIntToInt32(v.AuxInt) != 0 {
 			break
 		}
 		v.reset(OpAMD64MOVQconst)
-		v.AuxInt = 0
+		v.AuxInt = int64ToAuxInt(0)
 		return true
 	}
 	// match: (MULQconst [ 1] x)
 	// result: x
 	for {
-		if v.AuxInt != 1 {
+		if auxIntToInt32(v.AuxInt) != 1 {
 			break
 		}
 		x := v_0
@@ -16149,7 +16110,7 @@
 	// match: (MULQconst [ 3] x)
 	// result: (LEAQ2 x x)
 	for {
-		if v.AuxInt != 3 {
+		if auxIntToInt32(v.AuxInt) != 3 {
 			break
 		}
 		x := v_0
@@ -16160,7 +16121,7 @@
 	// match: (MULQconst [ 5] x)
 	// result: (LEAQ4 x x)
 	for {
-		if v.AuxInt != 5 {
+		if auxIntToInt32(v.AuxInt) != 5 {
 			break
 		}
 		x := v_0
@@ -16171,7 +16132,7 @@
 	// match: (MULQconst [ 7] x)
 	// result: (LEAQ2 x (LEAQ2 <v.Type> x x))
 	for {
-		if v.AuxInt != 7 {
+		if auxIntToInt32(v.AuxInt) != 7 {
 			break
 		}
 		x := v_0
@@ -16184,7 +16145,7 @@
 	// match: (MULQconst [ 9] x)
 	// result: (LEAQ8 x x)
 	for {
-		if v.AuxInt != 9 {
+		if auxIntToInt32(v.AuxInt) != 9 {
 			break
 		}
 		x := v_0
@@ -16195,7 +16156,7 @@
 	// match: (MULQconst [11] x)
 	// result: (LEAQ2 x (LEAQ4 <v.Type> x x))
 	for {
-		if v.AuxInt != 11 {
+		if auxIntToInt32(v.AuxInt) != 11 {
 			break
 		}
 		x := v_0
@@ -16208,7 +16169,7 @@
 	// match: (MULQconst [13] x)
 	// result: (LEAQ4 x (LEAQ2 <v.Type> x x))
 	for {
-		if v.AuxInt != 13 {
+		if auxIntToInt32(v.AuxInt) != 13 {
 			break
 		}
 		x := v_0
@@ -16221,7 +16182,7 @@
 	// match: (MULQconst [19] x)
 	// result: (LEAQ2 x (LEAQ8 <v.Type> x x))
 	for {
-		if v.AuxInt != 19 {
+		if auxIntToInt32(v.AuxInt) != 19 {
 			break
 		}
 		x := v_0
@@ -16234,7 +16195,7 @@
 	// match: (MULQconst [21] x)
 	// result: (LEAQ4 x (LEAQ4 <v.Type> x x))
 	for {
-		if v.AuxInt != 21 {
+		if auxIntToInt32(v.AuxInt) != 21 {
 			break
 		}
 		x := v_0
@@ -16247,7 +16208,7 @@
 	// match: (MULQconst [25] x)
 	// result: (LEAQ8 x (LEAQ2 <v.Type> x x))
 	for {
-		if v.AuxInt != 25 {
+		if auxIntToInt32(v.AuxInt) != 25 {
 			break
 		}
 		x := v_0
@@ -16260,7 +16221,7 @@
 	// match: (MULQconst [27] x)
 	// result: (LEAQ8 (LEAQ2 <v.Type> x x) (LEAQ2 <v.Type> x x))
 	for {
-		if v.AuxInt != 27 {
+		if auxIntToInt32(v.AuxInt) != 27 {
 			break
 		}
 		x := v_0
@@ -16273,7 +16234,7 @@
 	// match: (MULQconst [37] x)
 	// result: (LEAQ4 x (LEAQ8 <v.Type> x x))
 	for {
-		if v.AuxInt != 37 {
+		if auxIntToInt32(v.AuxInt) != 37 {
 			break
 		}
 		x := v_0
@@ -16286,7 +16247,7 @@
 	// match: (MULQconst [41] x)
 	// result: (LEAQ8 x (LEAQ4 <v.Type> x x))
 	for {
-		if v.AuxInt != 41 {
+		if auxIntToInt32(v.AuxInt) != 41 {
 			break
 		}
 		x := v_0
@@ -16299,7 +16260,7 @@
 	// match: (MULQconst [45] x)
 	// result: (LEAQ8 (LEAQ4 <v.Type> x x) (LEAQ4 <v.Type> x x))
 	for {
-		if v.AuxInt != 45 {
+		if auxIntToInt32(v.AuxInt) != 45 {
 			break
 		}
 		x := v_0
@@ -16312,7 +16273,7 @@
 	// match: (MULQconst [73] x)
 	// result: (LEAQ8 x (LEAQ8 <v.Type> x x))
 	for {
-		if v.AuxInt != 73 {
+		if auxIntToInt32(v.AuxInt) != 73 {
 			break
 		}
 		x := v_0
@@ -16325,7 +16286,7 @@
 	// match: (MULQconst [81] x)
 	// result: (LEAQ8 (LEAQ8 <v.Type> x x) (LEAQ8 <v.Type> x x))
 	for {
-		if v.AuxInt != 81 {
+		if auxIntToInt32(v.AuxInt) != 81 {
 			break
 		}
 		x := v_0
@@ -16336,128 +16297,128 @@
 		return true
 	}
 	// match: (MULQconst [c] x)
-	// cond: isPowerOfTwo(c+1) && c >= 15
-	// result: (SUBQ (SHLQconst <v.Type> [log2(c+1)] x) x)
+	// cond: isPowerOfTwo(int64(c)+1) && c >= 15
+	// result: (SUBQ (SHLQconst <v.Type> [int8(log2(int64(c)+1))] x) x)
 	for {
-		c := v.AuxInt
+		c := auxIntToInt32(v.AuxInt)
 		x := v_0
-		if !(isPowerOfTwo(c+1) && c >= 15) {
+		if !(isPowerOfTwo(int64(c)+1) && c >= 15) {
 			break
 		}
 		v.reset(OpAMD64SUBQ)
 		v0 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-		v0.AuxInt = log2(c + 1)
+		v0.AuxInt = int8ToAuxInt(int8(log2(int64(c) + 1)))
 		v0.AddArg(x)
 		v.AddArg2(v0, x)
 		return true
 	}
 	// match: (MULQconst [c] x)
-	// cond: isPowerOfTwo(c-1) && c >= 17
-	// result: (LEAQ1 (SHLQconst <v.Type> [log2(c-1)] x) x)
+	// cond: isPowerOfTwo32(c-1) && c >= 17
+	// result: (LEAQ1 (SHLQconst <v.Type> [int8(log32(c-1))] x) x)
 	for {
-		c := v.AuxInt
+		c := auxIntToInt32(v.AuxInt)
 		x := v_0
-		if !(isPowerOfTwo(c-1) && c >= 17) {
+		if !(isPowerOfTwo32(c-1) && c >= 17) {
 			break
 		}
 		v.reset(OpAMD64LEAQ1)
 		v0 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-		v0.AuxInt = log2(c - 1)
+		v0.AuxInt = int8ToAuxInt(int8(log32(c - 1)))
 		v0.AddArg(x)
 		v.AddArg2(v0, x)
 		return true
 	}
 	// match: (MULQconst [c] x)
-	// cond: isPowerOfTwo(c-2) && c >= 34
-	// result: (LEAQ2 (SHLQconst <v.Type> [log2(c-2)] x) x)
+	// cond: isPowerOfTwo32(c-2) && c >= 34
+	// result: (LEAQ2 (SHLQconst <v.Type> [int8(log32(c-2))] x) x)
 	for {
-		c := v.AuxInt
+		c := auxIntToInt32(v.AuxInt)
 		x := v_0
-		if !(isPowerOfTwo(c-2) && c >= 34) {
+		if !(isPowerOfTwo32(c-2) && c >= 34) {
 			break
 		}
 		v.reset(OpAMD64LEAQ2)
 		v0 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-		v0.AuxInt = log2(c - 2)
+		v0.AuxInt = int8ToAuxInt(int8(log32(c - 2)))
 		v0.AddArg(x)
 		v.AddArg2(v0, x)
 		return true
 	}
 	// match: (MULQconst [c] x)
-	// cond: isPowerOfTwo(c-4) && c >= 68
-	// result: (LEAQ4 (SHLQconst <v.Type> [log2(c-4)] x) x)
+	// cond: isPowerOfTwo32(c-4) && c >= 68
+	// result: (LEAQ4 (SHLQconst <v.Type> [int8(log32(c-4))] x) x)
 	for {
-		c := v.AuxInt
+		c := auxIntToInt32(v.AuxInt)
 		x := v_0
-		if !(isPowerOfTwo(c-4) && c >= 68) {
+		if !(isPowerOfTwo32(c-4) && c >= 68) {
 			break
 		}
 		v.reset(OpAMD64LEAQ4)
 		v0 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-		v0.AuxInt = log2(c - 4)
+		v0.AuxInt = int8ToAuxInt(int8(log32(c - 4)))
 		v0.AddArg(x)
 		v.AddArg2(v0, x)
 		return true
 	}
 	// match: (MULQconst [c] x)
-	// cond: isPowerOfTwo(c-8) && c >= 136
-	// result: (LEAQ8 (SHLQconst <v.Type> [log2(c-8)] x) x)
+	// cond: isPowerOfTwo32(c-8) && c >= 136
+	// result: (LEAQ8 (SHLQconst <v.Type> [int8(log32(c-8))] x) x)
 	for {
-		c := v.AuxInt
+		c := auxIntToInt32(v.AuxInt)
 		x := v_0
-		if !(isPowerOfTwo(c-8) && c >= 136) {
+		if !(isPowerOfTwo32(c-8) && c >= 136) {
 			break
 		}
 		v.reset(OpAMD64LEAQ8)
 		v0 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-		v0.AuxInt = log2(c - 8)
+		v0.AuxInt = int8ToAuxInt(int8(log32(c - 8)))
 		v0.AddArg(x)
 		v.AddArg2(v0, x)
 		return true
 	}
 	// match: (MULQconst [c] x)
-	// cond: c%3 == 0 && isPowerOfTwo(c/3)
-	// result: (SHLQconst [log2(c/3)] (LEAQ2 <v.Type> x x))
+	// cond: c%3 == 0 && isPowerOfTwo32(c/3)
+	// result: (SHLQconst [int8(log32(c/3))] (LEAQ2 <v.Type> x x))
 	for {
-		c := v.AuxInt
+		c := auxIntToInt32(v.AuxInt)
 		x := v_0
-		if !(c%3 == 0 && isPowerOfTwo(c/3)) {
+		if !(c%3 == 0 && isPowerOfTwo32(c/3)) {
 			break
 		}
 		v.reset(OpAMD64SHLQconst)
-		v.AuxInt = log2(c / 3)
+		v.AuxInt = int8ToAuxInt(int8(log32(c / 3)))
 		v0 := b.NewValue0(v.Pos, OpAMD64LEAQ2, v.Type)
 		v0.AddArg2(x, x)
 		v.AddArg(v0)
 		return true
 	}
 	// match: (MULQconst [c] x)
-	// cond: c%5 == 0 && isPowerOfTwo(c/5)
-	// result: (SHLQconst [log2(c/5)] (LEAQ4 <v.Type> x x))
+	// cond: c%5 == 0 && isPowerOfTwo32(c/5)
+	// result: (SHLQconst [int8(log32(c/5))] (LEAQ4 <v.Type> x x))
 	for {
-		c := v.AuxInt
+		c := auxIntToInt32(v.AuxInt)
 		x := v_0
-		if !(c%5 == 0 && isPowerOfTwo(c/5)) {
+		if !(c%5 == 0 && isPowerOfTwo32(c/5)) {
 			break
 		}
 		v.reset(OpAMD64SHLQconst)
-		v.AuxInt = log2(c / 5)
+		v.AuxInt = int8ToAuxInt(int8(log32(c / 5)))
 		v0 := b.NewValue0(v.Pos, OpAMD64LEAQ4, v.Type)
 		v0.AddArg2(x, x)
 		v.AddArg(v0)
 		return true
 	}
 	// match: (MULQconst [c] x)
-	// cond: c%9 == 0 && isPowerOfTwo(c/9)
-	// result: (SHLQconst [log2(c/9)] (LEAQ8 <v.Type> x x))
+	// cond: c%9 == 0 && isPowerOfTwo32(c/9)
+	// result: (SHLQconst [int8(log32(c/9))] (LEAQ8 <v.Type> x x))
 	for {
-		c := v.AuxInt
+		c := auxIntToInt32(v.AuxInt)
 		x := v_0
-		if !(c%9 == 0 && isPowerOfTwo(c/9)) {
+		if !(c%9 == 0 && isPowerOfTwo32(c/9)) {
 			break
 		}
 		v.reset(OpAMD64SHLQconst)
-		v.AuxInt = log2(c / 9)
+		v.AuxInt = int8ToAuxInt(int8(log32(c / 9)))
 		v0 := b.NewValue0(v.Pos, OpAMD64LEAQ8, v.Type)
 		v0.AddArg2(x, x)
 		v.AddArg(v0)
@@ -16531,24 +16492,24 @@
 	b := v.Block
 	typ := &b.Func.Config.Types
 	// match: (MULSDload [off1] {sym} val (ADDQconst [off2] base) mem)
-	// cond: is32Bit(off1+off2)
+	// cond: is32Bit(int64(off1)+int64(off2))
 	// result: (MULSDload [off1+off2] {sym} val base mem)
 	for {
-		off1 := v.AuxInt
-		sym := v.Aux
+		off1 := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		val := v_0
 		if v_1.Op != OpAMD64ADDQconst {
 			break
 		}
-		off2 := v_1.AuxInt
+		off2 := auxIntToInt32(v_1.AuxInt)
 		base := v_1.Args[0]
 		mem := v_2
-		if !(is32Bit(off1 + off2)) {
+		if !(is32Bit(int64(off1) + int64(off2))) {
 			break
 		}
 		v.reset(OpAMD64MULSDload)
-		v.AuxInt = off1 + off2
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off1 + off2)
+		v.Aux = symToAux(sym)
 		v.AddArg3(val, base, mem)
 		return true
 	}
@@ -16634,24 +16595,24 @@
 	b := v.Block
 	typ := &b.Func.Config.Types
 	// match: (MULSSload [off1] {sym} val (ADDQconst [off2] base) mem)
-	// cond: is32Bit(off1+off2)
+	// cond: is32Bit(int64(off1)+int64(off2))
 	// result: (MULSSload [off1+off2] {sym} val base mem)
 	for {
-		off1 := v.AuxInt
-		sym := v.Aux
+		off1 := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		val := v_0
 		if v_1.Op != OpAMD64ADDQconst {
 			break
 		}
-		off2 := v_1.AuxInt
+		off2 := auxIntToInt32(v_1.AuxInt)
 		base := v_1.Args[0]
 		mem := v_2
-		if !(is32Bit(off1 + off2)) {
+		if !(is32Bit(int64(off1) + int64(off2))) {
 			break
 		}
 		v.reset(OpAMD64MULSSload)
-		v.AuxInt = off1 + off2
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off1 + off2)
+		v.Aux = symToAux(sym)
 		v.AddArg3(val, base, mem)
 		return true
 	}
@@ -16849,7 +16810,7 @@
 			}
 			y := v_0.Args[1]
 			v_0_0 := v_0.Args[0]
-			if v_0_0.Op != OpAMD64MOVLconst || v_0_0.AuxInt != 1 {
+			if v_0_0.Op != OpAMD64MOVLconst || auxIntToInt32(v_0_0.AuxInt) != 1 {
 				continue
 			}
 			x := v_1
@@ -16860,20 +16821,20 @@
 		break
 	}
 	// match: (ORL (MOVLconst [c]) x)
-	// cond: isUint32PowerOfTwo(c) && uint64(c) >= 128
-	// result: (BTSLconst [log2uint32(c)] x)
+	// cond: isUint32PowerOfTwo(int64(c)) && uint64(c) >= 128
+	// result: (BTSLconst [int8(log32(c))] x)
 	for {
 		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
 			if v_0.Op != OpAMD64MOVLconst {
 				continue
 			}
-			c := v_0.AuxInt
+			c := auxIntToInt32(v_0.AuxInt)
 			x := v_1
-			if !(isUint32PowerOfTwo(c) && uint64(c) >= 128) {
+			if !(isUint32PowerOfTwo(int64(c)) && uint64(c) >= 128) {
 				continue
 			}
 			v.reset(OpAMD64BTSLconst)
-			v.AuxInt = log2uint32(c)
+			v.AuxInt = int8ToAuxInt(int8(log32(c)))
 			v.AddArg(x)
 			return true
 		}
@@ -16887,9 +16848,9 @@
 			if v_1.Op != OpAMD64MOVLconst {
 				continue
 			}
-			c := v_1.AuxInt
+			c := auxIntToInt32(v_1.AuxInt)
 			v.reset(OpAMD64ORLconst)
-			v.AuxInt = c
+			v.AuxInt = int32ToAuxInt(c)
 			v.AddArg(x)
 			return true
 		}
@@ -16903,17 +16864,17 @@
 			if v_0.Op != OpAMD64SHLLconst {
 				continue
 			}
-			c := v_0.AuxInt
+			c := auxIntToInt8(v_0.AuxInt)
 			x := v_0.Args[0]
 			if v_1.Op != OpAMD64SHRLconst {
 				continue
 			}
-			d := v_1.AuxInt
+			d := auxIntToInt8(v_1.AuxInt)
 			if x != v_1.Args[0] || !(d == 32-c) {
 				continue
 			}
 			v.reset(OpAMD64ROLLconst)
-			v.AuxInt = c
+			v.AuxInt = int8ToAuxInt(c)
 			v.AddArg(x)
 			return true
 		}
@@ -16928,17 +16889,17 @@
 			if v_0.Op != OpAMD64SHLLconst {
 				continue
 			}
-			c := v_0.AuxInt
+			c := auxIntToInt8(v_0.AuxInt)
 			x := v_0.Args[0]
 			if v_1.Op != OpAMD64SHRWconst {
 				continue
 			}
-			d := v_1.AuxInt
+			d := auxIntToInt8(v_1.AuxInt)
 			if x != v_1.Args[0] || !(d == 16-c && c < 16 && t.Size() == 2) {
 				continue
 			}
 			v.reset(OpAMD64ROLWconst)
-			v.AuxInt = c
+			v.AuxInt = int8ToAuxInt(c)
 			v.AddArg(x)
 			return true
 		}
@@ -16953,17 +16914,17 @@
 			if v_0.Op != OpAMD64SHLLconst {
 				continue
 			}
-			c := v_0.AuxInt
+			c := auxIntToInt8(v_0.AuxInt)
 			x := v_0.Args[0]
 			if v_1.Op != OpAMD64SHRBconst {
 				continue
 			}
-			d := v_1.AuxInt
+			d := auxIntToInt8(v_1.AuxInt)
 			if x != v_1.Args[0] || !(d == 8-c && c < 8 && t.Size() == 1) {
 				continue
 			}
 			v.reset(OpAMD64ROLBconst)
-			v.AuxInt = c
+			v.AuxInt = int8ToAuxInt(c)
 			v.AddArg(x)
 			return true
 		}
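
[Editor's note] The ORL hunks here recognize rotates: the constant form matches (SHLLconst [c] x) OR'd with (SHRLconst [32-c] x), and the variable forms match the masked shift-and-borrow trees above, with all the embedded shift and mask constants now read through typed accessors. The identity behind the constant form:

	package main

	import (
		"fmt"
		"math/bits"
	)

	func main() {
		x, c := uint32(0xdeadbeef), 5
		fromShifts := x<<c | x>>(32-c) // the matched IR shape
		fmt.Println(fromShifts == bits.RotateLeft32(x, c)) // true
	}
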
@@ -16997,7 +16958,7 @@
 					continue
 				}
 				v_1_1_0 := v_1_1.Args[0]
-				if v_1_1_0.Op != OpAMD64CMPQconst || v_1_1_0.AuxInt != 32 {
+				if v_1_1_0.Op != OpAMD64CMPQconst || auxIntToInt32(v_1_1_0.AuxInt) != 32 {
 					continue
 				}
 				v_1_1_0_0 := v_1_1_0.Args[0]
@@ -17005,11 +16966,11 @@
 					continue
 				}
 				v_1_1_0_0_0 := v_1_1_0_0.Args[0]
-				if v_1_1_0_0_0.Op != OpAMD64ADDQconst || v_1_1_0_0_0.AuxInt != -32 {
+				if v_1_1_0_0_0.Op != OpAMD64ADDQconst || auxIntToInt32(v_1_1_0_0_0.AuxInt) != -32 {
 					continue
 				}
 				v_1_1_0_0_0_0 := v_1_1_0_0_0.Args[0]
-				if v_1_1_0_0_0_0.Op != OpAMD64ANDQconst || v_1_1_0_0_0_0.AuxInt != 31 || y != v_1_1_0_0_0_0.Args[0] {
+				if v_1_1_0_0_0_0.Op != OpAMD64ANDQconst || auxIntToInt32(v_1_1_0_0_0_0.AuxInt) != 31 || y != v_1_1_0_0_0_0.Args[0] {
 					continue
 				}
 				v.reset(OpAMD64ROLL)
@@ -17047,7 +17008,7 @@
 					continue
 				}
 				v_1_1_0 := v_1_1.Args[0]
-				if v_1_1_0.Op != OpAMD64CMPLconst || v_1_1_0.AuxInt != 32 {
+				if v_1_1_0.Op != OpAMD64CMPLconst || auxIntToInt32(v_1_1_0.AuxInt) != 32 {
 					continue
 				}
 				v_1_1_0_0 := v_1_1_0.Args[0]
@@ -17055,11 +17016,11 @@
 					continue
 				}
 				v_1_1_0_0_0 := v_1_1_0_0.Args[0]
-				if v_1_1_0_0_0.Op != OpAMD64ADDLconst || v_1_1_0_0_0.AuxInt != -32 {
+				if v_1_1_0_0_0.Op != OpAMD64ADDLconst || auxIntToInt32(v_1_1_0_0_0.AuxInt) != -32 {
 					continue
 				}
 				v_1_1_0_0_0_0 := v_1_1_0_0_0.Args[0]
-				if v_1_1_0_0_0_0.Op != OpAMD64ANDLconst || v_1_1_0_0_0_0.AuxInt != 31 || y != v_1_1_0_0_0_0.Args[0] {
+				if v_1_1_0_0_0_0.Op != OpAMD64ANDLconst || auxIntToInt32(v_1_1_0_0_0_0.AuxInt) != 31 || y != v_1_1_0_0_0_0.Args[0] {
 					continue
 				}
 				v.reset(OpAMD64ROLL)
@@ -17097,7 +17058,7 @@
 					continue
 				}
 				v_1_1_0 := v_1_1.Args[0]
-				if v_1_1_0.Op != OpAMD64CMPQconst || v_1_1_0.AuxInt != 32 {
+				if v_1_1_0.Op != OpAMD64CMPQconst || auxIntToInt32(v_1_1_0.AuxInt) != 32 {
 					continue
 				}
 				v_1_1_0_0 := v_1_1_0.Args[0]
@@ -17105,11 +17066,11 @@
 					continue
 				}
 				v_1_1_0_0_0 := v_1_1_0_0.Args[0]
-				if v_1_1_0_0_0.Op != OpAMD64ADDQconst || v_1_1_0_0_0.AuxInt != -32 {
+				if v_1_1_0_0_0.Op != OpAMD64ADDQconst || auxIntToInt32(v_1_1_0_0_0.AuxInt) != -32 {
 					continue
 				}
 				v_1_1_0_0_0_0 := v_1_1_0_0_0.Args[0]
-				if v_1_1_0_0_0_0.Op != OpAMD64ANDQconst || v_1_1_0_0_0_0.AuxInt != 31 || y != v_1_1_0_0_0_0.Args[0] {
+				if v_1_1_0_0_0_0.Op != OpAMD64ANDQconst || auxIntToInt32(v_1_1_0_0_0_0.AuxInt) != 31 || y != v_1_1_0_0_0_0.Args[0] {
 					continue
 				}
 				v.reset(OpAMD64RORL)
@@ -17147,7 +17108,7 @@
 					continue
 				}
 				v_1_1_0 := v_1_1.Args[0]
-				if v_1_1_0.Op != OpAMD64CMPLconst || v_1_1_0.AuxInt != 32 {
+				if v_1_1_0.Op != OpAMD64CMPLconst || auxIntToInt32(v_1_1_0.AuxInt) != 32 {
 					continue
 				}
 				v_1_1_0_0 := v_1_1_0.Args[0]
@@ -17155,11 +17116,11 @@
 					continue
 				}
 				v_1_1_0_0_0 := v_1_1_0_0.Args[0]
-				if v_1_1_0_0_0.Op != OpAMD64ADDLconst || v_1_1_0_0_0.AuxInt != -32 {
+				if v_1_1_0_0_0.Op != OpAMD64ADDLconst || auxIntToInt32(v_1_1_0_0_0.AuxInt) != -32 {
 					continue
 				}
 				v_1_1_0_0_0_0 := v_1_1_0_0_0.Args[0]
-				if v_1_1_0_0_0_0.Op != OpAMD64ANDLconst || v_1_1_0_0_0_0.AuxInt != 31 || y != v_1_1_0_0_0_0.Args[0] {
+				if v_1_1_0_0_0_0.Op != OpAMD64ANDLconst || auxIntToInt32(v_1_1_0_0_0_0.AuxInt) != 31 || y != v_1_1_0_0_0_0.Args[0] {
 					continue
 				}
 				v.reset(OpAMD64RORL)
@@ -17308,7 +17269,7 @@
 			_ = v_0.Args[1]
 			x := v_0.Args[0]
 			v_0_1 := v_0.Args[1]
-			if v_0_1.Op != OpAMD64ANDQconst || v_0_1.AuxInt != 15 {
+			if v_0_1.Op != OpAMD64ANDQconst || auxIntToInt32(v_0_1.AuxInt) != 15 {
 				continue
 			}
 			y := v_0_1.Args[0]
@@ -17324,11 +17285,11 @@
 				continue
 			}
 			v_1_1_0 := v_1_1.Args[0]
-			if v_1_1_0.Op != OpAMD64ADDQconst || v_1_1_0.AuxInt != -16 {
+			if v_1_1_0.Op != OpAMD64ADDQconst || auxIntToInt32(v_1_1_0.AuxInt) != -16 {
 				continue
 			}
 			v_1_1_0_0 := v_1_1_0.Args[0]
-			if v_1_1_0_0.Op != OpAMD64ANDQconst || v_1_1_0_0.AuxInt != 15 || y != v_1_1_0_0.Args[0] || !(v.Type.Size() == 2) {
+			if v_1_1_0_0.Op != OpAMD64ANDQconst || auxIntToInt32(v_1_1_0_0.AuxInt) != 15 || y != v_1_1_0_0.Args[0] || !(v.Type.Size() == 2) {
 				continue
 			}
 			v.reset(OpAMD64RORW)
@@ -17348,7 +17309,7 @@
 			_ = v_0.Args[1]
 			x := v_0.Args[0]
 			v_0_1 := v_0.Args[1]
-			if v_0_1.Op != OpAMD64ANDLconst || v_0_1.AuxInt != 15 {
+			if v_0_1.Op != OpAMD64ANDLconst || auxIntToInt32(v_0_1.AuxInt) != 15 {
 				continue
 			}
 			y := v_0_1.Args[0]
@@ -17364,11 +17325,11 @@
 				continue
 			}
 			v_1_1_0 := v_1_1.Args[0]
-			if v_1_1_0.Op != OpAMD64ADDLconst || v_1_1_0.AuxInt != -16 {
+			if v_1_1_0.Op != OpAMD64ADDLconst || auxIntToInt32(v_1_1_0.AuxInt) != -16 {
 				continue
 			}
 			v_1_1_0_0 := v_1_1_0.Args[0]
-			if v_1_1_0_0.Op != OpAMD64ANDLconst || v_1_1_0_0.AuxInt != 15 || y != v_1_1_0_0.Args[0] || !(v.Type.Size() == 2) {
+			if v_1_1_0_0.Op != OpAMD64ANDLconst || auxIntToInt32(v_1_1_0_0.AuxInt) != 15 || y != v_1_1_0_0.Args[0] || !(v.Type.Size() == 2) {
 				continue
 			}
 			v.reset(OpAMD64RORW)
@@ -17388,7 +17349,7 @@
 			_ = v_0.Args[1]
 			x := v_0.Args[0]
 			v_0_1 := v_0.Args[1]
-			if v_0_1.Op != OpAMD64ANDQconst || v_0_1.AuxInt != 7 {
+			if v_0_1.Op != OpAMD64ANDQconst || auxIntToInt32(v_0_1.AuxInt) != 7 {
 				continue
 			}
 			y := v_0_1.Args[0]
@@ -17411,15 +17372,15 @@
 					continue
 				}
 				v_1_0_1_0 := v_1_0_1.Args[0]
-				if v_1_0_1_0.Op != OpAMD64ADDQconst || v_1_0_1_0.AuxInt != -8 {
+				if v_1_0_1_0.Op != OpAMD64ADDQconst || auxIntToInt32(v_1_0_1_0.AuxInt) != -8 {
 					continue
 				}
 				v_1_0_1_0_0 := v_1_0_1_0.Args[0]
-				if v_1_0_1_0_0.Op != OpAMD64ANDQconst || v_1_0_1_0_0.AuxInt != 7 || y != v_1_0_1_0_0.Args[0] || v_1_1.Op != OpAMD64SBBLcarrymask {
+				if v_1_0_1_0_0.Op != OpAMD64ANDQconst || auxIntToInt32(v_1_0_1_0_0.AuxInt) != 7 || y != v_1_0_1_0_0.Args[0] || v_1_1.Op != OpAMD64SBBLcarrymask {
 					continue
 				}
 				v_1_1_0 := v_1_1.Args[0]
-				if v_1_1_0.Op != OpAMD64CMPQconst || v_1_1_0.AuxInt != 8 {
+				if v_1_1_0.Op != OpAMD64CMPQconst || auxIntToInt32(v_1_1_0.AuxInt) != 8 {
 					continue
 				}
 				v_1_1_0_0 := v_1_1_0.Args[0]
@@ -17427,11 +17388,11 @@
 					continue
 				}
 				v_1_1_0_0_0 := v_1_1_0_0.Args[0]
-				if v_1_1_0_0_0.Op != OpAMD64ADDQconst || v_1_1_0_0_0.AuxInt != -8 {
+				if v_1_1_0_0_0.Op != OpAMD64ADDQconst || auxIntToInt32(v_1_1_0_0_0.AuxInt) != -8 {
 					continue
 				}
 				v_1_1_0_0_0_0 := v_1_1_0_0_0.Args[0]
-				if v_1_1_0_0_0_0.Op != OpAMD64ANDQconst || v_1_1_0_0_0_0.AuxInt != 7 || y != v_1_1_0_0_0_0.Args[0] || !(v.Type.Size() == 1) {
+				if v_1_1_0_0_0_0.Op != OpAMD64ANDQconst || auxIntToInt32(v_1_1_0_0_0_0.AuxInt) != 7 || y != v_1_1_0_0_0_0.Args[0] || !(v.Type.Size() == 1) {
 					continue
 				}
 				v.reset(OpAMD64ROLB)
@@ -17452,7 +17413,7 @@
 			_ = v_0.Args[1]
 			x := v_0.Args[0]
 			v_0_1 := v_0.Args[1]
-			if v_0_1.Op != OpAMD64ANDLconst || v_0_1.AuxInt != 7 {
+			if v_0_1.Op != OpAMD64ANDLconst || auxIntToInt32(v_0_1.AuxInt) != 7 {
 				continue
 			}
 			y := v_0_1.Args[0]
@@ -17475,15 +17436,15 @@
 					continue
 				}
 				v_1_0_1_0 := v_1_0_1.Args[0]
-				if v_1_0_1_0.Op != OpAMD64ADDLconst || v_1_0_1_0.AuxInt != -8 {
+				if v_1_0_1_0.Op != OpAMD64ADDLconst || auxIntToInt32(v_1_0_1_0.AuxInt) != -8 {
 					continue
 				}
 				v_1_0_1_0_0 := v_1_0_1_0.Args[0]
-				if v_1_0_1_0_0.Op != OpAMD64ANDLconst || v_1_0_1_0_0.AuxInt != 7 || y != v_1_0_1_0_0.Args[0] || v_1_1.Op != OpAMD64SBBLcarrymask {
+				if v_1_0_1_0_0.Op != OpAMD64ANDLconst || auxIntToInt32(v_1_0_1_0_0.AuxInt) != 7 || y != v_1_0_1_0_0.Args[0] || v_1_1.Op != OpAMD64SBBLcarrymask {
 					continue
 				}
 				v_1_1_0 := v_1_1.Args[0]
-				if v_1_1_0.Op != OpAMD64CMPLconst || v_1_1_0.AuxInt != 8 {
+				if v_1_1_0.Op != OpAMD64CMPLconst || auxIntToInt32(v_1_1_0.AuxInt) != 8 {
 					continue
 				}
 				v_1_1_0_0 := v_1_1_0.Args[0]
@@ -17491,11 +17452,11 @@
 					continue
 				}
 				v_1_1_0_0_0 := v_1_1_0_0.Args[0]
-				if v_1_1_0_0_0.Op != OpAMD64ADDLconst || v_1_1_0_0_0.AuxInt != -8 {
+				if v_1_1_0_0_0.Op != OpAMD64ADDLconst || auxIntToInt32(v_1_1_0_0_0.AuxInt) != -8 {
 					continue
 				}
 				v_1_1_0_0_0_0 := v_1_1_0_0_0.Args[0]
-				if v_1_1_0_0_0_0.Op != OpAMD64ANDLconst || v_1_1_0_0_0_0.AuxInt != 7 || y != v_1_1_0_0_0_0.Args[0] || !(v.Type.Size() == 1) {
+				if v_1_1_0_0_0_0.Op != OpAMD64ANDLconst || auxIntToInt32(v_1_1_0_0_0_0.AuxInt) != 7 || y != v_1_1_0_0_0_0.Args[0] || !(v.Type.Size() == 1) {
 					continue
 				}
 				v.reset(OpAMD64ROLB)
@@ -17516,7 +17477,7 @@
 			_ = v_0.Args[1]
 			x := v_0.Args[0]
 			v_0_1 := v_0.Args[1]
-			if v_0_1.Op != OpAMD64ANDQconst || v_0_1.AuxInt != 7 {
+			if v_0_1.Op != OpAMD64ANDQconst || auxIntToInt32(v_0_1.AuxInt) != 7 {
 				continue
 			}
 			y := v_0_1.Args[0]
@@ -17532,11 +17493,11 @@
 				continue
 			}
 			v_1_1_0 := v_1_1.Args[0]
-			if v_1_1_0.Op != OpAMD64ADDQconst || v_1_1_0.AuxInt != -8 {
+			if v_1_1_0.Op != OpAMD64ADDQconst || auxIntToInt32(v_1_1_0.AuxInt) != -8 {
 				continue
 			}
 			v_1_1_0_0 := v_1_1_0.Args[0]
-			if v_1_1_0_0.Op != OpAMD64ANDQconst || v_1_1_0_0.AuxInt != 7 || y != v_1_1_0_0.Args[0] || !(v.Type.Size() == 1) {
+			if v_1_1_0_0.Op != OpAMD64ANDQconst || auxIntToInt32(v_1_1_0_0.AuxInt) != 7 || y != v_1_1_0_0.Args[0] || !(v.Type.Size() == 1) {
 				continue
 			}
 			v.reset(OpAMD64RORB)
@@ -17556,7 +17517,7 @@
 			_ = v_0.Args[1]
 			x := v_0.Args[0]
 			v_0_1 := v_0.Args[1]
-			if v_0_1.Op != OpAMD64ANDLconst || v_0_1.AuxInt != 7 {
+			if v_0_1.Op != OpAMD64ANDLconst || auxIntToInt32(v_0_1.AuxInt) != 7 {
 				continue
 			}
 			y := v_0_1.Args[0]
@@ -17572,11 +17533,11 @@
 				continue
 			}
 			v_1_1_0 := v_1_1.Args[0]
-			if v_1_1_0.Op != OpAMD64ADDLconst || v_1_1_0.AuxInt != -8 {
+			if v_1_1_0.Op != OpAMD64ADDLconst || auxIntToInt32(v_1_1_0.AuxInt) != -8 {
 				continue
 			}
 			v_1_1_0_0 := v_1_1_0.Args[0]
-			if v_1_1_0_0.Op != OpAMD64ANDLconst || v_1_1_0_0.AuxInt != 7 || y != v_1_1_0_0.Args[0] || !(v.Type.Size() == 1) {
+			if v_1_1_0_0.Op != OpAMD64ANDLconst || auxIntToInt32(v_1_1_0_0.AuxInt) != 7 || y != v_1_1_0_0.Args[0] || !(v.Type.Size() == 1) {
 				continue
 			}
 			v.reset(OpAMD64RORB)
@@ -18203,16 +18164,16 @@
 func rewriteValueAMD64_OpAMD64ORLconst(v *Value) bool {
 	v_0 := v.Args[0]
 	// match: (ORLconst [c] x)
-	// cond: isUint32PowerOfTwo(c) && uint64(c) >= 128
-	// result: (BTSLconst [log2uint32(c)] x)
+	// cond: isUint32PowerOfTwo(int64(c)) && uint64(c) >= 128
+	// result: (BTSLconst [int8(log32(c))] x)
 	for {
-		c := v.AuxInt
+		c := auxIntToInt32(v.AuxInt)
 		x := v_0
-		if !(isUint32PowerOfTwo(c) && uint64(c) >= 128) {
+		if !(isUint32PowerOfTwo(int64(c)) && uint64(c) >= 128) {
 			break
 		}
 		v.reset(OpAMD64BTSLconst)
-		v.AuxInt = log2uint32(c)
+		v.AuxInt = int8ToAuxInt(int8(log32(c)))
 		v.AddArg(x)
 		return true
 	}
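
[Editor's note] (ORLconst [c] x) with c a power of two becomes BTSLconst [log32(c)], a single bit-test-and-set. The uint64(c) >= 128 guard is, presumably, about encoding size: an OR whose immediate fits in a sign-extended imm8 is already at least as short as BTS, so the swap only pays once the immediate would need four bytes. The identity itself is just single-bit OR:

	// With c == 1<<k, x|c sets exactly bit k, which is what BTS does.
	func orSetsBit(x uint32, k uint) uint32 {
		return x | 1<<k
	}
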
@@ -18286,23 +18247,23 @@
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	// match: (ORLconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem)
-	// cond: ValAndOff(valoff1).canAdd(off2)
-	// result: (ORLconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem)
+	// cond: ValAndOff(valoff1).canAdd32(off2)
+	// result: (ORLconstmodify [ValAndOff(valoff1).addOffset32(off2)] {sym} base mem)
 	for {
-		valoff1 := v.AuxInt
-		sym := v.Aux
+		valoff1 := auxIntToValAndOff(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		if v_0.Op != OpAMD64ADDQconst {
 			break
 		}
-		off2 := v_0.AuxInt
+		off2 := auxIntToInt32(v_0.AuxInt)
 		base := v_0.Args[0]
 		mem := v_1
-		if !(ValAndOff(valoff1).canAdd(off2)) {
+		if !(ValAndOff(valoff1).canAdd32(off2)) {
 			break
 		}
 		v.reset(OpAMD64ORLconstmodify)
-		v.AuxInt = ValAndOff(valoff1).add(off2)
-		v.Aux = sym
+		v.AuxInt = valAndOffToAuxInt(ValAndOff(valoff1).addOffset32(off2))
+		v.Aux = symToAux(sym)
 		v.AddArg2(base, mem)
 		return true
 	}
@@ -18337,24 +18298,24 @@
 	b := v.Block
 	typ := &b.Func.Config.Types
 	// match: (ORLload [off1] {sym} val (ADDQconst [off2] base) mem)
-	// cond: is32Bit(off1+off2)
+	// cond: is32Bit(int64(off1)+int64(off2))
 	// result: (ORLload [off1+off2] {sym} val base mem)
 	for {
-		off1 := v.AuxInt
-		sym := v.Aux
+		off1 := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		val := v_0
 		if v_1.Op != OpAMD64ADDQconst {
 			break
 		}
-		off2 := v_1.AuxInt
+		off2 := auxIntToInt32(v_1.AuxInt)
 		base := v_1.Args[0]
 		mem := v_2
-		if !(is32Bit(off1 + off2)) {
+		if !(is32Bit(int64(off1) + int64(off2))) {
 			break
 		}
 		v.reset(OpAMD64ORLload)
-		v.AuxInt = off1 + off2
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off1 + off2)
+		v.Aux = symToAux(sym)
 		v.AddArg3(val, base, mem)
 		return true
 	}
@@ -18408,24 +18369,24 @@
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	// match: (ORLmodify [off1] {sym} (ADDQconst [off2] base) val mem)
-	// cond: is32Bit(off1+off2)
+	// cond: is32Bit(int64(off1)+int64(off2))
 	// result: (ORLmodify [off1+off2] {sym} base val mem)
 	for {
-		off1 := v.AuxInt
-		sym := v.Aux
+		off1 := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		if v_0.Op != OpAMD64ADDQconst {
 			break
 		}
-		off2 := v_0.AuxInt
+		off2 := auxIntToInt32(v_0.AuxInt)
 		base := v_0.Args[0]
 		val := v_1
 		mem := v_2
-		if !(is32Bit(off1 + off2)) {
+		if !(is32Bit(int64(off1) + int64(off2))) {
 			break
 		}
 		v.reset(OpAMD64ORLmodify)
-		v.AuxInt = off1 + off2
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off1 + off2)
+		v.Aux = symToAux(sym)
 		v.AddArg3(base, val, mem)
 		return true
 	}
@@ -18468,7 +18429,7 @@
 			}
 			y := v_0.Args[1]
 			v_0_0 := v_0.Args[0]
-			if v_0_0.Op != OpAMD64MOVQconst || v_0_0.AuxInt != 1 {
+			if v_0_0.Op != OpAMD64MOVQconst || auxIntToInt64(v_0_0.AuxInt) != 1 {
 				continue
 			}
 			x := v_1
@@ -18480,19 +18441,19 @@
 	}
 	// match: (ORQ (MOVQconst [c]) x)
 	// cond: isUint64PowerOfTwo(c) && uint64(c) >= 128
-	// result: (BTSQconst [log2(c)] x)
+	// result: (BTSQconst [int8(log2(c))] x)
 	for {
 		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
 			if v_0.Op != OpAMD64MOVQconst {
 				continue
 			}
-			c := v_0.AuxInt
+			c := auxIntToInt64(v_0.AuxInt)
 			x := v_1
 			if !(isUint64PowerOfTwo(c) && uint64(c) >= 128) {
 				continue
 			}
 			v.reset(OpAMD64BTSQconst)
-			v.AuxInt = log2(c)
+			v.AuxInt = int8ToAuxInt(int8(log2(c)))
 			v.AddArg(x)
 			return true
 		}
@@ -18500,19 +18461,19 @@
 	}
 	// match: (ORQ x (MOVQconst [c]))
 	// cond: is32Bit(c)
-	// result: (ORQconst [c] x)
+	// result: (ORQconst [int32(c)] x)
 	for {
 		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
 			x := v_0
 			if v_1.Op != OpAMD64MOVQconst {
 				continue
 			}
-			c := v_1.AuxInt
+			c := auxIntToInt64(v_1.AuxInt)
 			if !(is32Bit(c)) {
 				continue
 			}
 			v.reset(OpAMD64ORQconst)
-			v.AuxInt = c
+			v.AuxInt = int32ToAuxInt(int32(c))
 			v.AddArg(x)
 			return true
 		}
@@ -18526,9 +18487,9 @@
 			if v_1.Op != OpAMD64MOVLconst {
 				continue
 			}
-			c := v_1.AuxInt
+			c := auxIntToInt32(v_1.AuxInt)
 			v.reset(OpAMD64ORQconst)
-			v.AuxInt = c
+			v.AuxInt = int32ToAuxInt(c)
 			v.AddArg(x)
 			return true
 		}
@@ -18542,17 +18503,17 @@
 			if v_0.Op != OpAMD64SHLQconst {
 				continue
 			}
-			c := v_0.AuxInt
+			c := auxIntToInt8(v_0.AuxInt)
 			x := v_0.Args[0]
 			if v_1.Op != OpAMD64SHRQconst {
 				continue
 			}
-			d := v_1.AuxInt
+			d := auxIntToInt8(v_1.AuxInt)
 			if x != v_1.Args[0] || !(d == 64-c) {
 				continue
 			}
 			v.reset(OpAMD64ROLQconst)
-			v.AuxInt = c
+			v.AuxInt = int8ToAuxInt(c)
 			v.AddArg(x)
 			return true
 		}
@@ -18586,7 +18547,7 @@
 					continue
 				}
 				v_1_1_0 := v_1_1.Args[0]
-				if v_1_1_0.Op != OpAMD64CMPQconst || v_1_1_0.AuxInt != 64 {
+				if v_1_1_0.Op != OpAMD64CMPQconst || auxIntToInt32(v_1_1_0.AuxInt) != 64 {
 					continue
 				}
 				v_1_1_0_0 := v_1_1_0.Args[0]
@@ -18594,11 +18555,11 @@
 					continue
 				}
 				v_1_1_0_0_0 := v_1_1_0_0.Args[0]
-				if v_1_1_0_0_0.Op != OpAMD64ADDQconst || v_1_1_0_0_0.AuxInt != -64 {
+				if v_1_1_0_0_0.Op != OpAMD64ADDQconst || auxIntToInt32(v_1_1_0_0_0.AuxInt) != -64 {
 					continue
 				}
 				v_1_1_0_0_0_0 := v_1_1_0_0_0.Args[0]
-				if v_1_1_0_0_0_0.Op != OpAMD64ANDQconst || v_1_1_0_0_0_0.AuxInt != 63 || y != v_1_1_0_0_0_0.Args[0] {
+				if v_1_1_0_0_0_0.Op != OpAMD64ANDQconst || auxIntToInt32(v_1_1_0_0_0_0.AuxInt) != 63 || y != v_1_1_0_0_0_0.Args[0] {
 					continue
 				}
 				v.reset(OpAMD64ROLQ)
@@ -18636,7 +18597,7 @@
 					continue
 				}
 				v_1_1_0 := v_1_1.Args[0]
-				if v_1_1_0.Op != OpAMD64CMPLconst || v_1_1_0.AuxInt != 64 {
+				if v_1_1_0.Op != OpAMD64CMPLconst || auxIntToInt32(v_1_1_0.AuxInt) != 64 {
 					continue
 				}
 				v_1_1_0_0 := v_1_1_0.Args[0]
@@ -18644,11 +18605,11 @@
 					continue
 				}
 				v_1_1_0_0_0 := v_1_1_0_0.Args[0]
-				if v_1_1_0_0_0.Op != OpAMD64ADDLconst || v_1_1_0_0_0.AuxInt != -64 {
+				if v_1_1_0_0_0.Op != OpAMD64ADDLconst || auxIntToInt32(v_1_1_0_0_0.AuxInt) != -64 {
 					continue
 				}
 				v_1_1_0_0_0_0 := v_1_1_0_0_0.Args[0]
-				if v_1_1_0_0_0_0.Op != OpAMD64ANDLconst || v_1_1_0_0_0_0.AuxInt != 63 || y != v_1_1_0_0_0_0.Args[0] {
+				if v_1_1_0_0_0_0.Op != OpAMD64ANDLconst || auxIntToInt32(v_1_1_0_0_0_0.AuxInt) != 63 || y != v_1_1_0_0_0_0.Args[0] {
 					continue
 				}
 				v.reset(OpAMD64ROLQ)
@@ -18686,7 +18647,7 @@
 					continue
 				}
 				v_1_1_0 := v_1_1.Args[0]
-				if v_1_1_0.Op != OpAMD64CMPQconst || v_1_1_0.AuxInt != 64 {
+				if v_1_1_0.Op != OpAMD64CMPQconst || auxIntToInt32(v_1_1_0.AuxInt) != 64 {
 					continue
 				}
 				v_1_1_0_0 := v_1_1_0.Args[0]
@@ -18694,11 +18655,11 @@
 					continue
 				}
 				v_1_1_0_0_0 := v_1_1_0_0.Args[0]
-				if v_1_1_0_0_0.Op != OpAMD64ADDQconst || v_1_1_0_0_0.AuxInt != -64 {
+				if v_1_1_0_0_0.Op != OpAMD64ADDQconst || auxIntToInt32(v_1_1_0_0_0.AuxInt) != -64 {
 					continue
 				}
 				v_1_1_0_0_0_0 := v_1_1_0_0_0.Args[0]
-				if v_1_1_0_0_0_0.Op != OpAMD64ANDQconst || v_1_1_0_0_0_0.AuxInt != 63 || y != v_1_1_0_0_0_0.Args[0] {
+				if v_1_1_0_0_0_0.Op != OpAMD64ANDQconst || auxIntToInt32(v_1_1_0_0_0_0.AuxInt) != 63 || y != v_1_1_0_0_0_0.Args[0] {
 					continue
 				}
 				v.reset(OpAMD64RORQ)
@@ -18736,7 +18697,7 @@
 					continue
 				}
 				v_1_1_0 := v_1_1.Args[0]
-				if v_1_1_0.Op != OpAMD64CMPLconst || v_1_1_0.AuxInt != 64 {
+				if v_1_1_0.Op != OpAMD64CMPLconst || auxIntToInt32(v_1_1_0.AuxInt) != 64 {
 					continue
 				}
 				v_1_1_0_0 := v_1_1_0.Args[0]
@@ -18744,11 +18705,11 @@
 					continue
 				}
 				v_1_1_0_0_0 := v_1_1_0_0.Args[0]
-				if v_1_1_0_0_0.Op != OpAMD64ADDLconst || v_1_1_0_0_0.AuxInt != -64 {
+				if v_1_1_0_0_0.Op != OpAMD64ADDLconst || auxIntToInt32(v_1_1_0_0_0.AuxInt) != -64 {
 					continue
 				}
 				v_1_1_0_0_0_0 := v_1_1_0_0_0.Args[0]
-				if v_1_1_0_0_0_0.Op != OpAMD64ANDLconst || v_1_1_0_0_0_0.AuxInt != 63 || y != v_1_1_0_0_0_0.Args[0] {
+				if v_1_1_0_0_0_0.Op != OpAMD64ANDLconst || auxIntToInt32(v_1_1_0_0_0_0.AuxInt) != 63 || y != v_1_1_0_0_0_0.Args[0] {
 					continue
 				}
 				v.reset(OpAMD64RORQ)
@@ -19830,16 +19791,16 @@
 func rewriteValueAMD64_OpAMD64ORQconst(v *Value) bool {
 	v_0 := v.Args[0]
 	// match: (ORQconst [c] x)
-	// cond: isUint64PowerOfTwo(c) && uint64(c) >= 128
-	// result: (BTSQconst [log2(c)] x)
+	// cond: isUint64PowerOfTwo(int64(c)) && uint64(c) >= 128
+	// result: (BTSQconst [int8(log32(c))] x)
 	for {
-		c := v.AuxInt
+		c := auxIntToInt32(v.AuxInt)
 		x := v_0
-		if !(isUint64PowerOfTwo(c) && uint64(c) >= 128) {
+		if !(isUint64PowerOfTwo(int64(c)) && uint64(c) >= 128) {
 			break
 		}
 		v.reset(OpAMD64BTSQconst)
-		v.AuxInt = log2(c)
+		v.AuxInt = int8ToAuxInt(int8(log32(c)))
 		v.AddArg(x)
 		return true
 	}
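
ORing in a constant with a single high bit becomes BTSQconst carrying the bit index; swapping log2 for int8(log32(c)) works because c is now a typed int32. Hedged sketches of the two helpers the rewritten rule assumes:

package main

import (
	"fmt"
	"math/bits"
)

// isUint64PowerOfTwo: exactly one bit set when viewed as uint64.
func isUint64PowerOfTwo(in int64) bool {
	n := uint64(in)
	return n > 0 && n&(n-1) == 0
}

// log32: bit index of a power-of-two int32.
func log32(n int32) int64 {
	return int64(bits.Len32(uint32(n))) - 1
}

func main() {
	c := int32(1 << 9)
	fmt.Println(isUint64PowerOfTwo(int64(c)), log32(c)) // true 9
}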
@@ -19913,23 +19874,23 @@
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	// match: (ORQconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem)
-	// cond: ValAndOff(valoff1).canAdd(off2)
-	// result: (ORQconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem)
+	// cond: ValAndOff(valoff1).canAdd32(off2)
+	// result: (ORQconstmodify [ValAndOff(valoff1).addOffset32(off2)] {sym} base mem)
 	for {
-		valoff1 := v.AuxInt
-		sym := v.Aux
+		valoff1 := auxIntToValAndOff(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		if v_0.Op != OpAMD64ADDQconst {
 			break
 		}
-		off2 := v_0.AuxInt
+		off2 := auxIntToInt32(v_0.AuxInt)
 		base := v_0.Args[0]
 		mem := v_1
-		if !(ValAndOff(valoff1).canAdd(off2)) {
+		if !(ValAndOff(valoff1).canAdd32(off2)) {
 			break
 		}
 		v.reset(OpAMD64ORQconstmodify)
-		v.AuxInt = ValAndOff(valoff1).add(off2)
-		v.Aux = sym
+		v.AuxInt = valAndOffToAuxInt(ValAndOff(valoff1).addOffset32(off2))
+		v.Aux = symToAux(sym)
 		v.AddArg2(base, mem)
 		return true
 	}
@@ -19964,24 +19925,24 @@
 	b := v.Block
 	typ := &b.Func.Config.Types
 	// match: (ORQload [off1] {sym} val (ADDQconst [off2] base) mem)
-	// cond: is32Bit(off1+off2)
+	// cond: is32Bit(int64(off1)+int64(off2))
 	// result: (ORQload [off1+off2] {sym} val base mem)
 	for {
-		off1 := v.AuxInt
-		sym := v.Aux
+		off1 := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		val := v_0
 		if v_1.Op != OpAMD64ADDQconst {
 			break
 		}
-		off2 := v_1.AuxInt
+		off2 := auxIntToInt32(v_1.AuxInt)
 		base := v_1.Args[0]
 		mem := v_2
-		if !(is32Bit(off1 + off2)) {
+		if !(is32Bit(int64(off1) + int64(off2))) {
 			break
 		}
 		v.reset(OpAMD64ORQload)
-		v.AuxInt = off1 + off2
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off1 + off2)
+		v.Aux = symToAux(sym)
 		v.AddArg3(val, base, mem)
 		return true
 	}
@@ -20035,24 +19996,24 @@
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	// match: (ORQmodify [off1] {sym} (ADDQconst [off2] base) val mem)
-	// cond: is32Bit(off1+off2)
+	// cond: is32Bit(int64(off1)+int64(off2))
 	// result: (ORQmodify [off1+off2] {sym} base val mem)
 	for {
-		off1 := v.AuxInt
-		sym := v.Aux
+		off1 := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		if v_0.Op != OpAMD64ADDQconst {
 			break
 		}
-		off2 := v_0.AuxInt
+		off2 := auxIntToInt32(v_0.AuxInt)
 		base := v_0.Args[0]
 		val := v_1
 		mem := v_2
-		if !(is32Bit(off1 + off2)) {
+		if !(is32Bit(int64(off1) + int64(off2))) {
 			break
 		}
 		v.reset(OpAMD64ORQmodify)
-		v.AuxInt = off1 + off2
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off1 + off2)
+		v.Aux = symToAux(sym)
 		v.AddArg3(base, val, mem)
 		return true
 	}
@@ -20109,28 +20070,28 @@
 		return true
 	}
 	// match: (ROLB x (MOVQconst [c]))
-	// result: (ROLBconst [c&7 ] x)
+	// result: (ROLBconst [int8(c&7) ] x)
 	for {
 		x := v_0
 		if v_1.Op != OpAMD64MOVQconst {
 			break
 		}
-		c := v_1.AuxInt
+		c := auxIntToInt64(v_1.AuxInt)
 		v.reset(OpAMD64ROLBconst)
-		v.AuxInt = c & 7
+		v.AuxInt = int8ToAuxInt(int8(c & 7))
 		v.AddArg(x)
 		return true
 	}
 	// match: (ROLB x (MOVLconst [c]))
-	// result: (ROLBconst [c&7 ] x)
+	// result: (ROLBconst [int8(c&7) ] x)
 	for {
 		x := v_0
 		if v_1.Op != OpAMD64MOVLconst {
 			break
 		}
-		c := v_1.AuxInt
+		c := auxIntToInt32(v_1.AuxInt)
 		v.reset(OpAMD64ROLBconst)
-		v.AuxInt = c & 7
+		v.AuxInt = int8ToAuxInt(int8(c & 7))
 		v.AddArg(x)
 		return true
 	}
@@ -20141,21 +20102,21 @@
 	// match: (ROLBconst [c] (ROLBconst [d] x))
 	// result: (ROLBconst [(c+d)& 7] x)
 	for {
-		c := v.AuxInt
+		c := auxIntToInt8(v.AuxInt)
 		if v_0.Op != OpAMD64ROLBconst {
 			break
 		}
-		d := v_0.AuxInt
+		d := auxIntToInt8(v_0.AuxInt)
 		x := v_0.Args[0]
 		v.reset(OpAMD64ROLBconst)
-		v.AuxInt = (c + d) & 7
+		v.AuxInt = int8ToAuxInt((c + d) & 7)
 		v.AddArg(x)
 		return true
 	}
 	// match: (ROLBconst x [0])
 	// result: x
 	for {
-		if v.AuxInt != 0 {
+		if auxIntToInt8(v.AuxInt) != 0 {
 			break
 		}
 		x := v_0
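
The ROLB rules mask the rotate count to the 8-bit width and compose back-to-back rotates additively; the ROLW/ROLL/ROLQ variants below do the same with masks 15, 31 and 63. A quick check of both identities with the stdlib:

package main

import (
	"fmt"
	"math/bits"
)

func main() {
	x := uint8(0xA5)
	// Masking: rotating by c and by c&7 agree for 8-bit values.
	fmt.Println(bits.RotateLeft8(x, 11) == bits.RotateLeft8(x, 11&7)) // true
	// Composition: rol(rol(x, d), c) == rol(x, (c+d)&7).
	c, d := 5, 6
	fmt.Println(bits.RotateLeft8(bits.RotateLeft8(x, d), c) ==
		bits.RotateLeft8(x, (c+d)&7)) // true
}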
@@ -20192,28 +20153,28 @@
 		return true
 	}
 	// match: (ROLL x (MOVQconst [c]))
-	// result: (ROLLconst [c&31] x)
+	// result: (ROLLconst [int8(c&31)] x)
 	for {
 		x := v_0
 		if v_1.Op != OpAMD64MOVQconst {
 			break
 		}
-		c := v_1.AuxInt
+		c := auxIntToInt64(v_1.AuxInt)
 		v.reset(OpAMD64ROLLconst)
-		v.AuxInt = c & 31
+		v.AuxInt = int8ToAuxInt(int8(c & 31))
 		v.AddArg(x)
 		return true
 	}
 	// match: (ROLL x (MOVLconst [c]))
-	// result: (ROLLconst [c&31] x)
+	// result: (ROLLconst [int8(c&31)] x)
 	for {
 		x := v_0
 		if v_1.Op != OpAMD64MOVLconst {
 			break
 		}
-		c := v_1.AuxInt
+		c := auxIntToInt32(v_1.AuxInt)
 		v.reset(OpAMD64ROLLconst)
-		v.AuxInt = c & 31
+		v.AuxInt = int8ToAuxInt(int8(c & 31))
 		v.AddArg(x)
 		return true
 	}
@@ -20224,21 +20185,21 @@
 	// match: (ROLLconst [c] (ROLLconst [d] x))
 	// result: (ROLLconst [(c+d)&31] x)
 	for {
-		c := v.AuxInt
+		c := auxIntToInt8(v.AuxInt)
 		if v_0.Op != OpAMD64ROLLconst {
 			break
 		}
-		d := v_0.AuxInt
+		d := auxIntToInt8(v_0.AuxInt)
 		x := v_0.Args[0]
 		v.reset(OpAMD64ROLLconst)
-		v.AuxInt = (c + d) & 31
+		v.AuxInt = int8ToAuxInt((c + d) & 31)
 		v.AddArg(x)
 		return true
 	}
 	// match: (ROLLconst x [0])
 	// result: x
 	for {
-		if v.AuxInt != 0 {
+		if auxIntToInt8(v.AuxInt) != 0 {
 			break
 		}
 		x := v_0
@@ -20275,28 +20236,28 @@
 		return true
 	}
 	// match: (ROLQ x (MOVQconst [c]))
-	// result: (ROLQconst [c&63] x)
+	// result: (ROLQconst [int8(c&63)] x)
 	for {
 		x := v_0
 		if v_1.Op != OpAMD64MOVQconst {
 			break
 		}
-		c := v_1.AuxInt
+		c := auxIntToInt64(v_1.AuxInt)
 		v.reset(OpAMD64ROLQconst)
-		v.AuxInt = c & 63
+		v.AuxInt = int8ToAuxInt(int8(c & 63))
 		v.AddArg(x)
 		return true
 	}
 	// match: (ROLQ x (MOVLconst [c]))
-	// result: (ROLQconst [c&63] x)
+	// result: (ROLQconst [int8(c&63)] x)
 	for {
 		x := v_0
 		if v_1.Op != OpAMD64MOVLconst {
 			break
 		}
-		c := v_1.AuxInt
+		c := auxIntToInt32(v_1.AuxInt)
 		v.reset(OpAMD64ROLQconst)
-		v.AuxInt = c & 63
+		v.AuxInt = int8ToAuxInt(int8(c & 63))
 		v.AddArg(x)
 		return true
 	}
@@ -20307,21 +20268,21 @@
 	// match: (ROLQconst [c] (ROLQconst [d] x))
 	// result: (ROLQconst [(c+d)&63] x)
 	for {
-		c := v.AuxInt
+		c := auxIntToInt8(v.AuxInt)
 		if v_0.Op != OpAMD64ROLQconst {
 			break
 		}
-		d := v_0.AuxInt
+		d := auxIntToInt8(v_0.AuxInt)
 		x := v_0.Args[0]
 		v.reset(OpAMD64ROLQconst)
-		v.AuxInt = (c + d) & 63
+		v.AuxInt = int8ToAuxInt((c + d) & 63)
 		v.AddArg(x)
 		return true
 	}
 	// match: (ROLQconst x [0])
 	// result: x
 	for {
-		if v.AuxInt != 0 {
+		if auxIntToInt8(v.AuxInt) != 0 {
 			break
 		}
 		x := v_0
@@ -20358,28 +20319,28 @@
 		return true
 	}
 	// match: (ROLW x (MOVQconst [c]))
-	// result: (ROLWconst [c&15] x)
+	// result: (ROLWconst [int8(c&15)] x)
 	for {
 		x := v_0
 		if v_1.Op != OpAMD64MOVQconst {
 			break
 		}
-		c := v_1.AuxInt
+		c := auxIntToInt64(v_1.AuxInt)
 		v.reset(OpAMD64ROLWconst)
-		v.AuxInt = c & 15
+		v.AuxInt = int8ToAuxInt(int8(c & 15))
 		v.AddArg(x)
 		return true
 	}
 	// match: (ROLW x (MOVLconst [c]))
-	// result: (ROLWconst [c&15] x)
+	// result: (ROLWconst [int8(c&15)] x)
 	for {
 		x := v_0
 		if v_1.Op != OpAMD64MOVLconst {
 			break
 		}
-		c := v_1.AuxInt
+		c := auxIntToInt32(v_1.AuxInt)
 		v.reset(OpAMD64ROLWconst)
-		v.AuxInt = c & 15
+		v.AuxInt = int8ToAuxInt(int8(c & 15))
 		v.AddArg(x)
 		return true
 	}
@@ -20390,21 +20351,21 @@
 	// match: (ROLWconst [c] (ROLWconst [d] x))
 	// result: (ROLWconst [(c+d)&15] x)
 	for {
-		c := v.AuxInt
+		c := auxIntToInt8(v.AuxInt)
 		if v_0.Op != OpAMD64ROLWconst {
 			break
 		}
-		d := v_0.AuxInt
+		d := auxIntToInt8(v_0.AuxInt)
 		x := v_0.Args[0]
 		v.reset(OpAMD64ROLWconst)
-		v.AuxInt = (c + d) & 15
+		v.AuxInt = int8ToAuxInt((c + d) & 15)
 		v.AddArg(x)
 		return true
 	}
 	// match: (ROLWconst x [0])
 	// result: x
 	for {
-		if v.AuxInt != 0 {
+		if auxIntToInt8(v.AuxInt) != 0 {
 			break
 		}
 		x := v_0
@@ -20441,28 +20402,28 @@
 		return true
 	}
 	// match: (RORB x (MOVQconst [c]))
-	// result: (ROLBconst [(-c)&7 ] x)
+	// result: (ROLBconst [int8((-c)&7) ] x)
 	for {
 		x := v_0
 		if v_1.Op != OpAMD64MOVQconst {
 			break
 		}
-		c := v_1.AuxInt
+		c := auxIntToInt64(v_1.AuxInt)
 		v.reset(OpAMD64ROLBconst)
-		v.AuxInt = (-c) & 7
+		v.AuxInt = int8ToAuxInt(int8((-c) & 7))
 		v.AddArg(x)
 		return true
 	}
 	// match: (RORB x (MOVLconst [c]))
-	// result: (ROLBconst [(-c)&7 ] x)
+	// result: (ROLBconst [int8((-c)&7) ] x)
 	for {
 		x := v_0
 		if v_1.Op != OpAMD64MOVLconst {
 			break
 		}
-		c := v_1.AuxInt
+		c := auxIntToInt32(v_1.AuxInt)
 		v.reset(OpAMD64ROLBconst)
-		v.AuxInt = (-c) & 7
+		v.AuxInt = int8ToAuxInt(int8((-c) & 7))
 		v.AddArg(x)
 		return true
 	}
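
There is no ROR*const form in these rules; a rotate right by c is instead rewritten as a rotate left by (-c) masked to the width. The identity, checked with the stdlib (RotateLeft8 with a negative count rotates right):

package main

import (
	"fmt"
	"math/bits"
)

func main() {
	x := uint8(0x5A)
	for c := 0; c < 16; c++ {
		// Rotate right by c == rotate left by (-c) mod 8.
		fmt.Println(bits.RotateLeft8(x, -c) == bits.RotateLeft8(x, (-c)&7))
	}
}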
@@ -20496,28 +20457,28 @@
 		return true
 	}
 	// match: (RORL x (MOVQconst [c]))
-	// result: (ROLLconst [(-c)&31] x)
+	// result: (ROLLconst [int8((-c)&31)] x)
 	for {
 		x := v_0
 		if v_1.Op != OpAMD64MOVQconst {
 			break
 		}
-		c := v_1.AuxInt
+		c := auxIntToInt64(v_1.AuxInt)
 		v.reset(OpAMD64ROLLconst)
-		v.AuxInt = (-c) & 31
+		v.AuxInt = int8ToAuxInt(int8((-c) & 31))
 		v.AddArg(x)
 		return true
 	}
 	// match: (RORL x (MOVLconst [c]))
-	// result: (ROLLconst [(-c)&31] x)
+	// result: (ROLLconst [int8((-c)&31)] x)
 	for {
 		x := v_0
 		if v_1.Op != OpAMD64MOVLconst {
 			break
 		}
-		c := v_1.AuxInt
+		c := auxIntToInt32(v_1.AuxInt)
 		v.reset(OpAMD64ROLLconst)
-		v.AuxInt = (-c) & 31
+		v.AuxInt = int8ToAuxInt(int8((-c) & 31))
 		v.AddArg(x)
 		return true
 	}
@@ -20551,28 +20512,28 @@
 		return true
 	}
 	// match: (RORQ x (MOVQconst [c]))
-	// result: (ROLQconst [(-c)&63] x)
+	// result: (ROLQconst [int8((-c)&63)] x)
 	for {
 		x := v_0
 		if v_1.Op != OpAMD64MOVQconst {
 			break
 		}
-		c := v_1.AuxInt
+		c := auxIntToInt64(v_1.AuxInt)
 		v.reset(OpAMD64ROLQconst)
-		v.AuxInt = (-c) & 63
+		v.AuxInt = int8ToAuxInt(int8((-c) & 63))
 		v.AddArg(x)
 		return true
 	}
 	// match: (RORQ x (MOVLconst [c]))
-	// result: (ROLQconst [(-c)&63] x)
+	// result: (ROLQconst [int8((-c)&63)] x)
 	for {
 		x := v_0
 		if v_1.Op != OpAMD64MOVLconst {
 			break
 		}
-		c := v_1.AuxInt
+		c := auxIntToInt32(v_1.AuxInt)
 		v.reset(OpAMD64ROLQconst)
-		v.AuxInt = (-c) & 63
+		v.AuxInt = int8ToAuxInt(int8((-c) & 63))
 		v.AddArg(x)
 		return true
 	}
@@ -20606,28 +20567,28 @@
 		return true
 	}
 	// match: (RORW x (MOVQconst [c]))
-	// result: (ROLWconst [(-c)&15] x)
+	// result: (ROLWconst [int8((-c)&15)] x)
 	for {
 		x := v_0
 		if v_1.Op != OpAMD64MOVQconst {
 			break
 		}
-		c := v_1.AuxInt
+		c := auxIntToInt64(v_1.AuxInt)
 		v.reset(OpAMD64ROLWconst)
-		v.AuxInt = (-c) & 15
+		v.AuxInt = int8ToAuxInt(int8((-c) & 15))
 		v.AddArg(x)
 		return true
 	}
 	// match: (RORW x (MOVLconst [c]))
-	// result: (ROLWconst [(-c)&15] x)
+	// result: (ROLWconst [int8((-c)&15)] x)
 	for {
 		x := v_0
 		if v_1.Op != OpAMD64MOVLconst {
 			break
 		}
-		c := v_1.AuxInt
+		c := auxIntToInt32(v_1.AuxInt)
 		v.reset(OpAMD64ROLWconst)
-		v.AuxInt = (-c) & 15
+		v.AuxInt = int8ToAuxInt(int8((-c) & 15))
 		v.AddArg(x)
 		return true
 	}
@@ -20637,28 +20598,28 @@
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	// match: (SARB x (MOVQconst [c]))
-	// result: (SARBconst [min(c&31,7)] x)
+	// result: (SARBconst [int8(min(int64(c)&31,7))] x)
 	for {
 		x := v_0
 		if v_1.Op != OpAMD64MOVQconst {
 			break
 		}
-		c := v_1.AuxInt
+		c := auxIntToInt64(v_1.AuxInt)
 		v.reset(OpAMD64SARBconst)
-		v.AuxInt = min(c&31, 7)
+		v.AuxInt = int8ToAuxInt(int8(min(int64(c)&31, 7)))
 		v.AddArg(x)
 		return true
 	}
 	// match: (SARB x (MOVLconst [c]))
-	// result: (SARBconst [min(c&31,7)] x)
+	// result: (SARBconst [int8(min(int64(c)&31,7))] x)
 	for {
 		x := v_0
 		if v_1.Op != OpAMD64MOVLconst {
 			break
 		}
-		c := v_1.AuxInt
+		c := auxIntToInt32(v_1.AuxInt)
 		v.reset(OpAMD64SARBconst)
-		v.AuxInt = min(c&31, 7)
+		v.AuxInt = int8ToAuxInt(int8(min(int64(c)&31, 7)))
 		v.AddArg(x)
 		return true
 	}
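
SARB clamps the count with min(c&31, 7): an arithmetic right shift of an 8-bit value by anything past 7 already yields pure sign bits, so larger counts are equivalent to 7 (reading the &31 as mirroring x86's shift-count masking for the wider forms is an assumption of this note). A check of the saturation:

package main

import "fmt"

func main() {
	x := int8(-100)
	// Shifts of 7 or more all produce the replicated sign bit,
	// so clamping the constant to 7 preserves the result.
	fmt.Println(x>>7, x>>7 == int8(int64(x)>>20)) // -1 true
}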
@@ -20669,7 +20630,7 @@
 	// match: (SARBconst x [0])
 	// result: x
 	for {
-		if v.AuxInt != 0 {
+		if auxIntToInt8(v.AuxInt) != 0 {
 			break
 		}
 		x := v_0
@@ -20695,28 +20656,28 @@
 	v_0 := v.Args[0]
 	b := v.Block
 	// match: (SARL x (MOVQconst [c]))
-	// result: (SARLconst [c&31] x)
+	// result: (SARLconst [int8(c&31)] x)
 	for {
 		x := v_0
 		if v_1.Op != OpAMD64MOVQconst {
 			break
 		}
-		c := v_1.AuxInt
+		c := auxIntToInt64(v_1.AuxInt)
 		v.reset(OpAMD64SARLconst)
-		v.AuxInt = c & 31
+		v.AuxInt = int8ToAuxInt(int8(c & 31))
 		v.AddArg(x)
 		return true
 	}
 	// match: (SARL x (MOVLconst [c]))
-	// result: (SARLconst [c&31] x)
+	// result: (SARLconst [int8(c&31)] x)
 	for {
 		x := v_0
 		if v_1.Op != OpAMD64MOVLconst {
 			break
 		}
-		c := v_1.AuxInt
+		c := auxIntToInt32(v_1.AuxInt)
 		v.reset(OpAMD64SARLconst)
-		v.AuxInt = c & 31
+		v.AuxInt = int8ToAuxInt(int8(c & 31))
 		v.AddArg(x)
 		return true
 	}
@@ -20728,7 +20689,7 @@
 		if v_1.Op != OpAMD64ADDQconst {
 			break
 		}
-		c := v_1.AuxInt
+		c := auxIntToInt32(v_1.AuxInt)
 		y := v_1.Args[0]
 		if !(c&31 == 0) {
 			break
@@ -20750,7 +20711,7 @@
 		if v_1_0.Op != OpAMD64ADDQconst {
 			break
 		}
-		c := v_1_0.AuxInt
+		c := auxIntToInt32(v_1_0.AuxInt)
 		y := v_1_0.Args[0]
 		if !(c&31 == 0) {
 			break
@@ -20769,7 +20730,7 @@
 		if v_1.Op != OpAMD64ANDQconst {
 			break
 		}
-		c := v_1.AuxInt
+		c := auxIntToInt32(v_1.AuxInt)
 		y := v_1.Args[0]
 		if !(c&31 == 31) {
 			break
@@ -20791,7 +20752,7 @@
 		if v_1_0.Op != OpAMD64ANDQconst {
 			break
 		}
-		c := v_1_0.AuxInt
+		c := auxIntToInt32(v_1_0.AuxInt)
 		y := v_1_0.Args[0]
 		if !(c&31 == 31) {
 			break
@@ -20810,7 +20771,7 @@
 		if v_1.Op != OpAMD64ADDLconst {
 			break
 		}
-		c := v_1.AuxInt
+		c := auxIntToInt32(v_1.AuxInt)
 		y := v_1.Args[0]
 		if !(c&31 == 0) {
 			break
@@ -20832,7 +20793,7 @@
 		if v_1_0.Op != OpAMD64ADDLconst {
 			break
 		}
-		c := v_1_0.AuxInt
+		c := auxIntToInt32(v_1_0.AuxInt)
 		y := v_1_0.Args[0]
 		if !(c&31 == 0) {
 			break
@@ -20851,7 +20812,7 @@
 		if v_1.Op != OpAMD64ANDLconst {
 			break
 		}
-		c := v_1.AuxInt
+		c := auxIntToInt32(v_1.AuxInt)
 		y := v_1.Args[0]
 		if !(c&31 == 31) {
 			break
@@ -20873,7 +20834,7 @@
 		if v_1_0.Op != OpAMD64ANDLconst {
 			break
 		}
-		c := v_1_0.AuxInt
+		c := auxIntToInt32(v_1_0.AuxInt)
 		y := v_1_0.Args[0]
 		if !(c&31 == 31) {
 			break
@@ -20891,7 +20852,7 @@
 	// match: (SARLconst x [0])
 	// result: x
 	for {
-		if v.AuxInt != 0 {
+		if auxIntToInt8(v.AuxInt) != 0 {
 			break
 		}
 		x := v_0
@@ -20917,28 +20878,28 @@
 	v_0 := v.Args[0]
 	b := v.Block
 	// match: (SARQ x (MOVQconst [c]))
-	// result: (SARQconst [c&63] x)
+	// result: (SARQconst [int8(c&63)] x)
 	for {
 		x := v_0
 		if v_1.Op != OpAMD64MOVQconst {
 			break
 		}
-		c := v_1.AuxInt
+		c := auxIntToInt64(v_1.AuxInt)
 		v.reset(OpAMD64SARQconst)
-		v.AuxInt = c & 63
+		v.AuxInt = int8ToAuxInt(int8(c & 63))
 		v.AddArg(x)
 		return true
 	}
 	// match: (SARQ x (MOVLconst [c]))
-	// result: (SARQconst [c&63] x)
+	// result: (SARQconst [int8(c&63)] x)
 	for {
 		x := v_0
 		if v_1.Op != OpAMD64MOVLconst {
 			break
 		}
-		c := v_1.AuxInt
+		c := auxIntToInt32(v_1.AuxInt)
 		v.reset(OpAMD64SARQconst)
-		v.AuxInt = c & 63
+		v.AuxInt = int8ToAuxInt(int8(c & 63))
 		v.AddArg(x)
 		return true
 	}
@@ -20950,7 +20911,7 @@
 		if v_1.Op != OpAMD64ADDQconst {
 			break
 		}
-		c := v_1.AuxInt
+		c := auxIntToInt32(v_1.AuxInt)
 		y := v_1.Args[0]
 		if !(c&63 == 0) {
 			break
@@ -20972,7 +20933,7 @@
 		if v_1_0.Op != OpAMD64ADDQconst {
 			break
 		}
-		c := v_1_0.AuxInt
+		c := auxIntToInt32(v_1_0.AuxInt)
 		y := v_1_0.Args[0]
 		if !(c&63 == 0) {
 			break
@@ -20991,7 +20952,7 @@
 		if v_1.Op != OpAMD64ANDQconst {
 			break
 		}
-		c := v_1.AuxInt
+		c := auxIntToInt32(v_1.AuxInt)
 		y := v_1.Args[0]
 		if !(c&63 == 63) {
 			break
@@ -21013,7 +20974,7 @@
 		if v_1_0.Op != OpAMD64ANDQconst {
 			break
 		}
-		c := v_1_0.AuxInt
+		c := auxIntToInt32(v_1_0.AuxInt)
 		y := v_1_0.Args[0]
 		if !(c&63 == 63) {
 			break
@@ -21032,7 +20993,7 @@
 		if v_1.Op != OpAMD64ADDLconst {
 			break
 		}
-		c := v_1.AuxInt
+		c := auxIntToInt32(v_1.AuxInt)
 		y := v_1.Args[0]
 		if !(c&63 == 0) {
 			break
@@ -21054,7 +21015,7 @@
 		if v_1_0.Op != OpAMD64ADDLconst {
 			break
 		}
-		c := v_1_0.AuxInt
+		c := auxIntToInt32(v_1_0.AuxInt)
 		y := v_1_0.Args[0]
 		if !(c&63 == 0) {
 			break
@@ -21073,7 +21034,7 @@
 		if v_1.Op != OpAMD64ANDLconst {
 			break
 		}
-		c := v_1.AuxInt
+		c := auxIntToInt32(v_1.AuxInt)
 		y := v_1.Args[0]
 		if !(c&63 == 63) {
 			break
@@ -21095,7 +21056,7 @@
 		if v_1_0.Op != OpAMD64ANDLconst {
 			break
 		}
-		c := v_1_0.AuxInt
+		c := auxIntToInt32(v_1_0.AuxInt)
 		y := v_1_0.Args[0]
 		if !(c&63 == 63) {
 			break
@@ -21113,7 +21074,7 @@
 	// match: (SARQconst x [0])
 	// result: x
 	for {
-		if v.AuxInt != 0 {
+		if auxIntToInt8(v.AuxInt) != 0 {
 			break
 		}
 		x := v_0
@@ -21138,28 +21099,28 @@
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	// match: (SARW x (MOVQconst [c]))
-	// result: (SARWconst [min(c&31,15)] x)
+	// result: (SARWconst [int8(min(int64(c)&31,15))] x)
 	for {
 		x := v_0
 		if v_1.Op != OpAMD64MOVQconst {
 			break
 		}
-		c := v_1.AuxInt
+		c := auxIntToInt64(v_1.AuxInt)
 		v.reset(OpAMD64SARWconst)
-		v.AuxInt = min(c&31, 15)
+		v.AuxInt = int8ToAuxInt(int8(min(int64(c)&31, 15)))
 		v.AddArg(x)
 		return true
 	}
 	// match: (SARW x (MOVLconst [c]))
-	// result: (SARWconst [min(c&31,15)] x)
+	// result: (SARWconst [int8(min(int64(c)&31,15))] x)
 	for {
 		x := v_0
 		if v_1.Op != OpAMD64MOVLconst {
 			break
 		}
-		c := v_1.AuxInt
+		c := auxIntToInt32(v_1.AuxInt)
 		v.reset(OpAMD64SARWconst)
-		v.AuxInt = min(c&31, 15)
+		v.AuxInt = int8ToAuxInt(int8(min(int64(c)&31, 15)))
 		v.AddArg(x)
 		return true
 	}
@@ -21170,7 +21131,7 @@
 	// match: (SARWconst x [0])
 	// result: x
 	for {
-		if v.AuxInt != 0 {
+		if auxIntToInt8(v.AuxInt) != 0 {
 			break
 		}
 		x := v_0
@@ -21421,7 +21382,7 @@
 func rewriteValueAMD64_OpAMD64SETAE(v *Value) bool {
 	v_0 := v.Args[0]
 	// match: (SETAE (TESTQ x x))
-	// result: (ConstBool [1])
+	// result: (ConstBool [true])
 	for {
 		if v_0.Op != OpAMD64TESTQ {
 			break
@@ -21431,11 +21392,11 @@
 			break
 		}
 		v.reset(OpConstBool)
-		v.AuxInt = 1
+		v.AuxInt = boolToAuxInt(true)
 		return true
 	}
 	// match: (SETAE (TESTL x x))
-	// result: (ConstBool [1])
+	// result: (ConstBool [true])
 	for {
 		if v_0.Op != OpAMD64TESTL {
 			break
@@ -21445,11 +21406,11 @@
 			break
 		}
 		v.reset(OpConstBool)
-		v.AuxInt = 1
+		v.AuxInt = boolToAuxInt(true)
 		return true
 	}
 	// match: (SETAE (TESTW x x))
-	// result: (ConstBool [1])
+	// result: (ConstBool [true])
 	for {
 		if v_0.Op != OpAMD64TESTW {
 			break
@@ -21459,11 +21420,11 @@
 			break
 		}
 		v.reset(OpConstBool)
-		v.AuxInt = 1
+		v.AuxInt = boolToAuxInt(true)
 		return true
 	}
 	// match: (SETAE (TESTB x x))
-	// result: (ConstBool [1])
+	// result: (ConstBool [true])
 	for {
 		if v_0.Op != OpAMD64TESTB {
 			break
@@ -21473,7 +21434,7 @@
 			break
 		}
 		v.reset(OpConstBool)
-		v.AuxInt = 1
+		v.AuxInt = boolToAuxInt(true)
 		return true
 	}
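
TESTQ x x (and the narrower forms) always clears the carry flag, so SETAE, which means "carry clear", is constant true; the dual SETB rules further below fold to false the same way. The boolean now rides in AuxInt through a converter; a sketch matching its use here:

package main

import "fmt"

// boolToAuxInt encodes a ConstBool result in the int64 AuxInt:
// true as 1, false as 0.
func boolToAuxInt(b bool) int64 {
	if b {
		return 1
	}
	return 0
}

func main() {
	fmt.Println(boolToAuxInt(true), boolToAuxInt(false)) // 1 0
}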
 	// match: (SETAE (InvertFlags x))
@@ -21548,8 +21509,8 @@
 	// match: (SETAEstore [off] {sym} ptr (InvertFlags x) mem)
 	// result: (SETBEstore [off] {sym} ptr x mem)
 	for {
-		off := v.AuxInt
-		sym := v.Aux
+		off := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		ptr := v_0
 		if v_1.Op != OpAMD64InvertFlags {
 			break
@@ -21557,30 +21518,30 @@
 		x := v_1.Args[0]
 		mem := v_2
 		v.reset(OpAMD64SETBEstore)
-		v.AuxInt = off
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off)
+		v.Aux = symToAux(sym)
 		v.AddArg3(ptr, x, mem)
 		return true
 	}
 	// match: (SETAEstore [off1] {sym} (ADDQconst [off2] base) val mem)
-	// cond: is32Bit(off1+off2)
+	// cond: is32Bit(int64(off1)+int64(off2))
 	// result: (SETAEstore [off1+off2] {sym} base val mem)
 	for {
-		off1 := v.AuxInt
-		sym := v.Aux
+		off1 := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		if v_0.Op != OpAMD64ADDQconst {
 			break
 		}
-		off2 := v_0.AuxInt
+		off2 := auxIntToInt32(v_0.AuxInt)
 		base := v_0.Args[0]
 		val := v_1
 		mem := v_2
-		if !(is32Bit(off1 + off2)) {
+		if !(is32Bit(int64(off1) + int64(off2))) {
 			break
 		}
 		v.reset(OpAMD64SETAEstore)
-		v.AuxInt = off1 + off2
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off1 + off2)
+		v.Aux = symToAux(sym)
 		v.AddArg3(base, val, mem)
 		return true
 	}
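
InvertFlags stands for the same comparison with its operands swapped, so a flags consumer rewritten through it must mirror its condition: A pairs with B, AE with BE, the signed G/L forms further below pair likewise, and EQ/NE are symmetric and keep their own opcode. A sketch of the pairing these store rules apply (the map and its names are illustrative only):

package main

import "fmt"

// invert maps a SETcc to its operand-swapped counterpart.
var invert = map[string]string{
	"SETA":  "SETB",
	"SETAE": "SETBE",
	"SETB":  "SETA",
	"SETBE": "SETAE",
	"SETG":  "SETL",
	"SETGE": "SETLE",
	"SETL":  "SETG",
	"SETLE": "SETGE",
	"SETEQ": "SETEQ", // equality is symmetric
	"SETNE": "SETNE",
}

func main() {
	fmt.Println(invert["SETAE"]) // SETBE, as in the rule above
}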
@@ -21708,8 +21669,8 @@
 	// match: (SETAstore [off] {sym} ptr (InvertFlags x) mem)
 	// result: (SETBstore [off] {sym} ptr x mem)
 	for {
-		off := v.AuxInt
-		sym := v.Aux
+		off := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		ptr := v_0
 		if v_1.Op != OpAMD64InvertFlags {
 			break
@@ -21717,30 +21678,30 @@
 		x := v_1.Args[0]
 		mem := v_2
 		v.reset(OpAMD64SETBstore)
-		v.AuxInt = off
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off)
+		v.Aux = symToAux(sym)
 		v.AddArg3(ptr, x, mem)
 		return true
 	}
 	// match: (SETAstore [off1] {sym} (ADDQconst [off2] base) val mem)
-	// cond: is32Bit(off1+off2)
+	// cond: is32Bit(int64(off1)+int64(off2))
 	// result: (SETAstore [off1+off2] {sym} base val mem)
 	for {
-		off1 := v.AuxInt
-		sym := v.Aux
+		off1 := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		if v_0.Op != OpAMD64ADDQconst {
 			break
 		}
-		off2 := v_0.AuxInt
+		off2 := auxIntToInt32(v_0.AuxInt)
 		base := v_0.Args[0]
 		val := v_1
 		mem := v_2
-		if !(is32Bit(off1 + off2)) {
+		if !(is32Bit(int64(off1) + int64(off2))) {
 			break
 		}
 		v.reset(OpAMD64SETAstore)
-		v.AuxInt = off1 + off2
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off1 + off2)
+		v.Aux = symToAux(sym)
 		v.AddArg3(base, val, mem)
 		return true
 	}
@@ -21862,7 +21823,7 @@
 func rewriteValueAMD64_OpAMD64SETB(v *Value) bool {
 	v_0 := v.Args[0]
 	// match: (SETB (TESTQ x x))
-	// result: (ConstBool [0])
+	// result: (ConstBool [false])
 	for {
 		if v_0.Op != OpAMD64TESTQ {
 			break
@@ -21872,11 +21833,11 @@
 			break
 		}
 		v.reset(OpConstBool)
-		v.AuxInt = 0
+		v.AuxInt = boolToAuxInt(false)
 		return true
 	}
 	// match: (SETB (TESTL x x))
-	// result: (ConstBool [0])
+	// result: (ConstBool [false])
 	for {
 		if v_0.Op != OpAMD64TESTL {
 			break
@@ -21886,11 +21847,11 @@
 			break
 		}
 		v.reset(OpConstBool)
-		v.AuxInt = 0
+		v.AuxInt = boolToAuxInt(false)
 		return true
 	}
 	// match: (SETB (TESTW x x))
-	// result: (ConstBool [0])
+	// result: (ConstBool [false])
 	for {
 		if v_0.Op != OpAMD64TESTW {
 			break
@@ -21900,11 +21861,11 @@
 			break
 		}
 		v.reset(OpConstBool)
-		v.AuxInt = 0
+		v.AuxInt = boolToAuxInt(false)
 		return true
 	}
 	// match: (SETB (TESTB x x))
-	// result: (ConstBool [0])
+	// result: (ConstBool [false])
 	for {
 		if v_0.Op != OpAMD64TESTB {
 			break
@@ -21914,7 +21875,7 @@
 			break
 		}
 		v.reset(OpConstBool)
-		v.AuxInt = 0
+		v.AuxInt = boolToAuxInt(false)
 		return true
 	}
 	// match: (SETB (BTLconst [0] x))
@@ -22078,8 +22039,8 @@
 	// match: (SETBEstore [off] {sym} ptr (InvertFlags x) mem)
 	// result: (SETAEstore [off] {sym} ptr x mem)
 	for {
-		off := v.AuxInt
-		sym := v.Aux
+		off := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		ptr := v_0
 		if v_1.Op != OpAMD64InvertFlags {
 			break
@@ -22087,30 +22048,30 @@
 		x := v_1.Args[0]
 		mem := v_2
 		v.reset(OpAMD64SETAEstore)
-		v.AuxInt = off
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off)
+		v.Aux = symToAux(sym)
 		v.AddArg3(ptr, x, mem)
 		return true
 	}
 	// match: (SETBEstore [off1] {sym} (ADDQconst [off2] base) val mem)
-	// cond: is32Bit(off1+off2)
+	// cond: is32Bit(int64(off1)+int64(off2))
 	// result: (SETBEstore [off1+off2] {sym} base val mem)
 	for {
-		off1 := v.AuxInt
-		sym := v.Aux
+		off1 := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		if v_0.Op != OpAMD64ADDQconst {
 			break
 		}
-		off2 := v_0.AuxInt
+		off2 := auxIntToInt32(v_0.AuxInt)
 		base := v_0.Args[0]
 		val := v_1
 		mem := v_2
-		if !(is32Bit(off1 + off2)) {
+		if !(is32Bit(int64(off1) + int64(off2))) {
 			break
 		}
 		v.reset(OpAMD64SETBEstore)
-		v.AuxInt = off1 + off2
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off1 + off2)
+		v.Aux = symToAux(sym)
 		v.AddArg3(base, val, mem)
 		return true
 	}
@@ -22238,8 +22199,8 @@
 	// match: (SETBstore [off] {sym} ptr (InvertFlags x) mem)
 	// result: (SETAstore [off] {sym} ptr x mem)
 	for {
-		off := v.AuxInt
-		sym := v.Aux
+		off := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		ptr := v_0
 		if v_1.Op != OpAMD64InvertFlags {
 			break
@@ -22247,30 +22208,30 @@
 		x := v_1.Args[0]
 		mem := v_2
 		v.reset(OpAMD64SETAstore)
-		v.AuxInt = off
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off)
+		v.Aux = symToAux(sym)
 		v.AddArg3(ptr, x, mem)
 		return true
 	}
 	// match: (SETBstore [off1] {sym} (ADDQconst [off2] base) val mem)
-	// cond: is32Bit(off1+off2)
+	// cond: is32Bit(int64(off1)+int64(off2))
 	// result: (SETBstore [off1+off2] {sym} base val mem)
 	for {
-		off1 := v.AuxInt
-		sym := v.Aux
+		off1 := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		if v_0.Op != OpAMD64ADDQconst {
 			break
 		}
-		off2 := v_0.AuxInt
+		off2 := auxIntToInt32(v_0.AuxInt)
 		base := v_0.Args[0]
 		val := v_1
 		mem := v_2
-		if !(is32Bit(off1 + off2)) {
+		if !(is32Bit(int64(off1) + int64(off2))) {
 			break
 		}
 		v.reset(OpAMD64SETBstore)
-		v.AuxInt = off1 + off2
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off1 + off2)
+		v.Aux = symToAux(sym)
 		v.AddArg3(base, val, mem)
 		return true
 	}
@@ -22407,7 +22368,7 @@
 			}
 			x := v_0_0.Args[1]
 			v_0_0_0 := v_0_0.Args[0]
-			if v_0_0_0.Op != OpAMD64MOVLconst || v_0_0_0.AuxInt != 1 {
+			if v_0_0_0.Op != OpAMD64MOVLconst || auxIntToInt32(v_0_0_0.AuxInt) != 1 {
 				continue
 			}
 			y := v_0_1
@@ -22434,7 +22395,7 @@
 			}
 			x := v_0_0.Args[1]
 			v_0_0_0 := v_0_0.Args[0]
-			if v_0_0_0.Op != OpAMD64MOVQconst || v_0_0_0.AuxInt != 1 {
+			if v_0_0_0.Op != OpAMD64MOVQconst || auxIntToInt64(v_0_0_0.AuxInt) != 1 {
 				continue
 			}
 			y := v_0_1
@@ -22447,46 +22408,46 @@
 		break
 	}
 	// match: (SETEQ (TESTLconst [c] x))
-	// cond: isUint32PowerOfTwo(c)
-	// result: (SETAE (BTLconst [log2uint32(c)] x))
+	// cond: isUint32PowerOfTwo(int64(c))
+	// result: (SETAE (BTLconst [int8(log32(c))] x))
 	for {
 		if v_0.Op != OpAMD64TESTLconst {
 			break
 		}
-		c := v_0.AuxInt
+		c := auxIntToInt32(v_0.AuxInt)
 		x := v_0.Args[0]
-		if !(isUint32PowerOfTwo(c)) {
+		if !(isUint32PowerOfTwo(int64(c))) {
 			break
 		}
 		v.reset(OpAMD64SETAE)
 		v0 := b.NewValue0(v.Pos, OpAMD64BTLconst, types.TypeFlags)
-		v0.AuxInt = log2uint32(c)
+		v0.AuxInt = int8ToAuxInt(int8(log32(c)))
 		v0.AddArg(x)
 		v.AddArg(v0)
 		return true
 	}
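
Testing x against a power-of-two constant is a single-bit question, so SETEQ of TESTLconst becomes a bit test: BT copies the selected bit into the carry flag and SETAE then checks that it is clear. The scalar identity behind the rewrite:

package main

import "fmt"

func main() {
	x := uint32(0b1011_0000)
	const c = uint32(1 << 5) // power of two; bit index log32(c) == 5
	// x&c == 0 iff bit 5 of x is 0 - what BTLconst[5]+SETAE computes.
	fmt.Println((x&c == 0) == ((x>>5)&1 == 0)) // true
}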
 	// match: (SETEQ (TESTQconst [c] x))
-	// cond: isUint64PowerOfTwo(c)
-	// result: (SETAE (BTQconst [log2(c)] x))
+	// cond: isUint64PowerOfTwo(int64(c))
+	// result: (SETAE (BTQconst [int8(log32(c))] x))
 	for {
 		if v_0.Op != OpAMD64TESTQconst {
 			break
 		}
-		c := v_0.AuxInt
+		c := auxIntToInt32(v_0.AuxInt)
 		x := v_0.Args[0]
-		if !(isUint64PowerOfTwo(c)) {
+		if !(isUint64PowerOfTwo(int64(c))) {
 			break
 		}
 		v.reset(OpAMD64SETAE)
 		v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags)
-		v0.AuxInt = log2(c)
+		v0.AuxInt = int8ToAuxInt(int8(log32(c)))
 		v0.AddArg(x)
 		v.AddArg(v0)
 		return true
 	}
 	// match: (SETEQ (TESTQ (MOVQconst [c]) x))
 	// cond: isUint64PowerOfTwo(c)
-	// result: (SETAE (BTQconst [log2(c)] x))
+	// result: (SETAE (BTQconst [int8(log2(c))] x))
 	for {
 		if v_0.Op != OpAMD64TESTQ {
 			break
@@ -22498,14 +22459,14 @@
 			if v_0_0.Op != OpAMD64MOVQconst {
 				continue
 			}
-			c := v_0_0.AuxInt
+			c := auxIntToInt64(v_0_0.AuxInt)
 			x := v_0_1
 			if !(isUint64PowerOfTwo(c)) {
 				continue
 			}
 			v.reset(OpAMD64SETAE)
 			v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags)
-			v0.AuxInt = log2(c)
+			v0.AuxInt = int8ToAuxInt(int8(log2(c)))
 			v0.AddArg(x)
 			v.AddArg(v0)
 			return true
@@ -22515,16 +22476,16 @@
 	// match: (SETEQ (CMPLconst [1] s:(ANDLconst [1] _)))
 	// result: (SETNE (CMPLconst [0] s))
 	for {
-		if v_0.Op != OpAMD64CMPLconst || v_0.AuxInt != 1 {
+		if v_0.Op != OpAMD64CMPLconst || auxIntToInt32(v_0.AuxInt) != 1 {
 			break
 		}
 		s := v_0.Args[0]
-		if s.Op != OpAMD64ANDLconst || s.AuxInt != 1 {
+		if s.Op != OpAMD64ANDLconst || auxIntToInt32(s.AuxInt) != 1 {
 			break
 		}
 		v.reset(OpAMD64SETNE)
 		v0 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags)
-		v0.AuxInt = 0
+		v0.AuxInt = int32ToAuxInt(0)
 		v0.AddArg(s)
 		v.AddArg(v0)
 		return true
@@ -22532,16 +22493,16 @@
 	// match: (SETEQ (CMPQconst [1] s:(ANDQconst [1] _)))
 	// result: (SETNE (CMPQconst [0] s))
 	for {
-		if v_0.Op != OpAMD64CMPQconst || v_0.AuxInt != 1 {
+		if v_0.Op != OpAMD64CMPQconst || auxIntToInt32(v_0.AuxInt) != 1 {
 			break
 		}
 		s := v_0.Args[0]
-		if s.Op != OpAMD64ANDQconst || s.AuxInt != 1 {
+		if s.Op != OpAMD64ANDQconst || auxIntToInt32(s.AuxInt) != 1 {
 			break
 		}
 		v.reset(OpAMD64SETNE)
 		v0 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags)
-		v0.AuxInt = 0
+		v0.AuxInt = int32ToAuxInt(0)
 		v0.AddArg(s)
 		v.AddArg(v0)
 		return true
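
Because s is the result of an AND with 1, it can only be 0 or 1, so comparing it against 1 under one condition equals comparing against 0 under the opposite one; flipping lets later lowering use the compare-with-zero form. On {0, 1}:

package main

import "fmt"

func main() {
	for _, s := range []uint64{0, 1} { // s == x&1
		// (s == 1) <=> (s != 0): the SETEQ/SETNE flip above.
		fmt.Println((s == 1) == (s != 0)) // true
	}
}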
@@ -22558,11 +22519,11 @@
 		v_0_1 := v_0.Args[1]
 		for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
 			z1 := v_0_0
-			if z1.Op != OpAMD64SHLQconst || z1.AuxInt != 63 {
+			if z1.Op != OpAMD64SHLQconst || auxIntToInt8(z1.AuxInt) != 63 {
 				continue
 			}
 			z1_0 := z1.Args[0]
-			if z1_0.Op != OpAMD64SHRQconst || z1_0.AuxInt != 63 {
+			if z1_0.Op != OpAMD64SHRQconst || auxIntToInt8(z1_0.AuxInt) != 63 {
 				continue
 			}
 			x := z1_0.Args[0]
@@ -22572,7 +22533,7 @@
 			}
 			v.reset(OpAMD64SETAE)
 			v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags)
-			v0.AuxInt = 63
+			v0.AuxInt = int8ToAuxInt(63)
 			v0.AddArg(x)
 			v.AddArg(v0)
 			return true
@@ -22591,11 +22552,11 @@
 		v_0_1 := v_0.Args[1]
 		for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
 			z1 := v_0_0
-			if z1.Op != OpAMD64SHLLconst || z1.AuxInt != 31 {
+			if z1.Op != OpAMD64SHLLconst || auxIntToInt8(z1.AuxInt) != 31 {
 				continue
 			}
 			z1_0 := z1.Args[0]
-			if z1_0.Op != OpAMD64SHRQconst || z1_0.AuxInt != 31 {
+			if z1_0.Op != OpAMD64SHRQconst || auxIntToInt8(z1_0.AuxInt) != 31 {
 				continue
 			}
 			x := z1_0.Args[0]
@@ -22605,7 +22566,7 @@
 			}
 			v.reset(OpAMD64SETAE)
 			v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags)
-			v0.AuxInt = 31
+			v0.AuxInt = int8ToAuxInt(31)
 			v0.AddArg(x)
 			v.AddArg(v0)
 			return true
@@ -22624,11 +22585,11 @@
 		v_0_1 := v_0.Args[1]
 		for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
 			z1 := v_0_0
-			if z1.Op != OpAMD64SHRQconst || z1.AuxInt != 63 {
+			if z1.Op != OpAMD64SHRQconst || auxIntToInt8(z1.AuxInt) != 63 {
 				continue
 			}
 			z1_0 := z1.Args[0]
-			if z1_0.Op != OpAMD64SHLQconst || z1_0.AuxInt != 63 {
+			if z1_0.Op != OpAMD64SHLQconst || auxIntToInt8(z1_0.AuxInt) != 63 {
 				continue
 			}
 			x := z1_0.Args[0]
@@ -22638,7 +22599,7 @@
 			}
 			v.reset(OpAMD64SETAE)
 			v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags)
-			v0.AuxInt = 0
+			v0.AuxInt = int8ToAuxInt(0)
 			v0.AddArg(x)
 			v.AddArg(v0)
 			return true
@@ -22657,11 +22618,11 @@
 		v_0_1 := v_0.Args[1]
 		for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
 			z1 := v_0_0
-			if z1.Op != OpAMD64SHRLconst || z1.AuxInt != 31 {
+			if z1.Op != OpAMD64SHRLconst || auxIntToInt8(z1.AuxInt) != 31 {
 				continue
 			}
 			z1_0 := z1.Args[0]
-			if z1_0.Op != OpAMD64SHLLconst || z1_0.AuxInt != 31 {
+			if z1_0.Op != OpAMD64SHLLconst || auxIntToInt8(z1_0.AuxInt) != 31 {
 				continue
 			}
 			x := z1_0.Args[0]
@@ -22671,7 +22632,7 @@
 			}
 			v.reset(OpAMD64SETAE)
 			v0 := b.NewValue0(v.Pos, OpAMD64BTLconst, types.TypeFlags)
-			v0.AuxInt = 0
+			v0.AuxInt = int8ToAuxInt(0)
 			v0.AddArg(x)
 			v.AddArg(v0)
 			return true
@@ -22690,7 +22651,7 @@
 		v_0_1 := v_0.Args[1]
 		for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
 			z1 := v_0_0
-			if z1.Op != OpAMD64SHRQconst || z1.AuxInt != 63 {
+			if z1.Op != OpAMD64SHRQconst || auxIntToInt8(z1.AuxInt) != 63 {
 				continue
 			}
 			x := z1.Args[0]
@@ -22700,7 +22661,7 @@
 			}
 			v.reset(OpAMD64SETAE)
 			v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags)
-			v0.AuxInt = 63
+			v0.AuxInt = int8ToAuxInt(63)
 			v0.AddArg(x)
 			v.AddArg(v0)
 			return true
@@ -22719,7 +22680,7 @@
 		v_0_1 := v_0.Args[1]
 		for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
 			z1 := v_0_0
-			if z1.Op != OpAMD64SHRLconst || z1.AuxInt != 31 {
+			if z1.Op != OpAMD64SHRLconst || auxIntToInt8(z1.AuxInt) != 31 {
 				continue
 			}
 			x := z1.Args[0]
@@ -22729,7 +22690,7 @@
 			}
 			v.reset(OpAMD64SETAE)
 			v0 := b.NewValue0(v.Pos, OpAMD64BTLconst, types.TypeFlags)
-			v0.AuxInt = 31
+			v0.AuxInt = int8ToAuxInt(31)
 			v0.AddArg(x)
 			v.AddArg(v0)
 			return true
@@ -22808,8 +22769,8 @@
 	// match: (SETEQstore [off] {sym} ptr (TESTL (SHLL (MOVLconst [1]) x) y) mem)
 	// result: (SETAEstore [off] {sym} ptr (BTL x y) mem)
 	for {
-		off := v.AuxInt
-		sym := v.Aux
+		off := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		ptr := v_0
 		if v_1.Op != OpAMD64TESTL {
 			break
@@ -22823,14 +22784,14 @@
 			}
 			x := v_1_0.Args[1]
 			v_1_0_0 := v_1_0.Args[0]
-			if v_1_0_0.Op != OpAMD64MOVLconst || v_1_0_0.AuxInt != 1 {
+			if v_1_0_0.Op != OpAMD64MOVLconst || auxIntToInt32(v_1_0_0.AuxInt) != 1 {
 				continue
 			}
 			y := v_1_1
 			mem := v_2
 			v.reset(OpAMD64SETAEstore)
-			v.AuxInt = off
-			v.Aux = sym
+			v.AuxInt = int32ToAuxInt(off)
+			v.Aux = symToAux(sym)
 			v0 := b.NewValue0(v.Pos, OpAMD64BTL, types.TypeFlags)
 			v0.AddArg2(x, y)
 			v.AddArg3(ptr, v0, mem)
@@ -22841,8 +22802,8 @@
 	// match: (SETEQstore [off] {sym} ptr (TESTQ (SHLQ (MOVQconst [1]) x) y) mem)
 	// result: (SETAEstore [off] {sym} ptr (BTQ x y) mem)
 	for {
-		off := v.AuxInt
-		sym := v.Aux
+		off := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		ptr := v_0
 		if v_1.Op != OpAMD64TESTQ {
 			break
@@ -22856,14 +22817,14 @@
 			}
 			x := v_1_0.Args[1]
 			v_1_0_0 := v_1_0.Args[0]
-			if v_1_0_0.Op != OpAMD64MOVQconst || v_1_0_0.AuxInt != 1 {
+			if v_1_0_0.Op != OpAMD64MOVQconst || auxIntToInt64(v_1_0_0.AuxInt) != 1 {
 				continue
 			}
 			y := v_1_1
 			mem := v_2
 			v.reset(OpAMD64SETAEstore)
-			v.AuxInt = off
-			v.Aux = sym
+			v.AuxInt = int32ToAuxInt(off)
+			v.Aux = symToAux(sym)
 			v0 := b.NewValue0(v.Pos, OpAMD64BTQ, types.TypeFlags)
 			v0.AddArg2(x, y)
 			v.AddArg3(ptr, v0, mem)
@@ -22872,61 +22833,61 @@
 		break
 	}
 	// match: (SETEQstore [off] {sym} ptr (TESTLconst [c] x) mem)
-	// cond: isUint32PowerOfTwo(c)
-	// result: (SETAEstore [off] {sym} ptr (BTLconst [log2uint32(c)] x) mem)
+	// cond: isUint32PowerOfTwo(int64(c))
+	// result: (SETAEstore [off] {sym} ptr (BTLconst [int8(log32(c))] x) mem)
 	for {
-		off := v.AuxInt
-		sym := v.Aux
+		off := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		ptr := v_0
 		if v_1.Op != OpAMD64TESTLconst {
 			break
 		}
-		c := v_1.AuxInt
+		c := auxIntToInt32(v_1.AuxInt)
 		x := v_1.Args[0]
 		mem := v_2
-		if !(isUint32PowerOfTwo(c)) {
+		if !(isUint32PowerOfTwo(int64(c))) {
 			break
 		}
 		v.reset(OpAMD64SETAEstore)
-		v.AuxInt = off
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off)
+		v.Aux = symToAux(sym)
 		v0 := b.NewValue0(v.Pos, OpAMD64BTLconst, types.TypeFlags)
-		v0.AuxInt = log2uint32(c)
+		v0.AuxInt = int8ToAuxInt(int8(log32(c)))
 		v0.AddArg(x)
 		v.AddArg3(ptr, v0, mem)
 		return true
 	}
 	// match: (SETEQstore [off] {sym} ptr (TESTQconst [c] x) mem)
-	// cond: isUint64PowerOfTwo(c)
-	// result: (SETAEstore [off] {sym} ptr (BTQconst [log2(c)] x) mem)
+	// cond: isUint64PowerOfTwo(int64(c))
+	// result: (SETAEstore [off] {sym} ptr (BTQconst [int8(log32(c))] x) mem)
 	for {
-		off := v.AuxInt
-		sym := v.Aux
+		off := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		ptr := v_0
 		if v_1.Op != OpAMD64TESTQconst {
 			break
 		}
-		c := v_1.AuxInt
+		c := auxIntToInt32(v_1.AuxInt)
 		x := v_1.Args[0]
 		mem := v_2
-		if !(isUint64PowerOfTwo(c)) {
+		if !(isUint64PowerOfTwo(int64(c))) {
 			break
 		}
 		v.reset(OpAMD64SETAEstore)
-		v.AuxInt = off
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off)
+		v.Aux = symToAux(sym)
 		v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags)
-		v0.AuxInt = log2(c)
+		v0.AuxInt = int8ToAuxInt(int8(log32(c)))
 		v0.AddArg(x)
 		v.AddArg3(ptr, v0, mem)
 		return true
 	}
 	// match: (SETEQstore [off] {sym} ptr (TESTQ (MOVQconst [c]) x) mem)
 	// cond: isUint64PowerOfTwo(c)
-	// result: (SETAEstore [off] {sym} ptr (BTQconst [log2(c)] x) mem)
+	// result: (SETAEstore [off] {sym} ptr (BTQconst [int8(log2(c))] x) mem)
 	for {
-		off := v.AuxInt
-		sym := v.Aux
+		off := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		ptr := v_0
 		if v_1.Op != OpAMD64TESTQ {
 			break
@@ -22938,17 +22899,17 @@
 			if v_1_0.Op != OpAMD64MOVQconst {
 				continue
 			}
-			c := v_1_0.AuxInt
+			c := auxIntToInt64(v_1_0.AuxInt)
 			x := v_1_1
 			mem := v_2
 			if !(isUint64PowerOfTwo(c)) {
 				continue
 			}
 			v.reset(OpAMD64SETAEstore)
-			v.AuxInt = off
-			v.Aux = sym
+			v.AuxInt = int32ToAuxInt(off)
+			v.Aux = symToAux(sym)
 			v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags)
-			v0.AuxInt = log2(c)
+			v0.AuxInt = int8ToAuxInt(int8(log2(c)))
 			v0.AddArg(x)
 			v.AddArg3(ptr, v0, mem)
 			return true
@@ -22958,22 +22919,22 @@
 	// match: (SETEQstore [off] {sym} ptr (CMPLconst [1] s:(ANDLconst [1] _)) mem)
 	// result: (SETNEstore [off] {sym} ptr (CMPLconst [0] s) mem)
 	for {
-		off := v.AuxInt
-		sym := v.Aux
+		off := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		ptr := v_0
-		if v_1.Op != OpAMD64CMPLconst || v_1.AuxInt != 1 {
+		if v_1.Op != OpAMD64CMPLconst || auxIntToInt32(v_1.AuxInt) != 1 {
 			break
 		}
 		s := v_1.Args[0]
-		if s.Op != OpAMD64ANDLconst || s.AuxInt != 1 {
+		if s.Op != OpAMD64ANDLconst || auxIntToInt32(s.AuxInt) != 1 {
 			break
 		}
 		mem := v_2
 		v.reset(OpAMD64SETNEstore)
-		v.AuxInt = off
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off)
+		v.Aux = symToAux(sym)
 		v0 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags)
-		v0.AuxInt = 0
+		v0.AuxInt = int32ToAuxInt(0)
 		v0.AddArg(s)
 		v.AddArg3(ptr, v0, mem)
 		return true
@@ -22981,22 +22942,22 @@
 	// match: (SETEQstore [off] {sym} ptr (CMPQconst [1] s:(ANDQconst [1] _)) mem)
 	// result: (SETNEstore [off] {sym} ptr (CMPQconst [0] s) mem)
 	for {
-		off := v.AuxInt
-		sym := v.Aux
+		off := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		ptr := v_0
-		if v_1.Op != OpAMD64CMPQconst || v_1.AuxInt != 1 {
+		if v_1.Op != OpAMD64CMPQconst || auxIntToInt32(v_1.AuxInt) != 1 {
 			break
 		}
 		s := v_1.Args[0]
-		if s.Op != OpAMD64ANDQconst || s.AuxInt != 1 {
+		if s.Op != OpAMD64ANDQconst || auxIntToInt32(s.AuxInt) != 1 {
 			break
 		}
 		mem := v_2
 		v.reset(OpAMD64SETNEstore)
-		v.AuxInt = off
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off)
+		v.Aux = symToAux(sym)
 		v0 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags)
-		v0.AuxInt = 0
+		v0.AuxInt = int32ToAuxInt(0)
 		v0.AddArg(s)
 		v.AddArg3(ptr, v0, mem)
 		return true
@@ -23005,8 +22966,8 @@
 	// cond: z1==z2
 	// result: (SETAEstore [off] {sym} ptr (BTQconst [63] x) mem)
 	for {
-		off := v.AuxInt
-		sym := v.Aux
+		off := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		ptr := v_0
 		if v_1.Op != OpAMD64TESTQ {
 			break
@@ -23016,11 +22977,11 @@
 		v_1_1 := v_1.Args[1]
 		for _i0 := 0; _i0 <= 1; _i0, v_1_0, v_1_1 = _i0+1, v_1_1, v_1_0 {
 			z1 := v_1_0
-			if z1.Op != OpAMD64SHLQconst || z1.AuxInt != 63 {
+			if z1.Op != OpAMD64SHLQconst || auxIntToInt8(z1.AuxInt) != 63 {
 				continue
 			}
 			z1_0 := z1.Args[0]
-			if z1_0.Op != OpAMD64SHRQconst || z1_0.AuxInt != 63 {
+			if z1_0.Op != OpAMD64SHRQconst || auxIntToInt8(z1_0.AuxInt) != 63 {
 				continue
 			}
 			x := z1_0.Args[0]
@@ -23030,10 +22991,10 @@
 				continue
 			}
 			v.reset(OpAMD64SETAEstore)
-			v.AuxInt = off
-			v.Aux = sym
+			v.AuxInt = int32ToAuxInt(off)
+			v.Aux = symToAux(sym)
 			v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags)
-			v0.AuxInt = 63
+			v0.AuxInt = int8ToAuxInt(63)
 			v0.AddArg(x)
 			v.AddArg3(ptr, v0, mem)
 			return true
@@ -23044,8 +23005,8 @@
 	// cond: z1==z2
 	// result: (SETAEstore [off] {sym} ptr (BTLconst [31] x) mem)
 	for {
-		off := v.AuxInt
-		sym := v.Aux
+		off := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		ptr := v_0
 		if v_1.Op != OpAMD64TESTL {
 			break
@@ -23055,11 +23016,11 @@
 		v_1_1 := v_1.Args[1]
 		for _i0 := 0; _i0 <= 1; _i0, v_1_0, v_1_1 = _i0+1, v_1_1, v_1_0 {
 			z1 := v_1_0
-			if z1.Op != OpAMD64SHLLconst || z1.AuxInt != 31 {
+			if z1.Op != OpAMD64SHLLconst || auxIntToInt8(z1.AuxInt) != 31 {
 				continue
 			}
 			z1_0 := z1.Args[0]
-			if z1_0.Op != OpAMD64SHRLconst || z1_0.AuxInt != 31 {
+			if z1_0.Op != OpAMD64SHRLconst || auxIntToInt8(z1_0.AuxInt) != 31 {
 				continue
 			}
 			x := z1_0.Args[0]
@@ -23069,10 +23030,10 @@
 				continue
 			}
 			v.reset(OpAMD64SETAEstore)
-			v.AuxInt = off
-			v.Aux = sym
+			v.AuxInt = int32ToAuxInt(off)
+			v.Aux = symToAux(sym)
 			v0 := b.NewValue0(v.Pos, OpAMD64BTLconst, types.TypeFlags)
-			v0.AuxInt = 31
+			v0.AuxInt = int8ToAuxInt(31)
 			v0.AddArg(x)
 			v.AddArg3(ptr, v0, mem)
 			return true
@@ -23083,8 +23044,8 @@
 	// cond: z1==z2
 	// result: (SETAEstore [off] {sym} ptr (BTQconst [0] x) mem)
 	for {
-		off := v.AuxInt
-		sym := v.Aux
+		off := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		ptr := v_0
 		if v_1.Op != OpAMD64TESTQ {
 			break
@@ -23094,11 +23055,11 @@
 		v_1_1 := v_1.Args[1]
 		for _i0 := 0; _i0 <= 1; _i0, v_1_0, v_1_1 = _i0+1, v_1_1, v_1_0 {
 			z1 := v_1_0
-			if z1.Op != OpAMD64SHRQconst || z1.AuxInt != 63 {
+			if z1.Op != OpAMD64SHRQconst || auxIntToInt8(z1.AuxInt) != 63 {
 				continue
 			}
 			z1_0 := z1.Args[0]
-			if z1_0.Op != OpAMD64SHLQconst || z1_0.AuxInt != 63 {
+			if z1_0.Op != OpAMD64SHLQconst || auxIntToInt8(z1_0.AuxInt) != 63 {
 				continue
 			}
 			x := z1_0.Args[0]
@@ -23108,10 +23069,10 @@
 				continue
 			}
 			v.reset(OpAMD64SETAEstore)
-			v.AuxInt = off
-			v.Aux = sym
+			v.AuxInt = int32ToAuxInt(off)
+			v.Aux = symToAux(sym)
 			v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags)
-			v0.AuxInt = 0
+			v0.AuxInt = int8ToAuxInt(0)
 			v0.AddArg(x)
 			v.AddArg3(ptr, v0, mem)
 			return true
@@ -23122,8 +23083,8 @@
 	// cond: z1==z2
 	// result: (SETAEstore [off] {sym} ptr (BTLconst [0] x) mem)
 	for {
-		off := v.AuxInt
-		sym := v.Aux
+		off := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		ptr := v_0
 		if v_1.Op != OpAMD64TESTL {
 			break
@@ -23133,11 +23094,11 @@
 		v_1_1 := v_1.Args[1]
 		for _i0 := 0; _i0 <= 1; _i0, v_1_0, v_1_1 = _i0+1, v_1_1, v_1_0 {
 			z1 := v_1_0
-			if z1.Op != OpAMD64SHRLconst || z1.AuxInt != 31 {
+			if z1.Op != OpAMD64SHRLconst || auxIntToInt8(z1.AuxInt) != 31 {
 				continue
 			}
 			z1_0 := z1.Args[0]
-			if z1_0.Op != OpAMD64SHLLconst || z1_0.AuxInt != 31 {
+			if z1_0.Op != OpAMD64SHLLconst || auxIntToInt8(z1_0.AuxInt) != 31 {
 				continue
 			}
 			x := z1_0.Args[0]
@@ -23147,10 +23108,10 @@
 				continue
 			}
 			v.reset(OpAMD64SETAEstore)
-			v.AuxInt = off
-			v.Aux = sym
+			v.AuxInt = int32ToAuxInt(off)
+			v.Aux = symToAux(sym)
 			v0 := b.NewValue0(v.Pos, OpAMD64BTLconst, types.TypeFlags)
-			v0.AuxInt = 0
+			v0.AuxInt = int8ToAuxInt(0)
 			v0.AddArg(x)
 			v.AddArg3(ptr, v0, mem)
 			return true
@@ -23161,8 +23122,8 @@
 	// cond: z1==z2
 	// result: (SETAEstore [off] {sym} ptr (BTQconst [63] x) mem)
 	for {
-		off := v.AuxInt
-		sym := v.Aux
+		off := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		ptr := v_0
 		if v_1.Op != OpAMD64TESTQ {
 			break
@@ -23172,7 +23133,7 @@
 		v_1_1 := v_1.Args[1]
 		for _i0 := 0; _i0 <= 1; _i0, v_1_0, v_1_1 = _i0+1, v_1_1, v_1_0 {
 			z1 := v_1_0
-			if z1.Op != OpAMD64SHRQconst || z1.AuxInt != 63 {
+			if z1.Op != OpAMD64SHRQconst || auxIntToInt8(z1.AuxInt) != 63 {
 				continue
 			}
 			x := z1.Args[0]
@@ -23182,10 +23143,10 @@
 				continue
 			}
 			v.reset(OpAMD64SETAEstore)
-			v.AuxInt = off
-			v.Aux = sym
+			v.AuxInt = int32ToAuxInt(off)
+			v.Aux = symToAux(sym)
 			v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags)
-			v0.AuxInt = 63
+			v0.AuxInt = int8ToAuxInt(63)
 			v0.AddArg(x)
 			v.AddArg3(ptr, v0, mem)
 			return true
@@ -23196,8 +23157,8 @@
 	// cond: z1==z2
 	// result: (SETAEstore [off] {sym} ptr (BTLconst [31] x) mem)
 	for {
-		off := v.AuxInt
-		sym := v.Aux
+		off := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		ptr := v_0
 		if v_1.Op != OpAMD64TESTL {
 			break
@@ -23207,7 +23168,7 @@
 		v_1_1 := v_1.Args[1]
 		for _i0 := 0; _i0 <= 1; _i0, v_1_0, v_1_1 = _i0+1, v_1_1, v_1_0 {
 			z1 := v_1_0
-			if z1.Op != OpAMD64SHRLconst || z1.AuxInt != 31 {
+			if z1.Op != OpAMD64SHRLconst || auxIntToInt8(z1.AuxInt) != 31 {
 				continue
 			}
 			x := z1.Args[0]
@@ -23217,10 +23178,10 @@
 				continue
 			}
 			v.reset(OpAMD64SETAEstore)
-			v.AuxInt = off
-			v.Aux = sym
+			v.AuxInt = int32ToAuxInt(off)
+			v.Aux = symToAux(sym)
 			v0 := b.NewValue0(v.Pos, OpAMD64BTLconst, types.TypeFlags)
-			v0.AuxInt = 31
+			v0.AuxInt = int8ToAuxInt(31)
 			v0.AddArg(x)
 			v.AddArg3(ptr, v0, mem)
 			return true
@@ -23230,8 +23191,8 @@
 	// match: (SETEQstore [off] {sym} ptr (InvertFlags x) mem)
 	// result: (SETEQstore [off] {sym} ptr x mem)
 	for {
-		off := v.AuxInt
-		sym := v.Aux
+		off := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		ptr := v_0
 		if v_1.Op != OpAMD64InvertFlags {
 			break
@@ -23239,30 +23200,30 @@
 		x := v_1.Args[0]
 		mem := v_2
 		v.reset(OpAMD64SETEQstore)
-		v.AuxInt = off
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off)
+		v.Aux = symToAux(sym)
 		v.AddArg3(ptr, x, mem)
 		return true
 	}
 	// match: (SETEQstore [off1] {sym} (ADDQconst [off2] base) val mem)
-	// cond: is32Bit(off1+off2)
+	// cond: is32Bit(int64(off1)+int64(off2))
 	// result: (SETEQstore [off1+off2] {sym} base val mem)
 	for {
-		off1 := v.AuxInt
-		sym := v.Aux
+		off1 := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		if v_0.Op != OpAMD64ADDQconst {
 			break
 		}
-		off2 := v_0.AuxInt
+		off2 := auxIntToInt32(v_0.AuxInt)
 		base := v_0.Args[0]
 		val := v_1
 		mem := v_2
-		if !(is32Bit(off1 + off2)) {
+		if !(is32Bit(int64(off1) + int64(off2))) {
 			break
 		}
 		v.reset(OpAMD64SETEQstore)
-		v.AuxInt = off1 + off2
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off1 + off2)
+		v.Aux = symToAux(sym)
 		v.AddArg3(base, val, mem)
 		return true
 	}
@@ -23520,8 +23481,8 @@
 	// match: (SETGEstore [off] {sym} ptr (InvertFlags x) mem)
 	// result: (SETLEstore [off] {sym} ptr x mem)
 	for {
-		off := v.AuxInt
-		sym := v.Aux
+		off := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		ptr := v_0
 		if v_1.Op != OpAMD64InvertFlags {
 			break
@@ -23529,30 +23490,30 @@
 		x := v_1.Args[0]
 		mem := v_2
 		v.reset(OpAMD64SETLEstore)
-		v.AuxInt = off
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off)
+		v.Aux = symToAux(sym)
 		v.AddArg3(ptr, x, mem)
 		return true
 	}
 	// match: (SETGEstore [off1] {sym} (ADDQconst [off2] base) val mem)
-	// cond: is32Bit(off1+off2)
+	// cond: is32Bit(int64(off1)+int64(off2))
 	// result: (SETGEstore [off1+off2] {sym} base val mem)
 	for {
-		off1 := v.AuxInt
-		sym := v.Aux
+		off1 := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		if v_0.Op != OpAMD64ADDQconst {
 			break
 		}
-		off2 := v_0.AuxInt
+		off2 := auxIntToInt32(v_0.AuxInt)
 		base := v_0.Args[0]
 		val := v_1
 		mem := v_2
-		if !(is32Bit(off1 + off2)) {
+		if !(is32Bit(int64(off1) + int64(off2))) {
 			break
 		}
 		v.reset(OpAMD64SETGEstore)
-		v.AuxInt = off1 + off2
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off1 + off2)
+		v.Aux = symToAux(sym)
 		v.AddArg3(base, val, mem)
 		return true
 	}
@@ -23680,8 +23641,8 @@
 	// match: (SETGstore [off] {sym} ptr (InvertFlags x) mem)
 	// result: (SETLstore [off] {sym} ptr x mem)
 	for {
-		off := v.AuxInt
-		sym := v.Aux
+		off := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		ptr := v_0
 		if v_1.Op != OpAMD64InvertFlags {
 			break
@@ -23689,30 +23650,30 @@
 		x := v_1.Args[0]
 		mem := v_2
 		v.reset(OpAMD64SETLstore)
-		v.AuxInt = off
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off)
+		v.Aux = symToAux(sym)
 		v.AddArg3(ptr, x, mem)
 		return true
 	}
 	// match: (SETGstore [off1] {sym} (ADDQconst [off2] base) val mem)
-	// cond: is32Bit(off1+off2)
+	// cond: is32Bit(int64(off1)+int64(off2))
 	// result: (SETGstore [off1+off2] {sym} base val mem)
 	for {
-		off1 := v.AuxInt
-		sym := v.Aux
+		off1 := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		if v_0.Op != OpAMD64ADDQconst {
 			break
 		}
-		off2 := v_0.AuxInt
+		off2 := auxIntToInt32(v_0.AuxInt)
 		base := v_0.Args[0]
 		val := v_1
 		mem := v_2
-		if !(is32Bit(off1 + off2)) {
+		if !(is32Bit(int64(off1) + int64(off2))) {
 			break
 		}
 		v.reset(OpAMD64SETGstore)
-		v.AuxInt = off1 + off2
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off1 + off2)
+		v.Aux = symToAux(sym)
 		v.AddArg3(base, val, mem)
 		return true
 	}
@@ -23970,8 +23931,8 @@
 	// match: (SETLEstore [off] {sym} ptr (InvertFlags x) mem)
 	// result: (SETGEstore [off] {sym} ptr x mem)
 	for {
-		off := v.AuxInt
-		sym := v.Aux
+		off := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		ptr := v_0
 		if v_1.Op != OpAMD64InvertFlags {
 			break
@@ -23979,30 +23940,30 @@
 		x := v_1.Args[0]
 		mem := v_2
 		v.reset(OpAMD64SETGEstore)
-		v.AuxInt = off
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off)
+		v.Aux = symToAux(sym)
 		v.AddArg3(ptr, x, mem)
 		return true
 	}
 	// match: (SETLEstore [off1] {sym} (ADDQconst [off2] base) val mem)
-	// cond: is32Bit(off1+off2)
+	// cond: is32Bit(int64(off1)+int64(off2))
 	// result: (SETLEstore [off1+off2] {sym} base val mem)
 	for {
-		off1 := v.AuxInt
-		sym := v.Aux
+		off1 := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		if v_0.Op != OpAMD64ADDQconst {
 			break
 		}
-		off2 := v_0.AuxInt
+		off2 := auxIntToInt32(v_0.AuxInt)
 		base := v_0.Args[0]
 		val := v_1
 		mem := v_2
-		if !(is32Bit(off1 + off2)) {
+		if !(is32Bit(int64(off1) + int64(off2))) {
 			break
 		}
 		v.reset(OpAMD64SETLEstore)
-		v.AuxInt = off1 + off2
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off1 + off2)
+		v.Aux = symToAux(sym)
 		v.AddArg3(base, val, mem)
 		return true
 	}
@@ -24130,8 +24091,8 @@
 	// match: (SETLstore [off] {sym} ptr (InvertFlags x) mem)
 	// result: (SETGstore [off] {sym} ptr x mem)
 	for {
-		off := v.AuxInt
-		sym := v.Aux
+		off := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		ptr := v_0
 		if v_1.Op != OpAMD64InvertFlags {
 			break
@@ -24139,30 +24100,30 @@
 		x := v_1.Args[0]
 		mem := v_2
 		v.reset(OpAMD64SETGstore)
-		v.AuxInt = off
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off)
+		v.Aux = symToAux(sym)
 		v.AddArg3(ptr, x, mem)
 		return true
 	}
 	// match: (SETLstore [off1] {sym} (ADDQconst [off2] base) val mem)
-	// cond: is32Bit(off1+off2)
+	// cond: is32Bit(int64(off1)+int64(off2))
 	// result: (SETLstore [off1+off2] {sym} base val mem)
 	for {
-		off1 := v.AuxInt
-		sym := v.Aux
+		off1 := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		if v_0.Op != OpAMD64ADDQconst {
 			break
 		}
-		off2 := v_0.AuxInt
+		off2 := auxIntToInt32(v_0.AuxInt)
 		base := v_0.Args[0]
 		val := v_1
 		mem := v_2
-		if !(is32Bit(off1 + off2)) {
+		if !(is32Bit(int64(off1) + int64(off2))) {
 			break
 		}
 		v.reset(OpAMD64SETLstore)
-		v.AuxInt = off1 + off2
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off1 + off2)
+		v.Aux = symToAux(sym)
 		v.AddArg3(base, val, mem)
 		return true
 	}
@@ -24323,7 +24284,7 @@
 			}
 			x := v_0_0.Args[1]
 			v_0_0_0 := v_0_0.Args[0]
-			if v_0_0_0.Op != OpAMD64MOVLconst || v_0_0_0.AuxInt != 1 {
+			if v_0_0_0.Op != OpAMD64MOVLconst || auxIntToInt32(v_0_0_0.AuxInt) != 1 {
 				continue
 			}
 			y := v_0_1
@@ -24350,7 +24311,7 @@
 			}
 			x := v_0_0.Args[1]
 			v_0_0_0 := v_0_0.Args[0]
-			if v_0_0_0.Op != OpAMD64MOVQconst || v_0_0_0.AuxInt != 1 {
+			if v_0_0_0.Op != OpAMD64MOVQconst || auxIntToInt64(v_0_0_0.AuxInt) != 1 {
 				continue
 			}
 			y := v_0_1
@@ -24363,46 +24324,46 @@
 		break
 	}
 	// match: (SETNE (TESTLconst [c] x))
-	// cond: isUint32PowerOfTwo(c)
-	// result: (SETB (BTLconst [log2uint32(c)] x))
+	// cond: isUint32PowerOfTwo(int64(c))
+	// result: (SETB (BTLconst [int8(log32(c))] x))
 	for {
 		if v_0.Op != OpAMD64TESTLconst {
 			break
 		}
-		c := v_0.AuxInt
+		c := auxIntToInt32(v_0.AuxInt)
 		x := v_0.Args[0]
-		if !(isUint32PowerOfTwo(c)) {
+		if !(isUint32PowerOfTwo(int64(c))) {
 			break
 		}
 		v.reset(OpAMD64SETB)
 		v0 := b.NewValue0(v.Pos, OpAMD64BTLconst, types.TypeFlags)
-		v0.AuxInt = log2uint32(c)
+		v0.AuxInt = int8ToAuxInt(int8(log32(c)))
 		v0.AddArg(x)
 		v.AddArg(v0)
 		return true
 	}
 	// match: (SETNE (TESTQconst [c] x))
-	// cond: isUint64PowerOfTwo(c)
-	// result: (SETB (BTQconst [log2(c)] x))
+	// cond: isUint64PowerOfTwo(int64(c))
+	// result: (SETB (BTQconst [int8(log32(c))] x))
 	for {
 		if v_0.Op != OpAMD64TESTQconst {
 			break
 		}
-		c := v_0.AuxInt
+		c := auxIntToInt32(v_0.AuxInt)
 		x := v_0.Args[0]
-		if !(isUint64PowerOfTwo(c)) {
+		if !(isUint64PowerOfTwo(int64(c))) {
 			break
 		}
 		v.reset(OpAMD64SETB)
 		v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags)
-		v0.AuxInt = log2(c)
+		v0.AuxInt = int8ToAuxInt(int8(log32(c)))
 		v0.AddArg(x)
 		v.AddArg(v0)
 		return true
 	}
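The power-of-two rules above change in two coordinated ways: the typed 32-bit constant is widened back to int64 for the isUint32PowerOfTwo/isUint64PowerOfTwo tests, and the resulting bit index is produced by log32/log2 and truncated to int8, the declared AuxInt width of the BT*const ops. A hedged sketch of these helpers, assuming the shapes implied by the call sites (the canonical versions are in rewrite.go):

	package ssa // sketch only

	import "math/bits"

	// isUint32PowerOfTwo reports whether in is a power of two
	// representable as an unsigned 32-bit constant.
	func isUint32PowerOfTwo(in int64) bool {
		v := uint64(in)
		return v > 0 && v < 1<<32 && v&(v-1) == 0
	}

	func isUint64PowerOfTwo(in int64) bool {
		v := uint64(in)
		return v > 0 && v&(v-1) == 0
	}

	// log32 and log2 return the position of the single set bit.
	func log32(n int32) int64 { return int64(bits.Len32(uint32(n))) - 1 }
	func log2(n int64) int64  { return int64(bits.Len64(uint64(n))) - 1 }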
 	// match: (SETNE (TESTQ (MOVQconst [c]) x))
 	// cond: isUint64PowerOfTwo(c)
-	// result: (SETB (BTQconst [log2(c)] x))
+	// result: (SETB (BTQconst [int8(log2(c))] x))
 	for {
 		if v_0.Op != OpAMD64TESTQ {
 			break
@@ -24414,14 +24375,14 @@
 			if v_0_0.Op != OpAMD64MOVQconst {
 				continue
 			}
-			c := v_0_0.AuxInt
+			c := auxIntToInt64(v_0_0.AuxInt)
 			x := v_0_1
 			if !(isUint64PowerOfTwo(c)) {
 				continue
 			}
 			v.reset(OpAMD64SETB)
 			v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags)
-			v0.AuxInt = log2(c)
+			v0.AuxInt = int8ToAuxInt(int8(log2(c)))
 			v0.AddArg(x)
 			v.AddArg(v0)
 			return true
@@ -24431,16 +24392,16 @@
 	// match: (SETNE (CMPLconst [1] s:(ANDLconst [1] _)))
 	// result: (SETEQ (CMPLconst [0] s))
 	for {
-		if v_0.Op != OpAMD64CMPLconst || v_0.AuxInt != 1 {
+		if v_0.Op != OpAMD64CMPLconst || auxIntToInt32(v_0.AuxInt) != 1 {
 			break
 		}
 		s := v_0.Args[0]
-		if s.Op != OpAMD64ANDLconst || s.AuxInt != 1 {
+		if s.Op != OpAMD64ANDLconst || auxIntToInt32(s.AuxInt) != 1 {
 			break
 		}
 		v.reset(OpAMD64SETEQ)
 		v0 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags)
-		v0.AuxInt = 0
+		v0.AuxInt = int32ToAuxInt(0)
 		v0.AddArg(s)
 		v.AddArg(v0)
 		return true
@@ -24448,16 +24409,16 @@
 	// match: (SETNE (CMPQconst [1] s:(ANDQconst [1] _)))
 	// result: (SETEQ (CMPQconst [0] s))
 	for {
-		if v_0.Op != OpAMD64CMPQconst || v_0.AuxInt != 1 {
+		if v_0.Op != OpAMD64CMPQconst || auxIntToInt32(v_0.AuxInt) != 1 {
 			break
 		}
 		s := v_0.Args[0]
-		if s.Op != OpAMD64ANDQconst || s.AuxInt != 1 {
+		if s.Op != OpAMD64ANDQconst || auxIntToInt32(s.AuxInt) != 1 {
 			break
 		}
 		v.reset(OpAMD64SETEQ)
 		v0 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags)
-		v0.AuxInt = 0
+		v0.AuxInt = int32ToAuxInt(0)
 		v0.AddArg(s)
 		v.AddArg(v0)
 		return true
@@ -24474,11 +24435,11 @@
 		v_0_1 := v_0.Args[1]
 		for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
 			z1 := v_0_0
-			if z1.Op != OpAMD64SHLQconst || z1.AuxInt != 63 {
+			if z1.Op != OpAMD64SHLQconst || auxIntToInt8(z1.AuxInt) != 63 {
 				continue
 			}
 			z1_0 := z1.Args[0]
-			if z1_0.Op != OpAMD64SHRQconst || z1_0.AuxInt != 63 {
+			if z1_0.Op != OpAMD64SHRQconst || auxIntToInt8(z1_0.AuxInt) != 63 {
 				continue
 			}
 			x := z1_0.Args[0]
@@ -24488,7 +24449,7 @@
 			}
 			v.reset(OpAMD64SETB)
 			v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags)
-			v0.AuxInt = 63
+			v0.AuxInt = int8ToAuxInt(63)
 			v0.AddArg(x)
 			v.AddArg(v0)
 			return true
@@ -24507,11 +24468,11 @@
 		v_0_1 := v_0.Args[1]
 		for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
 			z1 := v_0_0
-			if z1.Op != OpAMD64SHLLconst || z1.AuxInt != 31 {
+			if z1.Op != OpAMD64SHLLconst || auxIntToInt8(z1.AuxInt) != 31 {
 				continue
 			}
 			z1_0 := z1.Args[0]
-			if z1_0.Op != OpAMD64SHRQconst || z1_0.AuxInt != 31 {
+			if z1_0.Op != OpAMD64SHRQconst || auxIntToInt8(z1_0.AuxInt) != 31 {
 				continue
 			}
 			x := z1_0.Args[0]
@@ -24521,7 +24482,7 @@
 			}
 			v.reset(OpAMD64SETB)
 			v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags)
-			v0.AuxInt = 31
+			v0.AuxInt = int8ToAuxInt(31)
 			v0.AddArg(x)
 			v.AddArg(v0)
 			return true
@@ -24540,11 +24501,11 @@
 		v_0_1 := v_0.Args[1]
 		for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
 			z1 := v_0_0
-			if z1.Op != OpAMD64SHRQconst || z1.AuxInt != 63 {
+			if z1.Op != OpAMD64SHRQconst || auxIntToInt8(z1.AuxInt) != 63 {
 				continue
 			}
 			z1_0 := z1.Args[0]
-			if z1_0.Op != OpAMD64SHLQconst || z1_0.AuxInt != 63 {
+			if z1_0.Op != OpAMD64SHLQconst || auxIntToInt8(z1_0.AuxInt) != 63 {
 				continue
 			}
 			x := z1_0.Args[0]
@@ -24554,7 +24515,7 @@
 			}
 			v.reset(OpAMD64SETB)
 			v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags)
-			v0.AuxInt = 0
+			v0.AuxInt = int8ToAuxInt(0)
 			v0.AddArg(x)
 			v.AddArg(v0)
 			return true
@@ -24573,11 +24534,11 @@
 		v_0_1 := v_0.Args[1]
 		for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
 			z1 := v_0_0
-			if z1.Op != OpAMD64SHRLconst || z1.AuxInt != 31 {
+			if z1.Op != OpAMD64SHRLconst || auxIntToInt8(z1.AuxInt) != 31 {
 				continue
 			}
 			z1_0 := z1.Args[0]
-			if z1_0.Op != OpAMD64SHLLconst || z1_0.AuxInt != 31 {
+			if z1_0.Op != OpAMD64SHLLconst || auxIntToInt8(z1_0.AuxInt) != 31 {
 				continue
 			}
 			x := z1_0.Args[0]
@@ -24587,7 +24548,7 @@
 			}
 			v.reset(OpAMD64SETB)
 			v0 := b.NewValue0(v.Pos, OpAMD64BTLconst, types.TypeFlags)
-			v0.AuxInt = 0
+			v0.AuxInt = int8ToAuxInt(0)
 			v0.AddArg(x)
 			v.AddArg(v0)
 			return true
@@ -24606,7 +24567,7 @@
 		v_0_1 := v_0.Args[1]
 		for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
 			z1 := v_0_0
-			if z1.Op != OpAMD64SHRQconst || z1.AuxInt != 63 {
+			if z1.Op != OpAMD64SHRQconst || auxIntToInt8(z1.AuxInt) != 63 {
 				continue
 			}
 			x := z1.Args[0]
@@ -24616,7 +24577,7 @@
 			}
 			v.reset(OpAMD64SETB)
 			v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags)
-			v0.AuxInt = 63
+			v0.AuxInt = int8ToAuxInt(63)
 			v0.AddArg(x)
 			v.AddArg(v0)
 			return true
@@ -24635,7 +24596,7 @@
 		v_0_1 := v_0.Args[1]
 		for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
 			z1 := v_0_0
-			if z1.Op != OpAMD64SHRLconst || z1.AuxInt != 31 {
+			if z1.Op != OpAMD64SHRLconst || auxIntToInt8(z1.AuxInt) != 31 {
 				continue
 			}
 			x := z1.Args[0]
@@ -24645,7 +24606,7 @@
 			}
 			v.reset(OpAMD64SETB)
 			v0 := b.NewValue0(v.Pos, OpAMD64BTLconst, types.TypeFlags)
-			v0.AuxInt = 31
+			v0.AuxInt = int8ToAuxInt(31)
 			v0.AddArg(x)
 			v.AddArg(v0)
 			return true
@@ -24724,8 +24685,8 @@
 	// match: (SETNEstore [off] {sym} ptr (TESTL (SHLL (MOVLconst [1]) x) y) mem)
 	// result: (SETBstore [off] {sym} ptr (BTL x y) mem)
 	for {
-		off := v.AuxInt
-		sym := v.Aux
+		off := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		ptr := v_0
 		if v_1.Op != OpAMD64TESTL {
 			break
@@ -24739,14 +24700,14 @@
 			}
 			x := v_1_0.Args[1]
 			v_1_0_0 := v_1_0.Args[0]
-			if v_1_0_0.Op != OpAMD64MOVLconst || v_1_0_0.AuxInt != 1 {
+			if v_1_0_0.Op != OpAMD64MOVLconst || auxIntToInt32(v_1_0_0.AuxInt) != 1 {
 				continue
 			}
 			y := v_1_1
 			mem := v_2
 			v.reset(OpAMD64SETBstore)
-			v.AuxInt = off
-			v.Aux = sym
+			v.AuxInt = int32ToAuxInt(off)
+			v.Aux = symToAux(sym)
 			v0 := b.NewValue0(v.Pos, OpAMD64BTL, types.TypeFlags)
 			v0.AddArg2(x, y)
 			v.AddArg3(ptr, v0, mem)
@@ -24757,8 +24718,8 @@
 	// match: (SETNEstore [off] {sym} ptr (TESTQ (SHLQ (MOVQconst [1]) x) y) mem)
 	// result: (SETBstore [off] {sym} ptr (BTQ x y) mem)
 	for {
-		off := v.AuxInt
-		sym := v.Aux
+		off := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		ptr := v_0
 		if v_1.Op != OpAMD64TESTQ {
 			break
@@ -24772,14 +24733,14 @@
 			}
 			x := v_1_0.Args[1]
 			v_1_0_0 := v_1_0.Args[0]
-			if v_1_0_0.Op != OpAMD64MOVQconst || v_1_0_0.AuxInt != 1 {
+			if v_1_0_0.Op != OpAMD64MOVQconst || auxIntToInt64(v_1_0_0.AuxInt) != 1 {
 				continue
 			}
 			y := v_1_1
 			mem := v_2
 			v.reset(OpAMD64SETBstore)
-			v.AuxInt = off
-			v.Aux = sym
+			v.AuxInt = int32ToAuxInt(off)
+			v.Aux = symToAux(sym)
 			v0 := b.NewValue0(v.Pos, OpAMD64BTQ, types.TypeFlags)
 			v0.AddArg2(x, y)
 			v.AddArg3(ptr, v0, mem)
@@ -24788,61 +24749,61 @@
 		break
 	}
 	// match: (SETNEstore [off] {sym} ptr (TESTLconst [c] x) mem)
-	// cond: isUint32PowerOfTwo(c)
-	// result: (SETBstore [off] {sym} ptr (BTLconst [log2uint32(c)] x) mem)
+	// cond: isUint32PowerOfTwo(int64(c))
+	// result: (SETBstore [off] {sym} ptr (BTLconst [int8(log32(c))] x) mem)
 	for {
-		off := v.AuxInt
-		sym := v.Aux
+		off := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		ptr := v_0
 		if v_1.Op != OpAMD64TESTLconst {
 			break
 		}
-		c := v_1.AuxInt
+		c := auxIntToInt32(v_1.AuxInt)
 		x := v_1.Args[0]
 		mem := v_2
-		if !(isUint32PowerOfTwo(c)) {
+		if !(isUint32PowerOfTwo(int64(c))) {
 			break
 		}
 		v.reset(OpAMD64SETBstore)
-		v.AuxInt = off
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off)
+		v.Aux = symToAux(sym)
 		v0 := b.NewValue0(v.Pos, OpAMD64BTLconst, types.TypeFlags)
-		v0.AuxInt = log2uint32(c)
+		v0.AuxInt = int8ToAuxInt(int8(log32(c)))
 		v0.AddArg(x)
 		v.AddArg3(ptr, v0, mem)
 		return true
 	}
 	// match: (SETNEstore [off] {sym} ptr (TESTQconst [c] x) mem)
-	// cond: isUint64PowerOfTwo(c)
-	// result: (SETBstore [off] {sym} ptr (BTQconst [log2(c)] x) mem)
+	// cond: isUint64PowerOfTwo(int64(c))
+	// result: (SETBstore [off] {sym} ptr (BTQconst [int8(log32(c))] x) mem)
 	for {
-		off := v.AuxInt
-		sym := v.Aux
+		off := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		ptr := v_0
 		if v_1.Op != OpAMD64TESTQconst {
 			break
 		}
-		c := v_1.AuxInt
+		c := auxIntToInt32(v_1.AuxInt)
 		x := v_1.Args[0]
 		mem := v_2
-		if !(isUint64PowerOfTwo(c)) {
+		if !(isUint64PowerOfTwo(int64(c))) {
 			break
 		}
 		v.reset(OpAMD64SETBstore)
-		v.AuxInt = off
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off)
+		v.Aux = symToAux(sym)
 		v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags)
-		v0.AuxInt = log2(c)
+		v0.AuxInt = int8ToAuxInt(int8(log32(c)))
 		v0.AddArg(x)
 		v.AddArg3(ptr, v0, mem)
 		return true
 	}
 	// match: (SETNEstore [off] {sym} ptr (TESTQ (MOVQconst [c]) x) mem)
 	// cond: isUint64PowerOfTwo(c)
-	// result: (SETBstore [off] {sym} ptr (BTQconst [log2(c)] x) mem)
+	// result: (SETBstore [off] {sym} ptr (BTQconst [int8(log2(c))] x) mem)
 	for {
-		off := v.AuxInt
-		sym := v.Aux
+		off := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		ptr := v_0
 		if v_1.Op != OpAMD64TESTQ {
 			break
@@ -24854,17 +24815,17 @@
 			if v_1_0.Op != OpAMD64MOVQconst {
 				continue
 			}
-			c := v_1_0.AuxInt
+			c := auxIntToInt64(v_1_0.AuxInt)
 			x := v_1_1
 			mem := v_2
 			if !(isUint64PowerOfTwo(c)) {
 				continue
 			}
 			v.reset(OpAMD64SETBstore)
-			v.AuxInt = off
-			v.Aux = sym
+			v.AuxInt = int32ToAuxInt(off)
+			v.Aux = symToAux(sym)
 			v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags)
-			v0.AuxInt = log2(c)
+			v0.AuxInt = int8ToAuxInt(int8(log2(c)))
 			v0.AddArg(x)
 			v.AddArg3(ptr, v0, mem)
 			return true
@@ -24874,22 +24835,22 @@
 	// match: (SETNEstore [off] {sym} ptr (CMPLconst [1] s:(ANDLconst [1] _)) mem)
 	// result: (SETEQstore [off] {sym} ptr (CMPLconst [0] s) mem)
 	for {
-		off := v.AuxInt
-		sym := v.Aux
+		off := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		ptr := v_0
-		if v_1.Op != OpAMD64CMPLconst || v_1.AuxInt != 1 {
+		if v_1.Op != OpAMD64CMPLconst || auxIntToInt32(v_1.AuxInt) != 1 {
 			break
 		}
 		s := v_1.Args[0]
-		if s.Op != OpAMD64ANDLconst || s.AuxInt != 1 {
+		if s.Op != OpAMD64ANDLconst || auxIntToInt32(s.AuxInt) != 1 {
 			break
 		}
 		mem := v_2
 		v.reset(OpAMD64SETEQstore)
-		v.AuxInt = off
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off)
+		v.Aux = symToAux(sym)
 		v0 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags)
-		v0.AuxInt = 0
+		v0.AuxInt = int32ToAuxInt(0)
 		v0.AddArg(s)
 		v.AddArg3(ptr, v0, mem)
 		return true
@@ -24897,22 +24858,22 @@
 	// match: (SETNEstore [off] {sym} ptr (CMPQconst [1] s:(ANDQconst [1] _)) mem)
 	// result: (SETEQstore [off] {sym} ptr (CMPQconst [0] s) mem)
 	for {
-		off := v.AuxInt
-		sym := v.Aux
+		off := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		ptr := v_0
-		if v_1.Op != OpAMD64CMPQconst || v_1.AuxInt != 1 {
+		if v_1.Op != OpAMD64CMPQconst || auxIntToInt32(v_1.AuxInt) != 1 {
 			break
 		}
 		s := v_1.Args[0]
-		if s.Op != OpAMD64ANDQconst || s.AuxInt != 1 {
+		if s.Op != OpAMD64ANDQconst || auxIntToInt32(s.AuxInt) != 1 {
 			break
 		}
 		mem := v_2
 		v.reset(OpAMD64SETEQstore)
-		v.AuxInt = off
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off)
+		v.Aux = symToAux(sym)
 		v0 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags)
-		v0.AuxInt = 0
+		v0.AuxInt = int32ToAuxInt(0)
 		v0.AddArg(s)
 		v.AddArg3(ptr, v0, mem)
 		return true
@@ -24921,8 +24882,8 @@
 	// cond: z1==z2
 	// result: (SETBstore [off] {sym} ptr (BTQconst [63] x) mem)
 	for {
-		off := v.AuxInt
-		sym := v.Aux
+		off := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		ptr := v_0
 		if v_1.Op != OpAMD64TESTQ {
 			break
@@ -24932,11 +24893,11 @@
 		v_1_1 := v_1.Args[1]
 		for _i0 := 0; _i0 <= 1; _i0, v_1_0, v_1_1 = _i0+1, v_1_1, v_1_0 {
 			z1 := v_1_0
-			if z1.Op != OpAMD64SHLQconst || z1.AuxInt != 63 {
+			if z1.Op != OpAMD64SHLQconst || auxIntToInt8(z1.AuxInt) != 63 {
 				continue
 			}
 			z1_0 := z1.Args[0]
-			if z1_0.Op != OpAMD64SHRQconst || z1_0.AuxInt != 63 {
+			if z1_0.Op != OpAMD64SHRQconst || auxIntToInt8(z1_0.AuxInt) != 63 {
 				continue
 			}
 			x := z1_0.Args[0]
@@ -24946,10 +24907,10 @@
 				continue
 			}
 			v.reset(OpAMD64SETBstore)
-			v.AuxInt = off
-			v.Aux = sym
+			v.AuxInt = int32ToAuxInt(off)
+			v.Aux = symToAux(sym)
 			v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags)
-			v0.AuxInt = 63
+			v0.AuxInt = int8ToAuxInt(63)
 			v0.AddArg(x)
 			v.AddArg3(ptr, v0, mem)
 			return true
@@ -24960,8 +24921,8 @@
 	// cond: z1==z2
 	// result: (SETBstore [off] {sym} ptr (BTLconst [31] x) mem)
 	for {
-		off := v.AuxInt
-		sym := v.Aux
+		off := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		ptr := v_0
 		if v_1.Op != OpAMD64TESTL {
 			break
@@ -24971,11 +24932,11 @@
 		v_1_1 := v_1.Args[1]
 		for _i0 := 0; _i0 <= 1; _i0, v_1_0, v_1_1 = _i0+1, v_1_1, v_1_0 {
 			z1 := v_1_0
-			if z1.Op != OpAMD64SHLLconst || z1.AuxInt != 31 {
+			if z1.Op != OpAMD64SHLLconst || auxIntToInt8(z1.AuxInt) != 31 {
 				continue
 			}
 			z1_0 := z1.Args[0]
-			if z1_0.Op != OpAMD64SHRLconst || z1_0.AuxInt != 31 {
+			if z1_0.Op != OpAMD64SHRLconst || auxIntToInt8(z1_0.AuxInt) != 31 {
 				continue
 			}
 			x := z1_0.Args[0]
@@ -24985,10 +24946,10 @@
 				continue
 			}
 			v.reset(OpAMD64SETBstore)
-			v.AuxInt = off
-			v.Aux = sym
+			v.AuxInt = int32ToAuxInt(off)
+			v.Aux = symToAux(sym)
 			v0 := b.NewValue0(v.Pos, OpAMD64BTLconst, types.TypeFlags)
-			v0.AuxInt = 31
+			v0.AuxInt = int8ToAuxInt(31)
 			v0.AddArg(x)
 			v.AddArg3(ptr, v0, mem)
 			return true
@@ -24999,8 +24960,8 @@
 	// cond: z1==z2
 	// result: (SETBstore [off] {sym} ptr (BTQconst [0] x) mem)
 	for {
-		off := v.AuxInt
-		sym := v.Aux
+		off := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		ptr := v_0
 		if v_1.Op != OpAMD64TESTQ {
 			break
@@ -25010,11 +24971,11 @@
 		v_1_1 := v_1.Args[1]
 		for _i0 := 0; _i0 <= 1; _i0, v_1_0, v_1_1 = _i0+1, v_1_1, v_1_0 {
 			z1 := v_1_0
-			if z1.Op != OpAMD64SHRQconst || z1.AuxInt != 63 {
+			if z1.Op != OpAMD64SHRQconst || auxIntToInt8(z1.AuxInt) != 63 {
 				continue
 			}
 			z1_0 := z1.Args[0]
-			if z1_0.Op != OpAMD64SHLQconst || z1_0.AuxInt != 63 {
+			if z1_0.Op != OpAMD64SHLQconst || auxIntToInt8(z1_0.AuxInt) != 63 {
 				continue
 			}
 			x := z1_0.Args[0]
@@ -25024,10 +24985,10 @@
 				continue
 			}
 			v.reset(OpAMD64SETBstore)
-			v.AuxInt = off
-			v.Aux = sym
+			v.AuxInt = int32ToAuxInt(off)
+			v.Aux = symToAux(sym)
 			v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags)
-			v0.AuxInt = 0
+			v0.AuxInt = int8ToAuxInt(0)
 			v0.AddArg(x)
 			v.AddArg3(ptr, v0, mem)
 			return true
@@ -25038,8 +24999,8 @@
 	// cond: z1==z2
 	// result: (SETBstore [off] {sym} ptr (BTLconst [0] x) mem)
 	for {
-		off := v.AuxInt
-		sym := v.Aux
+		off := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		ptr := v_0
 		if v_1.Op != OpAMD64TESTL {
 			break
@@ -25049,11 +25010,11 @@
 		v_1_1 := v_1.Args[1]
 		for _i0 := 0; _i0 <= 1; _i0, v_1_0, v_1_1 = _i0+1, v_1_1, v_1_0 {
 			z1 := v_1_0
-			if z1.Op != OpAMD64SHRLconst || z1.AuxInt != 31 {
+			if z1.Op != OpAMD64SHRLconst || auxIntToInt8(z1.AuxInt) != 31 {
 				continue
 			}
 			z1_0 := z1.Args[0]
-			if z1_0.Op != OpAMD64SHLLconst || z1_0.AuxInt != 31 {
+			if z1_0.Op != OpAMD64SHLLconst || auxIntToInt8(z1_0.AuxInt) != 31 {
 				continue
 			}
 			x := z1_0.Args[0]
@@ -25063,10 +25024,10 @@
 				continue
 			}
 			v.reset(OpAMD64SETBstore)
-			v.AuxInt = off
-			v.Aux = sym
+			v.AuxInt = int32ToAuxInt(off)
+			v.Aux = symToAux(sym)
 			v0 := b.NewValue0(v.Pos, OpAMD64BTLconst, types.TypeFlags)
-			v0.AuxInt = 0
+			v0.AuxInt = int8ToAuxInt(0)
 			v0.AddArg(x)
 			v.AddArg3(ptr, v0, mem)
 			return true
@@ -25077,8 +25038,8 @@
 	// cond: z1==z2
 	// result: (SETBstore [off] {sym} ptr (BTQconst [63] x) mem)
 	for {
-		off := v.AuxInt
-		sym := v.Aux
+		off := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		ptr := v_0
 		if v_1.Op != OpAMD64TESTQ {
 			break
@@ -25088,7 +25049,7 @@
 		v_1_1 := v_1.Args[1]
 		for _i0 := 0; _i0 <= 1; _i0, v_1_0, v_1_1 = _i0+1, v_1_1, v_1_0 {
 			z1 := v_1_0
-			if z1.Op != OpAMD64SHRQconst || z1.AuxInt != 63 {
+			if z1.Op != OpAMD64SHRQconst || auxIntToInt8(z1.AuxInt) != 63 {
 				continue
 			}
 			x := z1.Args[0]
@@ -25098,10 +25059,10 @@
 				continue
 			}
 			v.reset(OpAMD64SETBstore)
-			v.AuxInt = off
-			v.Aux = sym
+			v.AuxInt = int32ToAuxInt(off)
+			v.Aux = symToAux(sym)
 			v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags)
-			v0.AuxInt = 63
+			v0.AuxInt = int8ToAuxInt(63)
 			v0.AddArg(x)
 			v.AddArg3(ptr, v0, mem)
 			return true
@@ -25112,8 +25073,8 @@
 	// cond: z1==z2
 	// result: (SETBstore [off] {sym} ptr (BTLconst [31] x) mem)
 	for {
-		off := v.AuxInt
-		sym := v.Aux
+		off := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		ptr := v_0
 		if v_1.Op != OpAMD64TESTL {
 			break
@@ -25123,7 +25084,7 @@
 		v_1_1 := v_1.Args[1]
 		for _i0 := 0; _i0 <= 1; _i0, v_1_0, v_1_1 = _i0+1, v_1_1, v_1_0 {
 			z1 := v_1_0
-			if z1.Op != OpAMD64SHRLconst || z1.AuxInt != 31 {
+			if z1.Op != OpAMD64SHRLconst || auxIntToInt8(z1.AuxInt) != 31 {
 				continue
 			}
 			x := z1.Args[0]
@@ -25133,10 +25094,10 @@
 				continue
 			}
 			v.reset(OpAMD64SETBstore)
-			v.AuxInt = off
-			v.Aux = sym
+			v.AuxInt = int32ToAuxInt(off)
+			v.Aux = symToAux(sym)
 			v0 := b.NewValue0(v.Pos, OpAMD64BTLconst, types.TypeFlags)
-			v0.AuxInt = 31
+			v0.AuxInt = int8ToAuxInt(31)
 			v0.AddArg(x)
 			v.AddArg3(ptr, v0, mem)
 			return true
@@ -25146,8 +25107,8 @@
 	// match: (SETNEstore [off] {sym} ptr (InvertFlags x) mem)
 	// result: (SETNEstore [off] {sym} ptr x mem)
 	for {
-		off := v.AuxInt
-		sym := v.Aux
+		off := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		ptr := v_0
 		if v_1.Op != OpAMD64InvertFlags {
 			break
@@ -25155,30 +25116,30 @@
 		x := v_1.Args[0]
 		mem := v_2
 		v.reset(OpAMD64SETNEstore)
-		v.AuxInt = off
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off)
+		v.Aux = symToAux(sym)
 		v.AddArg3(ptr, x, mem)
 		return true
 	}
 	// match: (SETNEstore [off1] {sym} (ADDQconst [off2] base) val mem)
-	// cond: is32Bit(off1+off2)
+	// cond: is32Bit(int64(off1)+int64(off2))
 	// result: (SETNEstore [off1+off2] {sym} base val mem)
 	for {
-		off1 := v.AuxInt
-		sym := v.Aux
+		off1 := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		if v_0.Op != OpAMD64ADDQconst {
 			break
 		}
-		off2 := v_0.AuxInt
+		off2 := auxIntToInt32(v_0.AuxInt)
 		base := v_0.Args[0]
 		val := v_1
 		mem := v_2
-		if !(is32Bit(off1 + off2)) {
+		if !(is32Bit(int64(off1) + int64(off2))) {
 			break
 		}
 		v.reset(OpAMD64SETNEstore)
-		v.AuxInt = off1 + off2
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off1 + off2)
+		v.Aux = symToAux(sym)
 		v.AddArg3(base, val, mem)
 		return true
 	}
@@ -25302,28 +25263,28 @@
 	v_0 := v.Args[0]
 	b := v.Block
 	// match: (SHLL x (MOVQconst [c]))
-	// result: (SHLLconst [c&31] x)
+	// result: (SHLLconst [int8(c&31)] x)
 	for {
 		x := v_0
 		if v_1.Op != OpAMD64MOVQconst {
 			break
 		}
-		c := v_1.AuxInt
+		c := auxIntToInt64(v_1.AuxInt)
 		v.reset(OpAMD64SHLLconst)
-		v.AuxInt = c & 31
+		v.AuxInt = int8ToAuxInt(int8(c & 31))
 		v.AddArg(x)
 		return true
 	}
 	// match: (SHLL x (MOVLconst [c]))
-	// result: (SHLLconst [c&31] x)
+	// result: (SHLLconst [int8(c&31)] x)
 	for {
 		x := v_0
 		if v_1.Op != OpAMD64MOVLconst {
 			break
 		}
-		c := v_1.AuxInt
+		c := auxIntToInt32(v_1.AuxInt)
 		v.reset(OpAMD64SHLLconst)
-		v.AuxInt = c & 31
+		v.AuxInt = int8ToAuxInt(int8(c & 31))
 		v.AddArg(x)
 		return true
 	}
@@ -25335,7 +25296,7 @@
 		if v_1.Op != OpAMD64ADDQconst {
 			break
 		}
-		c := v_1.AuxInt
+		c := auxIntToInt32(v_1.AuxInt)
 		y := v_1.Args[0]
 		if !(c&31 == 0) {
 			break
@@ -25357,7 +25318,7 @@
 		if v_1_0.Op != OpAMD64ADDQconst {
 			break
 		}
-		c := v_1_0.AuxInt
+		c := auxIntToInt32(v_1_0.AuxInt)
 		y := v_1_0.Args[0]
 		if !(c&31 == 0) {
 			break
@@ -25376,7 +25337,7 @@
 		if v_1.Op != OpAMD64ANDQconst {
 			break
 		}
-		c := v_1.AuxInt
+		c := auxIntToInt32(v_1.AuxInt)
 		y := v_1.Args[0]
 		if !(c&31 == 31) {
 			break
@@ -25398,7 +25359,7 @@
 		if v_1_0.Op != OpAMD64ANDQconst {
 			break
 		}
-		c := v_1_0.AuxInt
+		c := auxIntToInt32(v_1_0.AuxInt)
 		y := v_1_0.Args[0]
 		if !(c&31 == 31) {
 			break
@@ -25417,7 +25378,7 @@
 		if v_1.Op != OpAMD64ADDLconst {
 			break
 		}
-		c := v_1.AuxInt
+		c := auxIntToInt32(v_1.AuxInt)
 		y := v_1.Args[0]
 		if !(c&31 == 0) {
 			break
@@ -25439,7 +25400,7 @@
 		if v_1_0.Op != OpAMD64ADDLconst {
 			break
 		}
-		c := v_1_0.AuxInt
+		c := auxIntToInt32(v_1_0.AuxInt)
 		y := v_1_0.Args[0]
 		if !(c&31 == 0) {
 			break
@@ -25458,7 +25419,7 @@
 		if v_1.Op != OpAMD64ANDLconst {
 			break
 		}
-		c := v_1.AuxInt
+		c := auxIntToInt32(v_1.AuxInt)
 		y := v_1.Args[0]
 		if !(c&31 == 31) {
 			break
@@ -25480,7 +25441,7 @@
 		if v_1_0.Op != OpAMD64ANDLconst {
 			break
 		}
-		c := v_1_0.AuxInt
+		c := auxIntToInt32(v_1_0.AuxInt)
 		y := v_1_0.Args[0]
 		if !(c&31 == 31) {
 			break
@@ -25498,19 +25459,19 @@
 	// match: (SHLLconst [1] (SHRLconst [1] x))
 	// result: (BTRLconst [0] x)
 	for {
-		if v.AuxInt != 1 || v_0.Op != OpAMD64SHRLconst || v_0.AuxInt != 1 {
+		if auxIntToInt8(v.AuxInt) != 1 || v_0.Op != OpAMD64SHRLconst || auxIntToInt8(v_0.AuxInt) != 1 {
 			break
 		}
 		x := v_0.Args[0]
 		v.reset(OpAMD64BTRLconst)
-		v.AuxInt = 0
+		v.AuxInt = int8ToAuxInt(0)
 		v.AddArg(x)
 		return true
 	}
 	// match: (SHLLconst x [0])
 	// result: x
 	for {
-		if v.AuxInt != 0 {
+		if auxIntToInt8(v.AuxInt) != 0 {
 			break
 		}
 		x := v_0
@@ -25536,28 +25497,28 @@
 	v_0 := v.Args[0]
 	b := v.Block
 	// match: (SHLQ x (MOVQconst [c]))
-	// result: (SHLQconst [c&63] x)
+	// result: (SHLQconst [int8(c&63)] x)
 	for {
 		x := v_0
 		if v_1.Op != OpAMD64MOVQconst {
 			break
 		}
-		c := v_1.AuxInt
+		c := auxIntToInt64(v_1.AuxInt)
 		v.reset(OpAMD64SHLQconst)
-		v.AuxInt = c & 63
+		v.AuxInt = int8ToAuxInt(int8(c & 63))
 		v.AddArg(x)
 		return true
 	}
 	// match: (SHLQ x (MOVLconst [c]))
-	// result: (SHLQconst [c&63] x)
+	// result: (SHLQconst [int8(c&63)] x)
 	for {
 		x := v_0
 		if v_1.Op != OpAMD64MOVLconst {
 			break
 		}
-		c := v_1.AuxInt
+		c := auxIntToInt32(v_1.AuxInt)
 		v.reset(OpAMD64SHLQconst)
-		v.AuxInt = c & 63
+		v.AuxInt = int8ToAuxInt(int8(c & 63))
 		v.AddArg(x)
 		return true
 	}
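As with the SHLL rules, the shift-by-constant rewrites mask the count to the width the hardware actually uses (c&63 for 64-bit shifts, c&31 for 32-bit), which is what makes the int8 truncation of the AuxInt lossless. A quick check:

	package main

	import "fmt"

	func main() {
		// The masked count is always in [0, 63], so narrowing to int8
		// preserves the value for any original constant.
		for _, c := range []int64{1, 63, 64, -1} {
			fmt.Println(c, "->", int8(c&63)) // 1, 63, 0, 63
		}
	}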
@@ -25569,7 +25530,7 @@
 		if v_1.Op != OpAMD64ADDQconst {
 			break
 		}
-		c := v_1.AuxInt
+		c := auxIntToInt32(v_1.AuxInt)
 		y := v_1.Args[0]
 		if !(c&63 == 0) {
 			break
@@ -25591,7 +25552,7 @@
 		if v_1_0.Op != OpAMD64ADDQconst {
 			break
 		}
-		c := v_1_0.AuxInt
+		c := auxIntToInt32(v_1_0.AuxInt)
 		y := v_1_0.Args[0]
 		if !(c&63 == 0) {
 			break
@@ -25610,7 +25571,7 @@
 		if v_1.Op != OpAMD64ANDQconst {
 			break
 		}
-		c := v_1.AuxInt
+		c := auxIntToInt32(v_1.AuxInt)
 		y := v_1.Args[0]
 		if !(c&63 == 63) {
 			break
@@ -25632,7 +25593,7 @@
 		if v_1_0.Op != OpAMD64ANDQconst {
 			break
 		}
-		c := v_1_0.AuxInt
+		c := auxIntToInt32(v_1_0.AuxInt)
 		y := v_1_0.Args[0]
 		if !(c&63 == 63) {
 			break
@@ -25651,7 +25612,7 @@
 		if v_1.Op != OpAMD64ADDLconst {
 			break
 		}
-		c := v_1.AuxInt
+		c := auxIntToInt32(v_1.AuxInt)
 		y := v_1.Args[0]
 		if !(c&63 == 0) {
 			break
@@ -25673,7 +25634,7 @@
 		if v_1_0.Op != OpAMD64ADDLconst {
 			break
 		}
-		c := v_1_0.AuxInt
+		c := auxIntToInt32(v_1_0.AuxInt)
 		y := v_1_0.Args[0]
 		if !(c&63 == 0) {
 			break
@@ -25692,7 +25653,7 @@
 		if v_1.Op != OpAMD64ANDLconst {
 			break
 		}
-		c := v_1.AuxInt
+		c := auxIntToInt32(v_1.AuxInt)
 		y := v_1.Args[0]
 		if !(c&63 == 63) {
 			break
@@ -25714,7 +25675,7 @@
 		if v_1_0.Op != OpAMD64ANDLconst {
 			break
 		}
-		c := v_1_0.AuxInt
+		c := auxIntToInt32(v_1_0.AuxInt)
 		y := v_1_0.Args[0]
 		if !(c&63 == 63) {
 			break
@@ -25732,19 +25693,19 @@
 	// match: (SHLQconst [1] (SHRQconst [1] x))
 	// result: (BTRQconst [0] x)
 	for {
-		if v.AuxInt != 1 || v_0.Op != OpAMD64SHRQconst || v_0.AuxInt != 1 {
+		if auxIntToInt8(v.AuxInt) != 1 || v_0.Op != OpAMD64SHRQconst || auxIntToInt8(v_0.AuxInt) != 1 {
 			break
 		}
 		x := v_0.Args[0]
 		v.reset(OpAMD64BTRQconst)
-		v.AuxInt = 0
+		v.AuxInt = int8ToAuxInt(0)
 		v.AddArg(x)
 		return true
 	}
 	// match: (SHLQconst x [0])
 	// result: x
 	for {
-		if v.AuxInt != 0 {
+		if auxIntToInt8(v.AuxInt) != 0 {
 			break
 		}
 		x := v_0
@@ -25782,35 +25743,35 @@
 	v_0 := v.Args[0]
 	// match: (SHRB x (MOVQconst [c]))
 	// cond: c&31 < 8
-	// result: (SHRBconst [c&31] x)
+	// result: (SHRBconst [int8(c&31)] x)
 	for {
 		x := v_0
 		if v_1.Op != OpAMD64MOVQconst {
 			break
 		}
-		c := v_1.AuxInt
+		c := auxIntToInt64(v_1.AuxInt)
 		if !(c&31 < 8) {
 			break
 		}
 		v.reset(OpAMD64SHRBconst)
-		v.AuxInt = c & 31
+		v.AuxInt = int8ToAuxInt(int8(c & 31))
 		v.AddArg(x)
 		return true
 	}
 	// match: (SHRB x (MOVLconst [c]))
 	// cond: c&31 < 8
-	// result: (SHRBconst [c&31] x)
+	// result: (SHRBconst [int8(c&31)] x)
 	for {
 		x := v_0
 		if v_1.Op != OpAMD64MOVLconst {
 			break
 		}
-		c := v_1.AuxInt
+		c := auxIntToInt32(v_1.AuxInt)
 		if !(c&31 < 8) {
 			break
 		}
 		v.reset(OpAMD64SHRBconst)
-		v.AuxInt = c & 31
+		v.AuxInt = int8ToAuxInt(int8(c & 31))
 		v.AddArg(x)
 		return true
 	}
@@ -25821,12 +25782,12 @@
 		if v_1.Op != OpAMD64MOVQconst {
 			break
 		}
-		c := v_1.AuxInt
+		c := auxIntToInt64(v_1.AuxInt)
 		if !(c&31 >= 8) {
 			break
 		}
 		v.reset(OpAMD64MOVLconst)
-		v.AuxInt = 0
+		v.AuxInt = int32ToAuxInt(0)
 		return true
 	}
 	// match: (SHRB _ (MOVLconst [c]))
@@ -25836,12 +25797,12 @@
 		if v_1.Op != OpAMD64MOVLconst {
 			break
 		}
-		c := v_1.AuxInt
+		c := auxIntToInt32(v_1.AuxInt)
 		if !(c&31 >= 8) {
 			break
 		}
 		v.reset(OpAMD64MOVLconst)
-		v.AuxInt = 0
+		v.AuxInt = int32ToAuxInt(0)
 		return true
 	}
 	return false
@@ -25851,7 +25812,7 @@
 	// match: (SHRBconst x [0])
 	// result: x
 	for {
-		if v.AuxInt != 0 {
+		if auxIntToInt8(v.AuxInt) != 0 {
 			break
 		}
 		x := v_0
@@ -25865,28 +25826,28 @@
 	v_0 := v.Args[0]
 	b := v.Block
 	// match: (SHRL x (MOVQconst [c]))
-	// result: (SHRLconst [c&31] x)
+	// result: (SHRLconst [int8(c&31)] x)
 	for {
 		x := v_0
 		if v_1.Op != OpAMD64MOVQconst {
 			break
 		}
-		c := v_1.AuxInt
+		c := auxIntToInt64(v_1.AuxInt)
 		v.reset(OpAMD64SHRLconst)
-		v.AuxInt = c & 31
+		v.AuxInt = int8ToAuxInt(int8(c & 31))
 		v.AddArg(x)
 		return true
 	}
 	// match: (SHRL x (MOVLconst [c]))
-	// result: (SHRLconst [c&31] x)
+	// result: (SHRLconst [int8(c&31)] x)
 	for {
 		x := v_0
 		if v_1.Op != OpAMD64MOVLconst {
 			break
 		}
-		c := v_1.AuxInt
+		c := auxIntToInt32(v_1.AuxInt)
 		v.reset(OpAMD64SHRLconst)
-		v.AuxInt = c & 31
+		v.AuxInt = int8ToAuxInt(int8(c & 31))
 		v.AddArg(x)
 		return true
 	}
@@ -25898,7 +25859,7 @@
 		if v_1.Op != OpAMD64ADDQconst {
 			break
 		}
-		c := v_1.AuxInt
+		c := auxIntToInt32(v_1.AuxInt)
 		y := v_1.Args[0]
 		if !(c&31 == 0) {
 			break
@@ -25920,7 +25881,7 @@
 		if v_1_0.Op != OpAMD64ADDQconst {
 			break
 		}
-		c := v_1_0.AuxInt
+		c := auxIntToInt32(v_1_0.AuxInt)
 		y := v_1_0.Args[0]
 		if !(c&31 == 0) {
 			break
@@ -25939,7 +25900,7 @@
 		if v_1.Op != OpAMD64ANDQconst {
 			break
 		}
-		c := v_1.AuxInt
+		c := auxIntToInt32(v_1.AuxInt)
 		y := v_1.Args[0]
 		if !(c&31 == 31) {
 			break
@@ -25961,7 +25922,7 @@
 		if v_1_0.Op != OpAMD64ANDQconst {
 			break
 		}
-		c := v_1_0.AuxInt
+		c := auxIntToInt32(v_1_0.AuxInt)
 		y := v_1_0.Args[0]
 		if !(c&31 == 31) {
 			break
@@ -25980,7 +25941,7 @@
 		if v_1.Op != OpAMD64ADDLconst {
 			break
 		}
-		c := v_1.AuxInt
+		c := auxIntToInt32(v_1.AuxInt)
 		y := v_1.Args[0]
 		if !(c&31 == 0) {
 			break
@@ -26002,7 +25963,7 @@
 		if v_1_0.Op != OpAMD64ADDLconst {
 			break
 		}
-		c := v_1_0.AuxInt
+		c := auxIntToInt32(v_1_0.AuxInt)
 		y := v_1_0.Args[0]
 		if !(c&31 == 0) {
 			break
@@ -26021,7 +25982,7 @@
 		if v_1.Op != OpAMD64ANDLconst {
 			break
 		}
-		c := v_1.AuxInt
+		c := auxIntToInt32(v_1.AuxInt)
 		y := v_1.Args[0]
 		if !(c&31 == 31) {
 			break
@@ -26043,7 +26004,7 @@
 		if v_1_0.Op != OpAMD64ANDLconst {
 			break
 		}
-		c := v_1_0.AuxInt
+		c := auxIntToInt32(v_1_0.AuxInt)
 		y := v_1_0.Args[0]
 		if !(c&31 == 31) {
 			break
@@ -26061,19 +26022,19 @@
 	// match: (SHRLconst [1] (SHLLconst [1] x))
 	// result: (BTRLconst [31] x)
 	for {
-		if v.AuxInt != 1 || v_0.Op != OpAMD64SHLLconst || v_0.AuxInt != 1 {
+		if auxIntToInt8(v.AuxInt) != 1 || v_0.Op != OpAMD64SHLLconst || auxIntToInt8(v_0.AuxInt) != 1 {
 			break
 		}
 		x := v_0.Args[0]
 		v.reset(OpAMD64BTRLconst)
-		v.AuxInt = 31
+		v.AuxInt = int8ToAuxInt(31)
 		v.AddArg(x)
 		return true
 	}
 	// match: (SHRLconst x [0])
 	// result: x
 	for {
-		if v.AuxInt != 0 {
+		if auxIntToInt8(v.AuxInt) != 0 {
 			break
 		}
 		x := v_0
@@ -26087,28 +26048,28 @@
 	v_0 := v.Args[0]
 	b := v.Block
 	// match: (SHRQ x (MOVQconst [c]))
-	// result: (SHRQconst [c&63] x)
+	// result: (SHRQconst [int8(c&63)] x)
 	for {
 		x := v_0
 		if v_1.Op != OpAMD64MOVQconst {
 			break
 		}
-		c := v_1.AuxInt
+		c := auxIntToInt64(v_1.AuxInt)
 		v.reset(OpAMD64SHRQconst)
-		v.AuxInt = c & 63
+		v.AuxInt = int8ToAuxInt(int8(c & 63))
 		v.AddArg(x)
 		return true
 	}
 	// match: (SHRQ x (MOVLconst [c]))
-	// result: (SHRQconst [c&63] x)
+	// result: (SHRQconst [int8(c&63)] x)
 	for {
 		x := v_0
 		if v_1.Op != OpAMD64MOVLconst {
 			break
 		}
-		c := v_1.AuxInt
+		c := auxIntToInt32(v_1.AuxInt)
 		v.reset(OpAMD64SHRQconst)
-		v.AuxInt = c & 63
+		v.AuxInt = int8ToAuxInt(int8(c & 63))
 		v.AddArg(x)
 		return true
 	}
@@ -26120,7 +26081,7 @@
 		if v_1.Op != OpAMD64ADDQconst {
 			break
 		}
-		c := v_1.AuxInt
+		c := auxIntToInt32(v_1.AuxInt)
 		y := v_1.Args[0]
 		if !(c&63 == 0) {
 			break
@@ -26142,7 +26103,7 @@
 		if v_1_0.Op != OpAMD64ADDQconst {
 			break
 		}
-		c := v_1_0.AuxInt
+		c := auxIntToInt32(v_1_0.AuxInt)
 		y := v_1_0.Args[0]
 		if !(c&63 == 0) {
 			break
@@ -26161,7 +26122,7 @@
 		if v_1.Op != OpAMD64ANDQconst {
 			break
 		}
-		c := v_1.AuxInt
+		c := auxIntToInt32(v_1.AuxInt)
 		y := v_1.Args[0]
 		if !(c&63 == 63) {
 			break
@@ -26183,7 +26144,7 @@
 		if v_1_0.Op != OpAMD64ANDQconst {
 			break
 		}
-		c := v_1_0.AuxInt
+		c := auxIntToInt32(v_1_0.AuxInt)
 		y := v_1_0.Args[0]
 		if !(c&63 == 63) {
 			break
@@ -26202,7 +26163,7 @@
 		if v_1.Op != OpAMD64ADDLconst {
 			break
 		}
-		c := v_1.AuxInt
+		c := auxIntToInt32(v_1.AuxInt)
 		y := v_1.Args[0]
 		if !(c&63 == 0) {
 			break
@@ -26224,7 +26185,7 @@
 		if v_1_0.Op != OpAMD64ADDLconst {
 			break
 		}
-		c := v_1_0.AuxInt
+		c := auxIntToInt32(v_1_0.AuxInt)
 		y := v_1_0.Args[0]
 		if !(c&63 == 0) {
 			break
@@ -26243,7 +26204,7 @@
 		if v_1.Op != OpAMD64ANDLconst {
 			break
 		}
-		c := v_1.AuxInt
+		c := auxIntToInt32(v_1.AuxInt)
 		y := v_1.Args[0]
 		if !(c&63 == 63) {
 			break
@@ -26265,7 +26226,7 @@
 		if v_1_0.Op != OpAMD64ANDLconst {
 			break
 		}
-		c := v_1_0.AuxInt
+		c := auxIntToInt32(v_1_0.AuxInt)
 		y := v_1_0.Args[0]
 		if !(c&63 == 63) {
 			break
@@ -26283,19 +26244,19 @@
 	// match: (SHRQconst [1] (SHLQconst [1] x))
 	// result: (BTRQconst [63] x)
 	for {
-		if v.AuxInt != 1 || v_0.Op != OpAMD64SHLQconst || v_0.AuxInt != 1 {
+		if auxIntToInt8(v.AuxInt) != 1 || v_0.Op != OpAMD64SHLQconst || auxIntToInt8(v_0.AuxInt) != 1 {
 			break
 		}
 		x := v_0.Args[0]
 		v.reset(OpAMD64BTRQconst)
-		v.AuxInt = 63
+		v.AuxInt = int8ToAuxInt(63)
 		v.AddArg(x)
 		return true
 	}
 	// match: (SHRQconst x [0])
 	// result: x
 	for {
-		if v.AuxInt != 0 {
+		if auxIntToInt8(v.AuxInt) != 0 {
 			break
 		}
 		x := v_0
@@ -26309,35 +26270,35 @@
 	v_0 := v.Args[0]
 	// match: (SHRW x (MOVQconst [c]))
 	// cond: c&31 < 16
-	// result: (SHRWconst [c&31] x)
+	// result: (SHRWconst [int8(c&31)] x)
 	for {
 		x := v_0
 		if v_1.Op != OpAMD64MOVQconst {
 			break
 		}
-		c := v_1.AuxInt
+		c := auxIntToInt64(v_1.AuxInt)
 		if !(c&31 < 16) {
 			break
 		}
 		v.reset(OpAMD64SHRWconst)
-		v.AuxInt = c & 31
+		v.AuxInt = int8ToAuxInt(int8(c & 31))
 		v.AddArg(x)
 		return true
 	}
 	// match: (SHRW x (MOVLconst [c]))
 	// cond: c&31 < 16
-	// result: (SHRWconst [c&31] x)
+	// result: (SHRWconst [int8(c&31)] x)
 	for {
 		x := v_0
 		if v_1.Op != OpAMD64MOVLconst {
 			break
 		}
-		c := v_1.AuxInt
+		c := auxIntToInt32(v_1.AuxInt)
 		if !(c&31 < 16) {
 			break
 		}
 		v.reset(OpAMD64SHRWconst)
-		v.AuxInt = c & 31
+		v.AuxInt = int8ToAuxInt(int8(c & 31))
 		v.AddArg(x)
 		return true
 	}
@@ -26348,12 +26309,12 @@
 		if v_1.Op != OpAMD64MOVQconst {
 			break
 		}
-		c := v_1.AuxInt
+		c := auxIntToInt64(v_1.AuxInt)
 		if !(c&31 >= 16) {
 			break
 		}
 		v.reset(OpAMD64MOVLconst)
-		v.AuxInt = 0
+		v.AuxInt = int32ToAuxInt(0)
 		return true
 	}
 	// match: (SHRW _ (MOVLconst [c]))
@@ -26363,12 +26324,12 @@
 		if v_1.Op != OpAMD64MOVLconst {
 			break
 		}
-		c := v_1.AuxInt
+		c := auxIntToInt32(v_1.AuxInt)
 		if !(c&31 >= 16) {
 			break
 		}
 		v.reset(OpAMD64MOVLconst)
-		v.AuxInt = 0
+		v.AuxInt = int32ToAuxInt(0)
 		return true
 	}
 	return false
@@ -26378,7 +26339,7 @@
 	// match: (SHRWconst x [0])
 	// result: x
 	for {
-		if v.AuxInt != 0 {
+		if auxIntToInt8(v.AuxInt) != 0 {
 			break
 		}
 		x := v_0
@@ -26398,9 +26359,9 @@
 		if v_1.Op != OpAMD64MOVLconst {
 			break
 		}
-		c := v_1.AuxInt
+		c := auxIntToInt32(v_1.AuxInt)
 		v.reset(OpAMD64SUBLconst)
-		v.AuxInt = c
+		v.AuxInt = int32ToAuxInt(c)
 		v.AddArg(x)
 		return true
 	}
@@ -26410,11 +26371,11 @@
 		if v_0.Op != OpAMD64MOVLconst {
 			break
 		}
-		c := v_0.AuxInt
+		c := auxIntToInt32(v_0.AuxInt)
 		x := v_1
 		v.reset(OpAMD64NEGL)
 		v0 := b.NewValue0(v.Pos, OpAMD64SUBLconst, v.Type)
-		v0.AuxInt = c
+		v0.AuxInt = int32ToAuxInt(c)
 		v0.AddArg(x)
 		v.AddArg(v0)
 		return true
@@ -26486,24 +26447,24 @@
 	b := v.Block
 	typ := &b.Func.Config.Types
 	// match: (SUBLload [off1] {sym} val (ADDQconst [off2] base) mem)
-	// cond: is32Bit(off1+off2)
+	// cond: is32Bit(int64(off1)+int64(off2))
 	// result: (SUBLload [off1+off2] {sym} val base mem)
 	for {
-		off1 := v.AuxInt
-		sym := v.Aux
+		off1 := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		val := v_0
 		if v_1.Op != OpAMD64ADDQconst {
 			break
 		}
-		off2 := v_1.AuxInt
+		off2 := auxIntToInt32(v_1.AuxInt)
 		base := v_1.Args[0]
 		mem := v_2
-		if !(is32Bit(off1 + off2)) {
+		if !(is32Bit(int64(off1) + int64(off2))) {
 			break
 		}
 		v.reset(OpAMD64SUBLload)
-		v.AuxInt = off1 + off2
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off1 + off2)
+		v.Aux = symToAux(sym)
 		v.AddArg3(val, base, mem)
 		return true
 	}
@@ -26557,24 +26518,24 @@
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	// match: (SUBLmodify [off1] {sym} (ADDQconst [off2] base) val mem)
-	// cond: is32Bit(off1+off2)
+	// cond: is32Bit(int64(off1)+int64(off2))
 	// result: (SUBLmodify [off1+off2] {sym} base val mem)
 	for {
-		off1 := v.AuxInt
-		sym := v.Aux
+		off1 := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		if v_0.Op != OpAMD64ADDQconst {
 			break
 		}
-		off2 := v_0.AuxInt
+		off2 := auxIntToInt32(v_0.AuxInt)
 		base := v_0.Args[0]
 		val := v_1
 		mem := v_2
-		if !(is32Bit(off1 + off2)) {
+		if !(is32Bit(int64(off1) + int64(off2))) {
 			break
 		}
 		v.reset(OpAMD64SUBLmodify)
-		v.AuxInt = off1 + off2
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off1 + off2)
+		v.Aux = symToAux(sym)
 		v.AddArg3(base, val, mem)
 		return true
 	}
@@ -26609,36 +26570,36 @@
 	b := v.Block
 	// match: (SUBQ x (MOVQconst [c]))
 	// cond: is32Bit(c)
-	// result: (SUBQconst x [c])
+	// result: (SUBQconst x [int32(c)])
 	for {
 		x := v_0
 		if v_1.Op != OpAMD64MOVQconst {
 			break
 		}
-		c := v_1.AuxInt
+		c := auxIntToInt64(v_1.AuxInt)
 		if !(is32Bit(c)) {
 			break
 		}
 		v.reset(OpAMD64SUBQconst)
-		v.AuxInt = c
+		v.AuxInt = int32ToAuxInt(int32(c))
 		v.AddArg(x)
 		return true
 	}
 	// match: (SUBQ (MOVQconst [c]) x)
 	// cond: is32Bit(c)
-	// result: (NEGQ (SUBQconst <v.Type> x [c]))
+	// result: (NEGQ (SUBQconst <v.Type> x [int32(c)]))
 	for {
 		if v_0.Op != OpAMD64MOVQconst {
 			break
 		}
-		c := v_0.AuxInt
+		c := auxIntToInt64(v_0.AuxInt)
 		x := v_1
 		if !(is32Bit(c)) {
 			break
 		}
 		v.reset(OpAMD64NEGQ)
 		v0 := b.NewValue0(v.Pos, OpAMD64SUBQconst, v.Type)
-		v0.AuxInt = c
+		v0.AuxInt = int32ToAuxInt(int32(c))
 		v0.AddArg(x)
 		v.AddArg(v0)
 		return true
@@ -26765,24 +26726,24 @@
 	b := v.Block
 	typ := &b.Func.Config.Types
 	// match: (SUBQload [off1] {sym} val (ADDQconst [off2] base) mem)
-	// cond: is32Bit(off1+off2)
+	// cond: is32Bit(int64(off1)+int64(off2))
 	// result: (SUBQload [off1+off2] {sym} val base mem)
 	for {
-		off1 := v.AuxInt
-		sym := v.Aux
+		off1 := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		val := v_0
 		if v_1.Op != OpAMD64ADDQconst {
 			break
 		}
-		off2 := v_1.AuxInt
+		off2 := auxIntToInt32(v_1.AuxInt)
 		base := v_1.Args[0]
 		mem := v_2
-		if !(is32Bit(off1 + off2)) {
+		if !(is32Bit(int64(off1) + int64(off2))) {
 			break
 		}
 		v.reset(OpAMD64SUBQload)
-		v.AuxInt = off1 + off2
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off1 + off2)
+		v.Aux = symToAux(sym)
 		v.AddArg3(val, base, mem)
 		return true
 	}
@@ -26836,24 +26797,24 @@
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	// match: (SUBQmodify [off1] {sym} (ADDQconst [off2] base) val mem)
-	// cond: is32Bit(off1+off2)
+	// cond: is32Bit(int64(off1)+int64(off2))
 	// result: (SUBQmodify [off1+off2] {sym} base val mem)
 	for {
-		off1 := v.AuxInt
-		sym := v.Aux
+		off1 := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		if v_0.Op != OpAMD64ADDQconst {
 			break
 		}
-		off2 := v_0.AuxInt
+		off2 := auxIntToInt32(v_0.AuxInt)
 		base := v_0.Args[0]
 		val := v_1
 		mem := v_2
-		if !(is32Bit(off1 + off2)) {
+		if !(is32Bit(int64(off1) + int64(off2))) {
 			break
 		}
 		v.reset(OpAMD64SUBQmodify)
-		v.AuxInt = off1 + off2
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off1 + off2)
+		v.Aux = symToAux(sym)
 		v.AddArg3(base, val, mem)
 		return true
 	}
@@ -26916,24 +26877,24 @@
 	b := v.Block
 	typ := &b.Func.Config.Types
 	// match: (SUBSDload [off1] {sym} val (ADDQconst [off2] base) mem)
-	// cond: is32Bit(off1+off2)
+	// cond: is32Bit(int64(off1)+int64(off2))
 	// result: (SUBSDload [off1+off2] {sym} val base mem)
 	for {
-		off1 := v.AuxInt
-		sym := v.Aux
+		off1 := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		val := v_0
 		if v_1.Op != OpAMD64ADDQconst {
 			break
 		}
-		off2 := v_1.AuxInt
+		off2 := auxIntToInt32(v_1.AuxInt)
 		base := v_1.Args[0]
 		mem := v_2
-		if !(is32Bit(off1 + off2)) {
+		if !(is32Bit(int64(off1) + int64(off2))) {
 			break
 		}
 		v.reset(OpAMD64SUBSDload)
-		v.AuxInt = off1 + off2
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off1 + off2)
+		v.Aux = symToAux(sym)
 		v.AddArg3(val, base, mem)
 		return true
 	}
@@ -27016,24 +26977,24 @@
 	b := v.Block
 	typ := &b.Func.Config.Types
 	// match: (SUBSSload [off1] {sym} val (ADDQconst [off2] base) mem)
-	// cond: is32Bit(off1+off2)
+	// cond: is32Bit(int64(off1)+int64(off2))
 	// result: (SUBSSload [off1+off2] {sym} val base mem)
 	for {
-		off1 := v.AuxInt
-		sym := v.Aux
+		off1 := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		val := v_0
 		if v_1.Op != OpAMD64ADDQconst {
 			break
 		}
-		off2 := v_1.AuxInt
+		off2 := auxIntToInt32(v_1.AuxInt)
 		base := v_1.Args[0]
 		mem := v_2
-		if !(is32Bit(off1 + off2)) {
+		if !(is32Bit(int64(off1) + int64(off2))) {
 			break
 		}
 		v.reset(OpAMD64SUBSSload)
-		v.AuxInt = off1 + off2
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off1 + off2)
+		v.Aux = symToAux(sym)
 		v.AddArg3(val, base, mem)
 		return true
 	}
@@ -27594,7 +27555,7 @@
 			}
 			y := v_0.Args[1]
 			v_0_0 := v_0.Args[0]
-			if v_0_0.Op != OpAMD64MOVLconst || v_0_0.AuxInt != 1 {
+			if v_0_0.Op != OpAMD64MOVLconst || auxIntToInt32(v_0_0.AuxInt) != 1 {
 				continue
 			}
 			x := v_1
@@ -27605,20 +27566,20 @@
 		break
 	}
 	// match: (XORL (MOVLconst [c]) x)
-	// cond: isUint32PowerOfTwo(c) && uint64(c) >= 128
-	// result: (BTCLconst [log2uint32(c)] x)
+	// cond: isUint32PowerOfTwo(int64(c)) && uint64(c) >= 128
+	// result: (BTCLconst [int8(log32(c))] x)
 	for {
 		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
 			if v_0.Op != OpAMD64MOVLconst {
 				continue
 			}
-			c := v_0.AuxInt
+			c := auxIntToInt32(v_0.AuxInt)
 			x := v_1
-			if !(isUint32PowerOfTwo(c) && uint64(c) >= 128) {
+			if !(isUint32PowerOfTwo(int64(c)) && uint64(c) >= 128) {
 				continue
 			}
 			v.reset(OpAMD64BTCLconst)
-			v.AuxInt = log2uint32(c)
+			v.AuxInt = int8ToAuxInt(int8(log32(c)))
 			v.AddArg(x)
 			return true
 		}
@@ -27632,9 +27593,9 @@
 			if v_1.Op != OpAMD64MOVLconst {
 				continue
 			}
-			c := v_1.AuxInt
+			c := auxIntToInt32(v_1.AuxInt)
 			v.reset(OpAMD64XORLconst)
-			v.AuxInt = c
+			v.AuxInt = int32ToAuxInt(c)
 			v.AddArg(x)
 			return true
 		}
@@ -27648,17 +27609,17 @@
 			if v_0.Op != OpAMD64SHLLconst {
 				continue
 			}
-			c := v_0.AuxInt
+			c := auxIntToInt8(v_0.AuxInt)
 			x := v_0.Args[0]
 			if v_1.Op != OpAMD64SHRLconst {
 				continue
 			}
-			d := v_1.AuxInt
+			d := auxIntToInt8(v_1.AuxInt)
 			if x != v_1.Args[0] || !(d == 32-c) {
 				continue
 			}
 			v.reset(OpAMD64ROLLconst)
-			v.AuxInt = c
+			v.AuxInt = int8ToAuxInt(c)
 			v.AddArg(x)
 			return true
 		}
@@ -27673,17 +27634,17 @@
 			if v_0.Op != OpAMD64SHLLconst {
 				continue
 			}
-			c := v_0.AuxInt
+			c := auxIntToInt8(v_0.AuxInt)
 			x := v_0.Args[0]
 			if v_1.Op != OpAMD64SHRWconst {
 				continue
 			}
-			d := v_1.AuxInt
+			d := auxIntToInt8(v_1.AuxInt)
 			if x != v_1.Args[0] || !(d == 16-c && c < 16 && t.Size() == 2) {
 				continue
 			}
 			v.reset(OpAMD64ROLWconst)
-			v.AuxInt = c
+			v.AuxInt = int8ToAuxInt(c)
 			v.AddArg(x)
 			return true
 		}
@@ -27698,17 +27659,17 @@
 			if v_0.Op != OpAMD64SHLLconst {
 				continue
 			}
-			c := v_0.AuxInt
+			c := auxIntToInt8(v_0.AuxInt)
 			x := v_0.Args[0]
 			if v_1.Op != OpAMD64SHRBconst {
 				continue
 			}
-			d := v_1.AuxInt
+			d := auxIntToInt8(v_1.AuxInt)
 			if x != v_1.Args[0] || !(d == 8-c && c < 8 && t.Size() == 1) {
 				continue
 			}
 			v.reset(OpAMD64ROLBconst)
-			v.AuxInt = c
+			v.AuxInt = int8ToAuxInt(c)
 			v.AddArg(x)
 			return true
 		}
@@ -27755,23 +27716,23 @@
 func rewriteValueAMD64_OpAMD64XORLconst(v *Value) bool {
 	v_0 := v.Args[0]
 	// match: (XORLconst [c] x)
-	// cond: isUint32PowerOfTwo(c) && uint64(c) >= 128
-	// result: (BTCLconst [log2uint32(c)] x)
+	// cond: isUint32PowerOfTwo(int64(c)) && uint64(c) >= 128
+	// result: (BTCLconst [int8(log32(c))] x)
 	for {
-		c := v.AuxInt
+		c := auxIntToInt32(v.AuxInt)
 		x := v_0
-		if !(isUint32PowerOfTwo(c) && uint64(c) >= 128) {
+		if !(isUint32PowerOfTwo(int64(c)) && uint64(c) >= 128) {
 			break
 		}
 		v.reset(OpAMD64BTCLconst)
-		v.AuxInt = log2uint32(c)
+		v.AuxInt = int8ToAuxInt(int8(log32(c)))
 		v.AddArg(x)
 		return true
 	}
 	// match: (XORLconst [1] (SETNE x))
 	// result: (SETEQ x)
 	for {
-		if v.AuxInt != 1 || v_0.Op != OpAMD64SETNE {
+		if auxIntToInt32(v.AuxInt) != 1 || v_0.Op != OpAMD64SETNE {
 			break
 		}
 		x := v_0.Args[0]
@@ -27782,7 +27743,7 @@
 	// match: (XORLconst [1] (SETEQ x))
 	// result: (SETNE x)
 	for {
-		if v.AuxInt != 1 || v_0.Op != OpAMD64SETEQ {
+		if auxIntToInt32(v.AuxInt) != 1 || v_0.Op != OpAMD64SETEQ {
 			break
 		}
 		x := v_0.Args[0]
@@ -27793,7 +27754,7 @@
 	// match: (XORLconst [1] (SETL x))
 	// result: (SETGE x)
 	for {
-		if v.AuxInt != 1 || v_0.Op != OpAMD64SETL {
+		if auxIntToInt32(v.AuxInt) != 1 || v_0.Op != OpAMD64SETL {
 			break
 		}
 		x := v_0.Args[0]
@@ -27804,7 +27765,7 @@
 	// match: (XORLconst [1] (SETGE x))
 	// result: (SETL x)
 	for {
-		if v.AuxInt != 1 || v_0.Op != OpAMD64SETGE {
+		if auxIntToInt32(v.AuxInt) != 1 || v_0.Op != OpAMD64SETGE {
 			break
 		}
 		x := v_0.Args[0]
@@ -27815,7 +27776,7 @@
 	// match: (XORLconst [1] (SETLE x))
 	// result: (SETG x)
 	for {
-		if v.AuxInt != 1 || v_0.Op != OpAMD64SETLE {
+		if auxIntToInt32(v.AuxInt) != 1 || v_0.Op != OpAMD64SETLE {
 			break
 		}
 		x := v_0.Args[0]
@@ -27826,7 +27787,7 @@
 	// match: (XORLconst [1] (SETG x))
 	// result: (SETLE x)
 	for {
-		if v.AuxInt != 1 || v_0.Op != OpAMD64SETG {
+		if auxIntToInt32(v.AuxInt) != 1 || v_0.Op != OpAMD64SETG {
 			break
 		}
 		x := v_0.Args[0]
@@ -27837,7 +27798,7 @@
 	// match: (XORLconst [1] (SETB x))
 	// result: (SETAE x)
 	for {
-		if v.AuxInt != 1 || v_0.Op != OpAMD64SETB {
+		if auxIntToInt32(v.AuxInt) != 1 || v_0.Op != OpAMD64SETB {
 			break
 		}
 		x := v_0.Args[0]
@@ -27848,7 +27809,7 @@
 	// match: (XORLconst [1] (SETAE x))
 	// result: (SETB x)
 	for {
-		if v.AuxInt != 1 || v_0.Op != OpAMD64SETAE {
+		if auxIntToInt32(v.AuxInt) != 1 || v_0.Op != OpAMD64SETAE {
 			break
 		}
 		x := v_0.Args[0]
@@ -27859,7 +27820,7 @@
 	// match: (XORLconst [1] (SETBE x))
 	// result: (SETA x)
 	for {
-		if v.AuxInt != 1 || v_0.Op != OpAMD64SETBE {
+		if auxIntToInt32(v.AuxInt) != 1 || v_0.Op != OpAMD64SETBE {
 			break
 		}
 		x := v_0.Args[0]
@@ -27870,7 +27831,7 @@
 	// match: (XORLconst [1] (SETA x))
 	// result: (SETBE x)
 	for {
-		if v.AuxInt != 1 || v_0.Op != OpAMD64SETA {
+		if auxIntToInt32(v.AuxInt) != 1 || v_0.Op != OpAMD64SETA {
 			break
 		}
 		x := v_0.Args[0]
@@ -27936,23 +27897,23 @@
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	// match: (XORLconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem)
-	// cond: ValAndOff(valoff1).canAdd(off2)
-	// result: (XORLconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem)
+	// cond: ValAndOff(valoff1).canAdd32(off2)
+	// result: (XORLconstmodify [ValAndOff(valoff1).addOffset32(off2)] {sym} base mem)
 	for {
-		valoff1 := v.AuxInt
-		sym := v.Aux
+		valoff1 := auxIntToValAndOff(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		if v_0.Op != OpAMD64ADDQconst {
 			break
 		}
-		off2 := v_0.AuxInt
+		off2 := auxIntToInt32(v_0.AuxInt)
 		base := v_0.Args[0]
 		mem := v_1
-		if !(ValAndOff(valoff1).canAdd(off2)) {
+		if !(ValAndOff(valoff1).canAdd32(off2)) {
 			break
 		}
 		v.reset(OpAMD64XORLconstmodify)
-		v.AuxInt = ValAndOff(valoff1).add(off2)
-		v.Aux = sym
+		v.AuxInt = valAndOffToAuxInt(ValAndOff(valoff1).addOffset32(off2))
+		v.Aux = symToAux(sym)
 		v.AddArg2(base, mem)
 		return true
 	}
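The *constmodify rules carry a ValAndOff, a constant value and a memory offset packed into a single AuxInt; their untyped canAdd/add helpers become the 32-bit-typed canAdd32/addOffset32 seen above. A sketch of the packing, assuming the layout implied by these call sites (the real type lives in rewrite.go):

	// Sketch only: value in the high 32 bits, offset in the low 32 bits.
	type ValAndOff int64

	func (x ValAndOff) Val() int64 { return int64(x) >> 32 }
	func (x ValAndOff) Off() int64 { return int64(int32(x)) }

	func makeValAndOff(val, off int64) ValAndOff {
		return ValAndOff(val<<32 + int64(uint32(off)))
	}

	func (x ValAndOff) canAdd32(off int32) bool {
		newoff := x.Off() + int64(off)
		return newoff == int64(int32(newoff))
	}

	func (x ValAndOff) addOffset32(off int32) ValAndOff {
		return makeValAndOff(x.Val(), x.Off()+int64(off))
	}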
@@ -27987,24 +27948,24 @@
 	b := v.Block
 	typ := &b.Func.Config.Types
 	// match: (XORLload [off1] {sym} val (ADDQconst [off2] base) mem)
-	// cond: is32Bit(off1+off2)
+	// cond: is32Bit(int64(off1)+int64(off2))
 	// result: (XORLload [off1+off2] {sym} val base mem)
 	for {
-		off1 := v.AuxInt
-		sym := v.Aux
+		off1 := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		val := v_0
 		if v_1.Op != OpAMD64ADDQconst {
 			break
 		}
-		off2 := v_1.AuxInt
+		off2 := auxIntToInt32(v_1.AuxInt)
 		base := v_1.Args[0]
 		mem := v_2
-		if !(is32Bit(off1 + off2)) {
+		if !(is32Bit(int64(off1) + int64(off2))) {
 			break
 		}
 		v.reset(OpAMD64XORLload)
-		v.AuxInt = off1 + off2
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off1 + off2)
+		v.Aux = symToAux(sym)
 		v.AddArg3(val, base, mem)
 		return true
 	}
@@ -28058,24 +28019,24 @@
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	// match: (XORLmodify [off1] {sym} (ADDQconst [off2] base) val mem)
-	// cond: is32Bit(off1+off2)
+	// cond: is32Bit(int64(off1)+int64(off2))
 	// result: (XORLmodify [off1+off2] {sym} base val mem)
 	for {
-		off1 := v.AuxInt
-		sym := v.Aux
+		off1 := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		if v_0.Op != OpAMD64ADDQconst {
 			break
 		}
-		off2 := v_0.AuxInt
+		off2 := auxIntToInt32(v_0.AuxInt)
 		base := v_0.Args[0]
 		val := v_1
 		mem := v_2
-		if !(is32Bit(off1 + off2)) {
+		if !(is32Bit(int64(off1) + int64(off2))) {
 			break
 		}
 		v.reset(OpAMD64XORLmodify)
-		v.AuxInt = off1 + off2
-		v.Aux = sym
+		v.AuxInt = int32ToAuxInt(off1 + off2)
+		v.Aux = symToAux(sym)
 		v.AddArg3(base, val, mem)
 		return true
 	}
@@ -28116,7 +28077,7 @@
 			}
 			y := v_0.Args[1]
 			v_0_0 := v_0.Args[0]
-			if v_0_0.Op != OpAMD64MOVQconst || v_0_0.AuxInt != 1 {
+			if v_0_0.Op != OpAMD64MOVQconst || auxIntToInt64(v_0_0.AuxInt) != 1 {
 				continue
 			}
 			x := v_1
@@ -28128,19 +28089,19 @@
 	}
 	// match: (XORQ (MOVQconst [c]) x)
 	// cond: isUint64PowerOfTwo(c) && uint64(c) >= 128
-	// result: (BTCQconst [log2(c)] x)
+	// result: (BTCQconst [int8(log2(c))] x)
 	for {
 		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
 			if v_0.Op != OpAMD64MOVQconst {
 				continue
 			}
-			c := v_0.AuxInt
+			c := auxIntToInt64(v_0.AuxInt)
 			x := v_1
 			if !(isUint64PowerOfTwo(c) && uint64(c) >= 128) {
 				continue
 			}
 			v.reset(OpAMD64BTCQconst)
-			v.AuxInt = log2(c)