arm64/instgen: support memory with extensions in SVE

This CL generates CL 764780.

Supported addressing patterns:
(Z7.D.SXTW<<2)(Z6.D), where Z6.D is the base and Z7.D holds the indices.
SXTW/UXTW represent signed/unsigned extension; << represents LSL.

Change-Id: Id79c8534d140a1d36ee12f80e8294486cac0c236
Reviewed-on: https://go-review.googlesource.com/c/arch/+/764800
Commit-Queue: Junyang Shao <shaojunyang@google.com>
Reviewed-by: David Chase <drchase@google.com>
LUCI-TryBot-Result: golang-scoped@luci-project-accounts.iam.gserviceaccount.com <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
diff --git a/arm64/instgen/encodings.go b/arm64/instgen/encodings.go
index c6750ae..b11f2e4 100644
--- a/arm64/instgen/encodings.go
+++ b/arm64/instgen/encodings.go
@@ -158,7 +158,10 @@
 	`Is the name of the destination scalable vector register, encoded in the "Zd" field.
 bit range mappings:
 Zd: [0:5)
-`: {"encodeZd", `return v, true`, "enc_Zd"},
+`: {"encodeZd", `if !stripRawZ(&v) {
+			return 0, false
+		}
+		return v, true`, "enc_Zd"},
 	`Is the name of the first source and destination scalable predicate register, encoded in the "Pdn" field.
 bit range mappings:
 Pdn: [0:4)
@@ -166,7 +169,10 @@
 	`Is the name of the first source and destination scalable vector register, encoded in the "Zdn" field.
 bit range mappings:
 Zdn: [0:5)
-`: {"encodeZdnDest", `return v, true`, "enc_Zdn"},
+`: {"encodeZdnDest", `if !stripRawZ(&v) {
+			return 0, false
+		}
+		return v, true`, "enc_Zdn"},
 	`Is the name of the first source scalable predicate register, encoded in the "Pn" field.
 bit range mappings:
 Pn: [5:9)
@@ -174,7 +180,10 @@
 	`Is the name of the first source scalable vector register, encoded in the "Zn" field.
 bit range mappings:
 Zn: [5:10)
-`: {"encodeZn510", `return v << 5, true`, "enc_Zn"},
+`: {"encodeZn510V1", `if !stripRawZ(&v) {
+			return 0, false
+		}
+		return v << 5, true`, "enc_Zn"},
 	`Is the name of the governing scalable predicate register P0-P7, encoded in the "Pg" field.
 bit range mappings:
 Pg: [10:13)
@@ -197,7 +206,10 @@
 	`Is the name of the second source and destination scalable vector register, encoded in the "Zda" field.
 bit range mappings:
 Zda: [0:5)
-`: {"encodeZdaDest", `return v, true`, "enc_Zda"},
+`: {"encodeZdaDest", `if !stripRawZ(&v) {
+			return 0, false
+		}
+		return v, true`, "enc_Zda"},
 	`Is the name of the second source scalable predicate register, encoded in the "Pm" field.
 bit range mappings:
 Pm: [16:20)
@@ -205,11 +217,17 @@
 	`Is the name of the second source scalable vector register, encoded in the "Zm" field.
 bit range mappings:
 Zm: [16:21)
-`: {"encodeZm1621V2", `return v << 16, true`, "enc_Zm"},
+`: {"encodeZm1621V2", `if !stripRawZ(&v) {
+			return 0, false
+		}
+		return v << 16, true`, "enc_Zm"},
 	`Is the name of the second source scalable vector register, encoded in the "Zm" field.
 bit range mappings:
 Zm: [5:10)
-`: {"encodeZm510V1", `return (v & 31) << 5, true`, "enc_Zm"},
+`: {"encodeZm510V1", `if !stripRawZ(&v) {
+			return 0, false
+		}
+		return (v & 31) << 5, true`, "enc_Zm"},
 	`Is the name of the source and destination scalable predicate register, encoded in the "Pdn" field.
 bit range mappings:
 Pdn: [0:4)
@@ -217,7 +235,10 @@
 	`Is the name of the source and destination scalable vector register, encoded in the "Zdn" field.
 bit range mappings:
 Zdn: [0:5)
-`: {"encodeZdnSrcDst", `return v, true`, "enc_Zdn"},
+`: {"encodeZdnSrcDst", `if !stripRawZ(&v) {
+			return 0, false
+		}
+		return v, true`, "enc_Zdn"},
 	`Is the name of the source scalable predicate register, encoded in the "Pm" field.
 bit range mappings:
 Pm: [5:9)
@@ -229,23 +250,38 @@
 	`Is the name of the source scalable vector register, encoded in the "Zn" field.
 bit range mappings:
 Zn: [5:10)
-`: {"encodeZn510Src", `return (v & 31) << 5, true`, "enc_Zn"},
+`: {"encodeZn510Src", `if !stripRawZ(&v) {
+			return 0, false
+		}
+		return (v & 31) << 5, true`, "enc_Zn"},
 	`Is the name of the third source and destination scalable vector register, encoded in the "Zda" field.
 bit range mappings:
 Zda: [0:5)
-`: {"encodeZda3RdSrcDst", `return v, true`, "enc_Zda"},
+`: {"encodeZda3RdSrcDst", `if !stripRawZ(&v) {
+			return 0, false
+		}
+		return v, true`, "enc_Zda"},
 	`Is the name of the third source scalable vector register, encoded in the "Za" field.
 bit range mappings:
 Za: [16:21)
-`: {"encodeZa16213Rd", `return v << 16, true`, "enc_Za"},
+`: {"encodeZa16213Rd", `if !stripRawZ(&v) {
+			return 0, false
+		}
+		return v << 16, true`, "enc_Za"},
 	`Is the name of the third source scalable vector register, encoded in the "Za" field.
 bit range mappings:
 Za: [5:10)
-`: {"encodeZa5103Rd", `return v << 5, true`, "enc_Za"},
+`: {"encodeZa5103Rd", `if !stripRawZ(&v) {
+			return 0, false
+		}
+		return v << 5, true`, "enc_Za"},
 	`Is the name of the third source scalable vector register, encoded in the "Zk" field.
 bit range mappings:
 Zk: [5:10)
-`: {"encodeZk5103Rd", `return v << 5, true`, "enc_Zk"},
+`: {"encodeZk5103Rd", `if !stripRawZ(&v) {
+			return 0, false
+		}
+		return v << 5, true`, "enc_Zk"},
 	`Is the name of the vector select predicate register P0-P7, encoded in the "Pv" field.
 bit range mappings:
 Pv: [10:13)
@@ -606,7 +642,10 @@
 	`No-op check, returns true`: {"encodeNoop", `return 0, true`, "enc_NIL"},
 	`Is the 32-bit name of the source and destination general-purpose register, encoded in the "Rdn" field.
 bit range mappings:
-Rdn: [0:5)`: {"encodeWdn05", `if v == REG_RSP {
+Rdn: [0:5)`: {"encodeWdn05", `if !checkIsR(v) {
+		return 0, false
+	}
+	if v == REG_RSP {
 		return 0, false
 	}
 	return v & 31, true`, "enc_Rdn"},
@@ -615,41 +654,62 @@
 Vd: [0:5)`: {"encodeVd0564", `return v & 31, true`, "enc_Vd"},
 	`Is the 64-bit name of the destination general-purpose register, encoded in the "Rd" field.
 bit range mappings:
-Rd: [0:5)`: {"encodeRd05", `if v == REG_RSP {
+Rd: [0:5)`: {"encodeRd05", `if !checkIsR(v) {
+		return 0, false
+	}
+	if v == REG_RSP {
 		return 0, false
 	}
 	return v & 31, true`, "enc_Rd"},
 	`Is the 64-bit name of the first source general-purpose register, encoded in the "Rn" field.
 bit range mappings:
-Rn: [5:10)`: {"encodeRn510", `if v == REG_RSP {
+Rn: [5:10)`: {"encodeRn510", `if !checkIsR(v) {
+		return 0, false
+	}
+	if v == REG_RSP {
 		return 0, false
 	}
 	return (v & 31) << 5, true`, "enc_Rn"},
 	`Is the 64-bit name of the second source general-purpose register, encoded in the "Rm" field.
 bit range mappings:
-Rm: [16:21)`: {"encodeRm1621", `if v == REG_RSP {
+Rm: [16:21)`: {"encodeRm1621V1", `if !checkIsR(v) {
+		return 0, false
+	}
+	if v == REG_RSP {
 		return 0, false
 	}
 	return (v & 31) << 16, true`, "enc_Rm"},
 	`Is the 64-bit name of the source and destination general-purpose register, encoded in the "Rdn" field.
 bit range mappings:
-Rdn: [0:5)`: {"encodeXdn05", `if v == REG_RSP {
+Rdn: [0:5)`: {"encodeXdn05", `if !checkIsR(v) {
+		return 0, false
+	}
+	if v == REG_RSP {
 		return 0, false
 	}
 	return v & 31, true`, "enc_Rdn"},
 	`Is the name of the source scalable vector register, encoded in the "Zm" field.
 bit range mappings:
-Zm: [5:10)`: {"encodeZm510V2", `return (v & 31) << 5, true`, "enc_Zm"},
+Zm: [5:10)`: {"encodeZm510V2", `if !stripRawZ(&v) {
+			return 0, false
+		}
+		return (v & 31) << 5, true`, "enc_Zm"},
 	`Is the number [0-30] of the destination general-purpose register or the name ZR (31), encoded in the "Rd" field.
 bit range mappings:
-Rd: [0:5)`: {"encodeRd05ZR", `if v == REG_RSP {
+Rd: [0:5)`: {"encodeRd05ZR", `if !checkIsR(v) {
+		return 0, false
+	}
+	if v == REG_RSP {
 		return 0, false
 	}
 	// ZR is just R31
 	return v & 31, true`, "enc_Rd"},
 	`Is the number [0-30] of the general-purpose source register or the name SP (31), encoded in the "Rn" field.
 bit range mappings:
-Rn: [5:10)`: {"encodeRn510SP", `if v == REG_R31 {
+Rn: [5:10)`: {"encodeRn510SPV1", `if !checkIsR(v) {
+		return 0, false
+	}
+	if v == REG_R31 {
 		return 0, false
 	}
 	if v == REG_RSP {
@@ -658,25 +718,37 @@
 	return (v & 31) << 5, true`, "enc_Rn"},
 	`Is the number [0-30] of the source and destination general-purpose register or the name ZR (31), encoded in the "Rdn" field.
 bit range mappings:
-Rdn: [0:5)`: {"encodeRdn05ZR", `if v == REG_RSP {
+Rdn: [0:5)`: {"encodeRdn05ZR", `if !checkIsR(v) {
+		return 0, false
+	}
+	if v == REG_RSP {
 		return 0, false
 	}
 	return v & 31, true`, "enc_Rdn"},
 	`Is the number [0-30] of the source general-purpose register or the name ZR (31), encoded in the "Rm" field.
 bit range mappings:
-Rm: [16:21)`: {"encodeRm1621ZR", `if v == REG_RSP {
+Rm: [16:21)`: {"encodeRm1621ZR", `if !checkIsR(v) {
+		return 0, false
+	}
+	if v == REG_RSP {
 		return 0, false
 	}
 	return (v & 31) << 16, true`, "enc_Rm"},
 	`Is the number [0-30] of the source general-purpose register or the name ZR (31), encoded in the "Rm" field.
 bit range mappings:
-Rm: [5:10)`: {"encodeRm510ZR", `if v == REG_RSP {
+Rm: [5:10)`: {"encodeRm510ZR", `if !checkIsR(v) {
+		return 0, false
+	}
+	if v == REG_RSP {
 		return 0, false
 	}
 	return (v & 31) << 5, true`, "enc_Rm"},
 	`Is the number [0-30] of the source general-purpose register or the name ZR (31), encoded in the "Rn" field.
 bit range mappings:
-Rn: [5:10)`: {"encodeRn510ZR", `if v == REG_RSP {
+Rn: [5:10)`: {"encodeRn510ZR", `if !checkIsR(v) {
+		return 0, false
+	}
+	if v == REG_RSP {
 		return 0, false
 	}
 	return (v & 31) << 5, true`, "enc_Rn"},
@@ -709,17 +781,23 @@
 	`For the "16-bit to 64-bit" variant: is the name of the second source scalable vector register Z0-Z15, encoded in the "Zm" field.
 bit range mappings:
 Zm: [16:20)
-`: {"encodeZm1620_16To64Bit", `if v > 15 {
-		return 0, false
-	}
-	return v << 16, true`, "enc_Zm"},
+`: {"encodeZm1620_16To64Bit", `if !stripRawZ(&v) {
+			return 0, false
+		}
+		if v > 15 {
+			return 0, false
+		}
+		return v << 16, true`, "enc_Zm"},
 	`For the "16-bit" and "32-bit" variants: is the name of the second source scalable vector register Z0-Z7, encoded in the "Zm" field.
 bit range mappings:
 Zm: [16:19)
-`: {"encodeZm1619_16Bit32Bit", `if v > 7 {
-		return 0, false
-	}
-	return v << 16, true`, "enc_Zm"},
+`: {"encodeZm1619_16Bit32Bit", `if !stripRawZ(&v) {
+			return 0, false
+		}
+		if v > 7 {
+			return 0, false
+		}
+		return v << 16, true`, "enc_Zm"},
 	`For the "16-bit" variant: is the element index, in the range 0 to 7, encoded in the "i3h:i3l" fields.
 bit range mappings:
 i3h: [22:23)
@@ -746,10 +824,13 @@
 	`For the "32-bit" variant: is the name of the second source scalable vector register Z0-Z7, encoded in the "Zm" field.
 bit range mappings:
 Zm: [16:19)
-`: {"encodeZm1619_32Bit", `if v > 7 {
-		return 0, false
-	}
-	return v << 16, true`, "enc_Zm"},
+`: {"encodeZm1619_32Bit", `if !stripRawZ(&v) {
+			return 0, false
+		}
+		if v > 7 {
+			return 0, false
+		}
+		return v << 16, true`, "enc_Zm"},
 	`For the "64-bit" variant: is the element index, in the range 0 to 1, encoded in the "i1" field.
 bit range mappings:
 i1: [20:21)
@@ -768,10 +849,13 @@
 	`For the "64-bit" variant: is the name of the second source scalable vector register Z0-Z15, encoded in the "Zm" field.
 bit range mappings:
 Zm: [16:20)
-`: {"encodeZm1620_64Bit", `if v > 15 {
-		return 0, false
-	}
-	return v << 16, true`, "enc_Zm"},
+`: {"encodeZm1620_64Bit", `if !stripRawZ(&v) {
+			return 0, false
+		}
+		if v > 15 {
+			return 0, false
+		}
+		return v << 16, true`, "enc_Zm"},
 	`For the "8-bit to 16-bit" variant: is the immediate index of a pair of 8-bit elements within each 128-bit vector segment, in the range 0 to 7, encoded in the "i3h:i3l" fields.
 bit range mappings:
 i3h: [22:23)
@@ -790,10 +874,13 @@
 	`For the "8-bit to 32-bit" variant: is the name of the second source scalable vector register Z0-Z7, encoded in the "Zm" field.
 bit range mappings:
 Zm: [16:19)
-`: {"encodeZm1619_8To32Bit", `if v > 7 {
-		return 0, false
-	}
-	return v << 16, true`, "enc_Zm"},
+`: {"encodeZm1619_8To32Bit", `if !stripRawZ(&v) {
+			return 0, false
+		}
+		if v > 7 {
+			return 0, false
+		}
+		return v << 16, true`, "enc_Zm"},
 	`For the "Double-precision" variant: is the immediate index, in the range 0 to 1, encoded in the "i1" field.
 bit range mappings:
 i1: [20:21)
@@ -804,10 +891,13 @@
 	`For the "Double-precision" variant: is the name of the second source scalable vector register Z0-Z15, encoded in the "Zm" field.
 bit range mappings:
 Zm: [16:20)
-`: {"encodeZm1620_DoublePrecision", `if v > 15 {
-		return 0, false
-	}
-	return v << 16, true`, "enc_Zm"},
+`: {"encodeZm1620_DoublePrecision", `if !stripRawZ(&v) {
+			return 0, false
+		}
+		if v > 15 {
+			return 0, false
+		}
+		return v << 16, true`, "enc_Zm"},
 	`For the "Doubleword" variant: is the optional portion index, in the range 0 to 7, defaulting to 0, encoded in the "i3h:i3l" fields.
 bit range mappings:
 i3h: [22:23)
@@ -819,10 +909,13 @@
 	`For the "Half-precision" and "Single-precision" variants: is the name of the second source scalable vector register Z0-Z7, encoded in the "Zm" field.
 bit range mappings:
 Zm: [16:19)
-`: {"encodeZm1619_HalfSinglePrecision", `if v > 7 {
-		return 0, false
-	}
-	return v << 16, true`, "enc_Zm"},
+`: {"encodeZm1619_HalfSinglePrecision", `if !stripRawZ(&v) {
+			return 0, false
+		}
+		if v > 7 {
+			return 0, false
+		}
+		return v << 16, true`, "enc_Zm"},
 	`For the "Half-precision" variant: is the immediate index, in the range 0 to 7, encoded in the "i3h:i3l" fields.
 bit range mappings:
 i3h: [22:23)
@@ -993,10 +1086,13 @@
 	`For the "16-bit" variant: is the name of the second source scalable vector register Z0-Z7, encoded in the "Zm" field.
 bit range mappings:
 Zm: [16:19)
-`: {"encodeZm_1619_Range0_7V1", `if v <= 7 {
-		return v << 16, true
-	}
-	return 0, false`, "enc_Zm"},
+`: {"encodeZm_1619_Range0_7V1", `if !stripRawZ(&v) {
+			return 0, false
+		}
+		if v <= 7 {
+			return v << 16, true
+		}
+		return 0, false`, "enc_Zm"},
 	`For the "32-bit" variant: is the element index, in the range 0 to 1, encoded in the "i1" field.
 bit range mappings:
 i1: [20:21)
@@ -1007,10 +1103,13 @@
 	`For the "32-bit" variant: is the name of the second source scalable vector register Z0-Z15, encoded in the "Zm" field.
 bit range mappings:
 Zm: [16:20)
-`: {"encodeZm_1620_Range0_15", `if v <= 15 {
-		return v << 16, true
-	}
-	return 0, false`, "enc_Zm"},
+`: {"encodeZm_1620_Range0_15", `if !stripRawZ(&v) {
+			return 0, false
+		}
+		if v <= 15 {
+			return v << 16, true
+		}
+		return 0, false`, "enc_Zm"},
 	`For the "Equal", "Greater than or equal", "Greater than", "Less than or equal", "Less than", and "Not equal" variants: is the signed immediate operand, in the range -16 to 15, encoded in the "imm5" field.
 bit range mappings:
 imm5: [16:21)
@@ -1028,10 +1127,13 @@
 	`For the "Half-precision" variant: is the name of the second source scalable vector register Z0-Z7, encoded in the "Zm" field.
 bit range mappings:
 Zm: [16:19)
-`: {"encodeZm_1619_Half", `if v <= 7 {
-		return v << 16, true
-	}
-	return 0, false`, "enc_Zm"},
+`: {"encodeZm_1619_Half", `if !stripRawZ(&v) {
+			return 0, false
+		}
+		if v <= 7 {
+			return v << 16, true
+		}
+		return 0, false`, "enc_Zm"},
 	`For the "Higher or same", "Higher", "Lower or same", and "Lower" variants: is the unsigned immediate operand, in the range 0 to 127, encoded in the "imm7" field.
 bit range mappings:
 imm7: [14:21)
@@ -1049,10 +1151,13 @@
 	`For the "Single-precision" variant: is the name of the second source scalable vector register Z0-Z15, encoded in the "Zm" field.
 bit range mappings:
 Zm: [16:20)
-`: {"encodeZm_1620_Single", `if v <= 15 {
-		return v << 16, true
-	}
-	return 0, false`, "enc_Zm"},
+`: {"encodeZm_1620_Single", `if !stripRawZ(&v) {
+			return 0, false
+		}
+		if v <= 15 {
+			return v << 16, true
+		}
+		return 0, false`, "enc_Zm"},
 	`Is a 64, 32, 16 or 8-bit bitmask consisting of replicated 2, 4, 8, 16, 32 or 64 bit fields, each field containing a rotated run of non-zero bits, encoded in the "imm13" field.
 bit range mappings:
 imm13: [5:18)
@@ -1110,7 +1215,10 @@
 	`Is the 64-bit name of the destination general-purpose register or stack pointer, encoded in the "Rd" field.
 bit range mappings:
 Rd: [0:5)
-`: {"encodeRd05_SPAllowed", `if v == REG_R31 {
+`: {"encodeRd05_SPAllowed", `if !checkIsR(v) {
+		return 0, false
+	}
+	if v == REG_R31 {
 		return 0, false
 	}
 	if v == REG_RSP {
@@ -1120,7 +1228,10 @@
 	`Is the 64-bit name of the source general-purpose register or stack pointer, encoded in the "Rn" field.
 bit range mappings:
 Rn: [16:21)
-`: {"encodeRn1621_SPAllowed", `if v == REG_R31 {
+`: {"encodeRn1621_SPAllowed", `if !checkIsR(v) {
+		return 0, false
+	}
+	if v == REG_R31 {
 		return 0, false
 	}
 	if v == REG_RSP {
@@ -1536,7 +1647,10 @@
 	`Is the name of the first scalable vector register of the source multi-vector group, encoded in the "Zn" field.
 bit range mappings:
 Zn: [5:10)
-`: {"encodeZn510MultiSrc1", `return v << 5, true`, "enc_Zn"},
+`: {"encodeZn510MultiSrc1", `if !stripRawZ(&v) {
+			return 0, false
+		}
+		return v << 5, true`, "enc_Zn"},
 	`Is the name of the first table vector register, encoded as "Zn".
 bit range mappings:
 Zn: [5:10)
@@ -1556,7 +1670,10 @@
 	`Is the name of the second scalable vector register of the source multi-vector group, encoded in the "Zn" field.
 bit range mappings:
 Zn: [5:10)
-`: {"encodeZn510MultiSrc2", `return (v - 1) << 5, true`, "enc_Zn"},
+`: {"encodeZn510MultiSrc2", `if !stripRawZ(&v) {
+			return 0, false
+		}
+		return (v - 1) << 5, true`, "enc_Zn"},
 	`Is the name of the second table vector register, encoded as "Zn" plus 1 modulo 32.
 bit range mappings:
 Zn: [5:10)
@@ -1564,7 +1681,10 @@
 	`Is the name of the source scalable vector register, encoded in the "Zm" field.
 bit range mappings:
 Zm: [16:21)
-`: {"encodeZm1621V1", `return v << 16, true`, "enc_Zm"},
+`: {"encodeZm1621V1", `if !stripRawZ(&v) {
+			return 0, false
+		}
+		return v << 16, true`, "enc_Zm"},
 	`Is the name of the table vector register, encoded in the "Zn" field.
 bit range mappings:
 Zn: [5:10)
@@ -1583,4 +1703,211 @@
 		return 0, false
 	}
 	return v << 23, true`, "enc_i1"},
+	`Check this is mod amount and is 1
+`: {"encodeModAmt1Check", `if v == 1 {
+		return 0, true
+	}
+	return 0, false`, "enc_NIL"},
+	`Check this is mod amount and is 2
+`: {"encodeModAmt2Check", `if v == 2 {
+		return 0, true
+	}
+	return 0, false`, "enc_NIL"},
+	`Check this is mod amount and is 3
+`: {"encodeModAmt3Check", `if v == 3 {
+		return 0, true
+	}
+	return 0, false`, "enc_NIL"},
+	`Check this is mod amount and is 4
+`: {"encodeModAmt4Check", `if v == 4 {
+		return 0, true
+	}
+	return 0, false`, "enc_NIL"},
+	`Check this is mod and is LSL
+`: {"encodeModLSLCheck", `if v&0b100 != 0 {
+		return 0, true
+	}
+	return 0, false`, "enc_NIL"},
+	`Check this is mod and is SXTW
+`: {"encodeModSXTWCheck", `if v&0b10 != 0 {
+		return 0, true
+	}
+	return 0, false`, "enc_NIL"},
+	`Check this is mod and is UXTW
+`: {"encodeModUXTWCheck", `if v&0b1 != 0 {
+		return 0, true
+	}
+	return 0, false`, "enc_NIL"},
+	`Is the 64-bit name of the general-purpose base register or stack pointer, encoded in the "Rn" field.
+bit range mappings:
+Rn: [5:10)
+`: {"encodeRn510SPV2", `if !checkIsR(v) {
+		return 0, false
+	}
+	if v == REG_R31 {
+		return 0, false
+	}
+	if v == REG_RSP {
+		return 31 << 5, true
+	}
+	return (v & 31) << 5, true`, "enc_Rn"},
+	`Is the 64-bit name of the general-purpose offset register, encoded in the "Rm" field.
+bit range mappings:
+Rm: [16:21)
+`: {"encodeRm1621V2", `if !checkIsR(v) {
+		return 0, false
+	}
+	if v == REG_RSP {
+		return 0, false
+	}
+	return (v & 31) << 16, true`, "enc_Rm"},
+	`Is the index extend and shift specifier,
+msz	<mod>
+00	[absent]
+x1	LSL
+10	LSL
+bit range mappings:
+msz: [10:12)
+`: {"encodeMsz1012", `// This does not accept UXTW and SXTW, check that
+	if v&0b11 != 0 {
+		return 0, false
+	}
+	// Note: this encoding function's semantic is entailed by its peer that
+	// encode <amount>, so just do nothing.
+	return codeNoOp, false`, "enc_msz"},
+	`Is the index extend and shift specifier,
+xs	<mod>
+0	UXTW
+1	SXTW
+bit range mappings:
+xs: [14:15)
+`: {"encodeXs1415", `if v&0b1 != 0 {
+		return 0, true
+	} else if v&0b10 != 0 {
+		return 1 << 14, true
+	}
+	return 0, false`, "enc_xs"},
+	`Is the index extend and shift specifier,
+xs	<mod>
+0	UXTW
+1	SXTW
+bit range mappings:
+xs: [22:23)
+`: {"encodeXs2223", `if v&0b1 != 0 {
+		return 0, true
+	} else if v&0b10 != 0 {
+		return 1 << 22, true
+	}
+	return 0, false`, "enc_xs"},
+	`Is the index shift amount,
+msz	<amount>
+00	[absent]
+01	#1
+10	#2
+11	#3
+bit range mappings:
+msz: [10:12)
+`: {"encodeMsz1012Amount", `if v <= 3 {
+		return v << 10, true
+	}
+	return 0, false`, "enc_msz"},
+	`Is the name of the base scalable vector register, encoded in the "Zn" field.
+bit range mappings:
+Zn: [5:10)
+`: {"encodeZn510V2", `if !stripRawZ(&v) {
+			return 0, false
+		}
+		return v << 5, true`, "enc_Zn"},
+	`Is the name of the first scalable vector register to be transferred, encoded in the "Zt" field.
+bit range mappings:
+Zt: [0:5)
+`: {"encodeZt051", `if !stripRawZ(&v) {
+			return 0, false
+		}
+		return v, true`, "enc_Zt"},
+	`Is the name of the fourth scalable vector register to be transferred, encoded as "Zt" plus 3 modulo 32.
+bit range mappings:
+Zt: [0:5)
+`: {"encodeZt054", `if !stripRawZ(&v) {
+			return 0, false
+		}
+		return (v - 3) % 32, true`, "enc_Zt"},
+	`Is the name of the offset scalable vector register, encoded in the "Zm" field.
+bit range mappings:
+Zm: [16:21)
+`: {"encodeZm1621V3", `if !stripRawZ(&v) {
+			return 0, false
+		}
+		return v << 16, true`, "enc_Zm"},
+	`Is the name of the scalable vector register to be transferred, encoded in the "Zt" field.
+bit range mappings:
+Zt: [0:5)
+`: {"encodeZt05", `if !stripRawZ(&v) {
+			return 0, false
+		}
+		return v, true`, "enc_Zt"},
+	`Is the name of the second scalable vector register to be transferred, encoded as "Zt" plus 1 modulo 32.
+bit range mappings:
+Zt: [0:5)
+`: {"encodeZt052", `if !stripRawZ(&v) {
+			return 0, false
+		}
+		return (v - 1) % 32, true`, "enc_Zt"},
+	`Is the name of the third scalable vector register to be transferred, encoded as "Zt" plus 2 modulo 32.
+bit range mappings:
+Zt: [0:5)
+`: {"encodeZt053", `if !stripRawZ(&v) {
+			return 0, false
+		}
+		return (v - 2) % 32, true`, "enc_Zt"},
+	`Is the optional 64-bit name of the general-purpose offset register, defaulting to XZR, encoded in the "Rm" field.
+bit range mappings:
+Rm: [16:21)
+`: {"encodeRm1621XZR", `if v == 0 {
+		// Absent operand: the spec defaults Rm to XZR (R31), placed in bits [16:21) like any other Rm.
+		return 31 << 16, true
+	}
+	if !checkIsR(v) {
+		return 0, false
+	}
+	return (v & 31) << 16, true`, "enc_Rm"},
+	`Is the size specifier,
+size	<T>
+00	B
+01	H
+10	S
+11	D
+bit range mappings:
+size: [21:23)
+`: {"encodeSize2123V1", `switch v {
+	case ARNG_B:
+		return 0, true
+	case ARNG_H:
+		return 1 << 21, true
+	case ARNG_S:
+		return 2 << 21, true
+	case ARNG_D:
+		return 3 << 21, true
+	default:
+		return 0, false
+	}`, "enc_size"},
+	`Is the size specifier,
+size	<T>
+00	RESERVED
+01	H
+10	S
+11	D
+bit range mappings:
+size: [21:23)
+`: {"encodeSize2123V2", `switch v {
+	case ARNG_H:
+		return 1 << 21, true
+	case ARNG_S:
+		return 2 << 21, true
+	case ARNG_D:
+		return 3 << 21, true
+	}
+	return 0, false`, "enc_size"},
+	"Check that there is no modifier (UXTW, SXTW, LSL)": {"encodeNoModCheck", "return 0, v == 0", "enc_NIL"},
+	"Check that there is no modifier amount":            {"encodeNoAmtCheck", "return 0, v == 0", "enc_NIL"},
 }
diff --git a/arm64/instgen/generator.go b/arm64/instgen/generator.go
index 5dbda31..19fbd3c 100644
--- a/arm64/instgen/generator.go
+++ b/arm64/instgen/generator.go
@@ -136,6 +136,34 @@
 
 package arm64
 
+import "cmd/internal/obj"
+
+// stripRawZ normalizes *v to a 5-bit Z register number. If *v is a raw
+// register (>= obj.RBaseARM64), it must lie in REG_Z0..REG_Z31 or in
+// [REG_ZARNG, REG_ZARNGELEM); otherwise stripRawZ returns false and leaves
+// *v unchanged. On success it keeps only the low 5 bits of *v and returns true.
+func stripRawZ(v *uint32) (bool) {
+	if *v >= obj.RBaseARM64 {
+		if !(*v >= REG_Z0 && *v <= REG_Z31) && !(*v >= REG_ZARNG && *v < REG_ZARNGELEM) {
+			return false
+		}
+	}
+	*v = *v & 31
+	return true
+}
+
+// checkIsR reports whether v is acceptable as a scalar (R) register.
+// In the encoding scheme, R is always assumed to be passed in as raw, i.e.
+// starting at RBaseARM64. It returns false when v is above REG_R31 and is
+// not REG_RSP, and true otherwise. It takes v by value and strips nothing;
+// callers mask the register number themselves.
+func checkIsR(v uint32) (bool) {
+	if v > REG_R31 && v != REG_RSP {
+		return false
+	}
+	return true
+}
+
 const (
   enc_NIL component = iota
 {{- range .Constants}}
@@ -211,6 +239,7 @@
 	"AC_REGLIST2": 4,
 	"AC_REGLIST3": 4,
 	"AC_REGLIST4": 4,
+	"AC_MEMEXT":   5,
 }
 
 func readExistingGoOps(aoutPath string) map[string]bool {
@@ -577,6 +606,7 @@
 		s = strings.Replace(s, "{", "", -1)
 		s = strings.Replace(s, "}", "", -1)
 		s = strings.Replace(s, "#", "c", -1)
+		s = strings.TrimPrefix(s, "_")
 		return s
 	}
 
@@ -601,6 +631,9 @@
 
 	// Map from ugly name to better name
 	ugly2better := make(map[string]string)
+	// Different operand type sequences can produce the same name from
+	// operandsToName; track names already handed out so later ones get a _V<N> suffix.
+	usedNames := make(map[string]bool)
 
 	for _, x := range otSlice {
 		// Get a canonical order of ugly names
@@ -610,6 +643,8 @@
 			uglies = append(uglies, c)
 		}
 
+		baseName := operandsToName(x)
+
 		if len(uglies) > 1 {
 			// if there is more than one, sort into decreasing
 			// frequency order, then sort by insertion order.
@@ -620,11 +655,29 @@
 				return uglyInsertionOrder[uglies[i]] < uglyInsertionOrder[uglies[j]]
 			})
 			for j, u := range uglies {
-				ugly2better[u] = fmt.Sprintf("%s__%d", operandsToName(x), j+1)
+				name := fmt.Sprintf("%s__%d", baseName, j+1)
+				if usedNames[name] {
+					v := 2
+					for usedNames[fmt.Sprintf("%s_V%d__%d", baseName, v, j+1)] {
+						v++
+					}
+					name = fmt.Sprintf("%s_V%d__%d", baseName, v, j+1)
+				}
+				ugly2better[u] = name
+				usedNames[name] = true
 			}
 		} else {
 			// if only one, do not add a __N suffix.
-			ugly2better[uglies[0]] = operandsToName(x)
+			name := baseName
+			if usedNames[name] {
+				v := 2
+				for usedNames[fmt.Sprintf("%s_V%d", baseName, v)] {
+					v++
+				}
+				name = fmt.Sprintf("%s_V%d", baseName, v)
+			}
+			ugly2better[uglies[0]] = name
+			usedNames[name] = true
 		}
 	}
 
@@ -737,6 +790,7 @@
 	"Zm":  "Z",
 	"Za":  "Z",
 	"Zk":  "Z",
+	"Zt":  "Z",
 
 	"Pd":  "P",
 	"Pdn": "P",
@@ -1176,6 +1230,135 @@
 
 				goAsmOps = append(goAsmOps, goAsmOp)
 				gnuAsmOps = append([]string{gnuAsmOp}, gnuAsmOps...)
+			} else if op.Typ == "AC_MEMEXT" {
+				var goReg1, gnuReg1 string
+				var goReg2, gnuReg2 string
+				var mod, amount string
+
+				// 1. Determine reg1 (Base)
+				if strings.Contains(op.Name, "<Xn|SP>") {
+					regIdx := cachedOrNew(regCache, "Xn|SP", 40)
+					if regIdx > 31 {
+						goReg1 = "RSP"
+						gnuReg1 = "SP"
+					} else {
+						if regIdx == 18 {
+							// R18 is reserved as the platform register.
+							regIdx = 19
+						}
+						goReg1 = fmt.Sprintf("R%d", regIdx)
+						gnuReg1 = fmt.Sprintf("X%d", regIdx)
+					}
+				} else if strings.Contains(op.Name, "<Zn>") {
+					regIdx := cachedOrNew(regCache, "Zn", 32)
+					arr := "D"
+					if strings.Contains(op.Name, ".S") {
+						arr = "S"
+					} else if strings.Contains(op.Name, ".D") {
+						arr = "D"
+					} else if strings.Contains(op.Name, ".<T>") {
+						arr = sveArr[rng.IntN(len(sveArr))]
+					}
+					goReg1 = fmt.Sprintf("Z%d.%s", regIdx, arr)
+					gnuReg1 = goReg1
+				}
+
+				// 2. Determine reg2 (Offset)
+				if strings.Contains(op.Name, "<Xm>") {
+					regIdx := cachedOrNew(regCache, "Xm", 32)
+					if regIdx == 18 {
+						regIdx = 19
+					}
+					goReg2 = fmt.Sprintf("R%d", regIdx)
+					gnuReg2 = fmt.Sprintf("X%d", regIdx)
+				} else if strings.Contains(op.Name, "<Zm>") {
+					regIdx := cachedOrNew(regCache, "Zm", 32)
+					arr2 := "D"
+					if strings.Contains(op.Name, ".S") {
+						arr2 = "S"
+					} else if strings.Contains(op.Name, ".D") {
+						arr2 = "D"
+					} else if strings.Contains(op.Name, ".<T>") {
+						arr2 = sveArr[rng.IntN(len(sveArr))]
+					}
+					goReg2 = fmt.Sprintf("Z%d.%s", regIdx, arr2)
+					gnuReg2 = goReg2
+				}
+
+				// 3. Determine mod
+				if strings.Contains(op.Name, "LSL") {
+					mod = "LSL"
+				} else if strings.Contains(op.Name, "UXTW") {
+					mod = "UXTW"
+				} else if strings.Contains(op.Name, "SXTW") {
+					mod = "SXTW"
+				} else if strings.Contains(op.Name, "<mod>") {
+					mods := []string{"", "LSL", "UXTW", "SXTW"}
+					mod = mods[rng.IntN(len(mods))]
+				}
+
+				// 4. Determine amount
+				if strings.Contains(op.Name, "#1") {
+					amount = "1"
+				} else if strings.Contains(op.Name, "#2") {
+					amount = "2"
+				} else if strings.Contains(op.Name, "#3") {
+					amount = "3"
+				} else if strings.Contains(op.Name, "#4") {
+					amount = "4"
+				} else if strings.Contains(op.Name, "<amount>") || strings.Contains(op.Name, "{<amount>}") {
+					if mod != "" {
+						amount = fmt.Sprintf("%d", rng.IntN(3)+1) // 1, 2, 3
+					}
+				}
+
+				// Enforce rule: mod and amount must be nil together, or non nil together
+				if strings.Contains(op.Name, "<mod>") && strings.Contains(op.Name, "<amount>") {
+					if mod == "" {
+						amount = ""
+					} else if amount == "" {
+						amount = fmt.Sprintf("%d", rng.IntN(3)+1)
+					}
+				}
+
+				// 5. Construct GNU string
+				gnuParts := []string{gnuReg1}
+				if gnuReg2 != "" {
+					gnuParts = append(gnuParts, gnuReg2)
+				}
+				if mod != "" && amount != "" {
+					gnuParts = append(gnuParts, fmt.Sprintf("%s #%s", mod, amount))
+				} else if mod != "" {
+					gnuParts = append(gnuParts, mod)
+				}
+				gnuAsmOp := "[" + strings.Join(gnuParts, ", ") + "]"
+
+				// 6. Construct Go string
+				var goOffset string
+				if goReg2 != "" {
+					goOffset = goReg2
+					if mod == "LSL" {
+						if amount != "" {
+							goOffset = fmt.Sprintf("%s<<%s", goReg2, amount)
+						}
+					} else if mod != "" {
+						if amount != "" {
+							goOffset = fmt.Sprintf("%s.%s<<%s", goReg2, mod, amount)
+						} else {
+							goOffset = fmt.Sprintf("%s.%s", goReg2, mod)
+						}
+					}
+				}
+
+				var goAsmOp string
+				if goOffset != "" {
+					goAsmOp = fmt.Sprintf("(%s)(%s)", goOffset, goReg1)
+				} else {
+					goAsmOp = fmt.Sprintf("(%s)", goReg1)
+				}
+
+				goAsmOps = append(goAsmOps, goAsmOp)
+				gnuAsmOps = append([]string{gnuAsmOp}, gnuAsmOps...)
 			}
 		}
 		// Try to assemble the GNU version.
diff --git a/arm64/instgen/xmlspec/parser.go b/arm64/instgen/xmlspec/parser.go
index 2deb524..905469b 100644
--- a/arm64/instgen/xmlspec/parser.go
+++ b/arm64/instgen/xmlspec/parser.go
@@ -1066,7 +1066,7 @@
 	insertElmAt := func(idx int, symbol, textExpWithRanges string, needOffset bool) {
 		if needOffset {
 			offset := 0
-			for i := range idx {
+			for i := range idx + 1 {
 				offset += insertionHistory[i]
 			}
 			insertionHistory[idx]++
@@ -1113,10 +1113,19 @@
 			case "ARNGS":
 				insertElmAt(index+1, "S", "Check this is a S arrangement", true)
 			case "R64":
-				// Width constraints are preceeding the element.
-				insertElmAt(index, "X", "Check this is a 64-bit scalar register", true)
+				if acl == "AC_SPZGREG" {
+					// Width constraints precede the element.
+					insertElmAt(index, "X", "Check this is a 64-bit scalar register", true)
+				} else {
+					insertElmAt(index+1, "nil", noOpCheck, true)
+				}
 			case "R32":
-				insertElmAt(index, "W", "Check this is a 32-bit scalar register", true)
+				if acl == "AC_SPZGREG" {
+					// Width constraints precede the element.
+					insertElmAt(index, "W", "Check this is a 32-bit scalar register", true)
+				} else {
+					insertElmAt(index+1, "nil", noOpCheck, true)
+				}
 			case "LSL1", "LSL2", "LSL3", "LSL4", "SXTW", "UXTW", "MODAMT1", "MODAMT2", "MODAMT3":
 				if acl == "AC_MEMEXT" {
 					switch constraintType {
@@ -1199,13 +1208,13 @@
 			}
 		case "[<Xn|SP>, <Xm>]", "[<Xn|SP>, <Zm>.D]", "[<Xn|SP>{, <Xm>}]", "[<Zn>.D{, <Xm>}]", "[<Zn>.S{, <Xm>}]":
 			if el == 6 && len(op.Elems) == 4 {
-				insertElmAt(4, "nil", noOpCheck, false)
-				insertElmAt(5, "nil", noOpCheck, false)
+				insertElmAt(4, "nil", "Check that there is no modifier (UXTW, SXTW, LSL)", false)
+				insertElmAt(5, "nil", "Check that there is no modifier amount", false)
 				resolved = true
 			}
 		case "[<Xn|SP>, <Zm>.S, <mod>]", "[<Xn|SP>, <Zm>.D, <mod>]":
 			if el == 6 && len(op.Elems) == 5 {
-				insertElmAt(5, "nil", noOpCheck, false)
+				insertElmAt(5, "nil", "Check that there is no modifier amount", false)
 				resolved = true
 			}
 		}