cmd/asm: add 128-bit FLDPQ and FSTPQ instructions for arm64

This CL adds assembly support for 128-bit FLDPQ and FSTPQ instructions.

This CL also deletes some wrong pre/post-indexed LDP and STP instructions,
such as {ALDP, C_UAUTO4K, C_NONE, C_NONE, C_PAIR, 74, 8, REGSP, 0, C_XPRE},
because when the offset type is C_UAUTO4K, pre and post don't work.

Change-Id: Ifd901d4440eb06eb9e86c9dd17518749fdf32848
Reviewed-on: https://go-review.googlesource.com/c/go/+/273668
Trust: eric fang <eric.fang@arm.com>
Run-TryBot: eric fang <eric.fang@arm.com>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: eric fang <eric.fang@arm.com>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
diff --git a/src/cmd/asm/internal/asm/testdata/arm64.s b/src/cmd/asm/internal/asm/testdata/arm64.s
index 91e3a0c..1e6cde7 100644
--- a/src/cmd/asm/internal/asm/testdata/arm64.s
+++ b/src/cmd/asm/internal/asm/testdata/arm64.s
@@ -982,6 +982,54 @@
 	FSTPS	(F3, F4), x(SB)
 	FSTPS	(F3, F4), x+8(SB)
 
+// FLDPQ/FSTPQ
+	FLDPQ   -4000(R0), (F1, F2)  // 1b803ed1610b40ad
+	FLDPQ	-1024(R0), (F1, F2)  // 010860ad
+	FLDPQ	(R0), (F1, F2)       // 010840ad
+	FLDPQ	16(R0), (F1, F2)     // 018840ad
+	FLDPQ	-16(R0), (F1, F2)    // 01887fad
+	FLDPQ.W	32(R0), (F1, F2)     // 0108c1ad
+	FLDPQ.P	32(R0), (F1, F2)     // 0108c1ac
+	FLDPQ	11(R0), (F1, F2)     // 1b2c0091610b40ad
+	FLDPQ	1024(R0), (F1, F2)   // 1b001091610b40ad
+	FLDPQ   4104(R0), (F1, F2)
+	FLDPQ   -4000(RSP), (F1, F2) // fb833ed1610b40ad
+	FLDPQ	-1024(RSP), (F1, F2) // e10b60ad
+	FLDPQ	(RSP), (F1, F2)      // e10b40ad
+	FLDPQ	16(RSP), (F1, F2)    // e18b40ad
+	FLDPQ	-16(RSP), (F1, F2)   // e18b7fad
+	FLDPQ.W	32(RSP), (F1, F2)    // e10bc1ad
+	FLDPQ.P	32(RSP), (F1, F2)    // e10bc1ac
+	FLDPQ	11(RSP), (F1, F2)    // fb2f0091610b40ad
+	FLDPQ	1024(RSP), (F1, F2)  // fb031091610b40ad
+	FLDPQ   4104(RSP), (F1, F2)
+	FLDPQ	-31(R0), (F1, F2)    // 1b7c00d1610b40ad
+	FLDPQ	-4(R0), (F1, F2)     // 1b1000d1610b40ad
+	FLDPQ	x(SB), (F1, F2)
+	FLDPQ	x+8(SB), (F1, F2)
+	FSTPQ	(F3, F4), -4000(R5)  // bb803ed1631300ad
+	FSTPQ	(F3, F4), -1024(R5)  // a31020ad
+	FSTPQ	(F3, F4), (R5)       // a31000ad
+	FSTPQ	(F3, F4), 16(R5)     // a39000ad
+	FSTPQ	(F3, F4), -16(R5)    // a3903fad
+	FSTPQ.W	(F3, F4), 32(R5)     // a31081ad
+	FSTPQ.P	(F3, F4), 32(R5)     // a31081ac
+	FSTPQ	(F3, F4), 11(R5)     // bb2c0091631300ad
+	FSTPQ	(F3, F4), 1024(R5)   // bb001091631300ad
+	FSTPQ	(F3, F4), 4104(R5)
+	FSTPQ	(F3, F4), -4000(RSP) // fb833ed1631300ad
+	FSTPQ	(F3, F4), -1024(RSP) // e31320ad
+	FSTPQ	(F3, F4), (RSP)      // e31300ad
+	FSTPQ	(F3, F4), 16(RSP)    // e39300ad
+	FSTPQ	(F3, F4), -16(RSP)   // e3933fad
+	FSTPQ.W	(F3, F4), 32(RSP)    // e31381ad
+	FSTPQ.P	(F3, F4), 32(RSP)    // e31381ac
+	FSTPQ	(F3, F4), 11(RSP)    // fb2f0091631300ad
+	FSTPQ	(F3, F4), 1024(RSP)  // fb031091631300ad
+	FSTPQ	(F3, F4), 4104(RSP)
+	FSTPQ	(F3, F4), x(SB)
+	FSTPQ	(F3, F4), x+8(SB)
+
 // System Register
 	MSR	$1, SPSel                          // bf4100d5
 	MSR	$9, DAIFSet                        // df4903d5
diff --git a/src/cmd/asm/internal/asm/testdata/arm64error.s b/src/cmd/asm/internal/asm/testdata/arm64error.s
index e579f20..9b4f42a 100644
--- a/src/cmd/asm/internal/asm/testdata/arm64error.s
+++ b/src/cmd/asm/internal/asm/testdata/arm64error.s
@@ -109,6 +109,9 @@
 	VREV16	V1.D1, V2.D1                                     // ERROR "invalid arrangement"
 	VREV16	V1.B8, V2.B16                                    // ERROR "invalid arrangement"
 	VREV16	V1.H4, V2.H4                                     // ERROR "invalid arrangement"
+	FLDPQ	(R0), (R1, R2)                                   // ERROR "invalid register pair"
+	FLDPQ	(R1), (F2, F2)                                   // ERROR "constrained unpredictable behavior"
+	FSTPQ	(R1, R2), (R0)                                   // ERROR "invalid register pair"
 	FLDPD	(R0), (R1, R2)                                   // ERROR "invalid register pair"
 	FLDPD	(R1), (F2, F2)                                   // ERROR "constrained unpredictable behavior"
 	FLDPS	(R2), (F3, F3)                                   // ERROR "constrained unpredictable behavior"
diff --git a/src/cmd/internal/obj/arm64/a.out.go b/src/cmd/internal/obj/arm64/a.out.go
index 7ab9c14..ed07f18 100644
--- a/src/cmd/internal/obj/arm64/a.out.go
+++ b/src/cmd/internal/obj/arm64/a.out.go
@@ -420,16 +420,21 @@
 	C_LBRA
 
 	C_ZAUTO       // 0(RSP)
+	C_NSAUTO_16   // -256 <= x < 0, 0 mod 16
 	C_NSAUTO_8    // -256 <= x < 0, 0 mod 8
 	C_NSAUTO_4    // -256 <= x < 0, 0 mod 4
 	C_NSAUTO      // -256 <= x < 0
+	C_NPAUTO_16   // -512 <= x < 0, 0 mod 16
 	C_NPAUTO      // -512 <= x < 0, 0 mod 8
+	C_NQAUTO_16   // -1024 <= x < 0, 0 mod 16
 	C_NAUTO4K     // -4095 <= x < 0
+	C_PSAUTO_16   // 0 to 255, 0 mod 16
 	C_PSAUTO_8    // 0 to 255, 0 mod 8
 	C_PSAUTO_4    // 0 to 255, 0 mod 4
 	C_PSAUTO      // 0 to 255
 	C_PPAUTO_16   // 0 to 504, 0 mod 16
 	C_PPAUTO      // 0 to 504, 0 mod 8
+	C_PQAUTO_16   // 0 to 1008, 0 mod 16
 	C_UAUTO4K_16  // 0 to 4095, 0 mod 16
 	C_UAUTO4K_8   // 0 to 4095, 0 mod 8
 	C_UAUTO4K_4   // 0 to 4095, 0 mod 4
@@ -454,17 +459,22 @@
 	C_SEXT16 // 0 to 65520
 	C_LEXT
 
-	C_ZOREG    // 0(R)
-	C_NSOREG_8 // must mirror C_NSAUTO_8, etc
+	C_ZOREG     // 0(R)
+	C_NSOREG_16 // must mirror C_NSAUTO_16, etc
+	C_NSOREG_8
 	C_NSOREG_4
 	C_NSOREG
+	C_NPOREG_16
 	C_NPOREG
+	C_NQOREG_16
 	C_NOREG4K
+	C_PSOREG_16
 	C_PSOREG_8
 	C_PSOREG_4
 	C_PSOREG
 	C_PPOREG_16
 	C_PPOREG
+	C_PQOREG_16
 	C_UOREG4K_16
 	C_UOREG4K_8
 	C_UOREG4K_4
@@ -898,6 +908,7 @@
 	AFDIVD
 	AFDIVS
 	AFLDPD
+	AFLDPQ
 	AFLDPS
 	AFMOVQ
 	AFMOVD
@@ -912,6 +923,7 @@
 	AFSQRTD
 	AFSQRTS
 	AFSTPD
+	AFSTPQ
 	AFSTPS
 	AFSUBD
 	AFSUBS
diff --git a/src/cmd/internal/obj/arm64/anames.go b/src/cmd/internal/obj/arm64/anames.go
index a98f8c7..0fb2853 100644
--- a/src/cmd/internal/obj/arm64/anames.go
+++ b/src/cmd/internal/obj/arm64/anames.go
@@ -392,6 +392,7 @@
 	"FDIVD",
 	"FDIVS",
 	"FLDPD",
+	"FLDPQ",
 	"FLDPS",
 	"FMOVQ",
 	"FMOVD",
@@ -406,6 +407,7 @@
 	"FSQRTD",
 	"FSQRTS",
 	"FSTPD",
+	"FSTPQ",
 	"FSTPS",
 	"FSUBD",
 	"FSUBS",
diff --git a/src/cmd/internal/obj/arm64/anames7.go b/src/cmd/internal/obj/arm64/anames7.go
index f7e99517..2ecd816 100644
--- a/src/cmd/internal/obj/arm64/anames7.go
+++ b/src/cmd/internal/obj/arm64/anames7.go
@@ -42,15 +42,21 @@
 	"SBRA",
 	"LBRA",
 	"ZAUTO",
+	"NSAUTO_16",
 	"NSAUTO_8",
 	"NSAUTO_4",
 	"NSAUTO",
+	"NPAUTO_16",
 	"NPAUTO",
+	"NQAUTO_16",
 	"NAUTO4K",
+	"PSAUTO_16",
 	"PSAUTO_8",
 	"PSAUTO_4",
 	"PSAUTO",
+	"PPAUTO_16",
 	"PPAUTO",
+	"PQAUTO_16",
 	"UAUTO4K_16",
 	"UAUTO4K_8",
 	"UAUTO4K_4",
@@ -74,15 +80,21 @@
 	"SEXT16",
 	"LEXT",
 	"ZOREG",
+	"NSOREG_16",
 	"NSOREG_8",
 	"NSOREG_4",
 	"NSOREG",
+	"NPOREG_16",
 	"NPOREG",
+	"NQOREG_16",
 	"NOREG4K",
+	"PSOREG_16",
 	"PSOREG_8",
 	"PSOREG_4",
 	"PSOREG",
+	"PPOREG_16",
 	"PPOREG",
+	"PQOREG_16",
 	"UOREG4K_16",
 	"UOREG4K_8",
 	"UOREG4K_4",
diff --git a/src/cmd/internal/obj/arm64/asm7.go b/src/cmd/internal/obj/arm64/asm7.go
index 70072cf..5937ebd 100644
--- a/src/cmd/internal/obj/arm64/asm7.go
+++ b/src/cmd/internal/obj/arm64/asm7.go
@@ -689,6 +689,46 @@
 
 	/* pre/post-indexed/signed-offset load/store register pair
 	   (unscaled, signed 10-bit quad-aligned and long offset) */
+	{AFLDPQ, C_NQAUTO_16, C_NONE, C_NONE, C_PAIR, 66, 4, REGSP, 0, 0},
+	{AFLDPQ, C_NQAUTO_16, C_NONE, C_NONE, C_PAIR, 66, 4, REGSP, 0, C_XPRE},
+	{AFLDPQ, C_NQAUTO_16, C_NONE, C_NONE, C_PAIR, 66, 4, REGSP, 0, C_XPOST},
+	{AFLDPQ, C_PQAUTO_16, C_NONE, C_NONE, C_PAIR, 66, 4, REGSP, 0, 0},
+	{AFLDPQ, C_PQAUTO_16, C_NONE, C_NONE, C_PAIR, 66, 4, REGSP, 0, C_XPRE},
+	{AFLDPQ, C_PQAUTO_16, C_NONE, C_NONE, C_PAIR, 66, 4, REGSP, 0, C_XPOST},
+	{AFLDPQ, C_UAUTO4K, C_NONE, C_NONE, C_PAIR, 74, 8, REGSP, 0, 0},
+	{AFLDPQ, C_NAUTO4K, C_NONE, C_NONE, C_PAIR, 74, 8, REGSP, 0, 0},
+	{AFLDPQ, C_LAUTO, C_NONE, C_NONE, C_PAIR, 75, 12, REGSP, LFROM, 0},
+	{AFLDPQ, C_NQOREG_16, C_NONE, C_NONE, C_PAIR, 66, 4, 0, 0, 0},
+	{AFLDPQ, C_NQOREG_16, C_NONE, C_NONE, C_PAIR, 66, 4, 0, 0, C_XPRE},
+	{AFLDPQ, C_NQOREG_16, C_NONE, C_NONE, C_PAIR, 66, 4, 0, 0, C_XPOST},
+	{AFLDPQ, C_PQOREG_16, C_NONE, C_NONE, C_PAIR, 66, 4, 0, 0, 0},
+	{AFLDPQ, C_PQOREG_16, C_NONE, C_NONE, C_PAIR, 66, 4, 0, 0, C_XPRE},
+	{AFLDPQ, C_PQOREG_16, C_NONE, C_NONE, C_PAIR, 66, 4, 0, 0, C_XPOST},
+	{AFLDPQ, C_UOREG4K, C_NONE, C_NONE, C_PAIR, 74, 8, 0, 0, 0},
+	{AFLDPQ, C_NOREG4K, C_NONE, C_NONE, C_PAIR, 74, 8, 0, 0, 0},
+	{AFLDPQ, C_LOREG, C_NONE, C_NONE, C_PAIR, 75, 12, 0, LFROM, 0},
+	{AFLDPQ, C_ADDR, C_NONE, C_NONE, C_PAIR, 88, 12, 0, 0, 0},
+
+	{AFSTPQ, C_PAIR, C_NONE, C_NONE, C_NQAUTO_16, 67, 4, REGSP, 0, 0},
+	{AFSTPQ, C_PAIR, C_NONE, C_NONE, C_NQAUTO_16, 67, 4, REGSP, 0, C_XPRE},
+	{AFSTPQ, C_PAIR, C_NONE, C_NONE, C_NQAUTO_16, 67, 4, REGSP, 0, C_XPOST},
+	{AFSTPQ, C_PAIR, C_NONE, C_NONE, C_PQAUTO_16, 67, 4, REGSP, 0, 0},
+	{AFSTPQ, C_PAIR, C_NONE, C_NONE, C_PQAUTO_16, 67, 4, REGSP, 0, C_XPRE},
+	{AFSTPQ, C_PAIR, C_NONE, C_NONE, C_PQAUTO_16, 67, 4, REGSP, 0, C_XPOST},
+	{AFSTPQ, C_PAIR, C_NONE, C_NONE, C_UAUTO4K, 76, 8, REGSP, 0, 0},
+	{AFSTPQ, C_PAIR, C_NONE, C_NONE, C_NAUTO4K, 76, 12, REGSP, 0, 0},
+	{AFSTPQ, C_PAIR, C_NONE, C_NONE, C_LAUTO, 77, 12, REGSP, LTO, 0},
+	{AFSTPQ, C_PAIR, C_NONE, C_NONE, C_NQOREG_16, 67, 4, 0, 0, 0},
+	{AFSTPQ, C_PAIR, C_NONE, C_NONE, C_NQOREG_16, 67, 4, 0, 0, C_XPRE},
+	{AFSTPQ, C_PAIR, C_NONE, C_NONE, C_NQOREG_16, 67, 4, 0, 0, C_XPOST},
+	{AFSTPQ, C_PAIR, C_NONE, C_NONE, C_PQOREG_16, 67, 4, 0, 0, 0},
+	{AFSTPQ, C_PAIR, C_NONE, C_NONE, C_PQOREG_16, 67, 4, 0, 0, C_XPRE},
+	{AFSTPQ, C_PAIR, C_NONE, C_NONE, C_PQOREG_16, 67, 4, 0, 0, C_XPOST},
+	{AFSTPQ, C_PAIR, C_NONE, C_NONE, C_UOREG4K, 76, 8, 0, 0, 0},
+	{AFSTPQ, C_PAIR, C_NONE, C_NONE, C_NOREG4K, 76, 8, 0, 0, 0},
+	{AFSTPQ, C_PAIR, C_NONE, C_NONE, C_LOREG, 77, 12, 0, LTO, 0},
+	{AFSTPQ, C_PAIR, C_NONE, C_NONE, C_ADDR, 87, 12, 0, 0, 0},
+
 	{ALDP, C_NPAUTO, C_NONE, C_NONE, C_PAIR, 66, 4, REGSP, 0, 0},
 	{ALDP, C_NPAUTO, C_NONE, C_NONE, C_PAIR, 66, 4, REGSP, 0, C_XPRE},
 	{ALDP, C_NPAUTO, C_NONE, C_NONE, C_PAIR, 66, 4, REGSP, 0, C_XPOST},
@@ -696,14 +736,8 @@
 	{ALDP, C_PPAUTO, C_NONE, C_NONE, C_PAIR, 66, 4, REGSP, 0, C_XPRE},
 	{ALDP, C_PPAUTO, C_NONE, C_NONE, C_PAIR, 66, 4, REGSP, 0, C_XPOST},
 	{ALDP, C_UAUTO4K, C_NONE, C_NONE, C_PAIR, 74, 8, REGSP, 0, 0},
-	{ALDP, C_UAUTO4K, C_NONE, C_NONE, C_PAIR, 74, 8, REGSP, 0, C_XPRE},
-	{ALDP, C_UAUTO4K, C_NONE, C_NONE, C_PAIR, 74, 8, REGSP, 0, C_XPOST},
 	{ALDP, C_NAUTO4K, C_NONE, C_NONE, C_PAIR, 74, 8, REGSP, 0, 0},
-	{ALDP, C_NAUTO4K, C_NONE, C_NONE, C_PAIR, 74, 8, REGSP, 0, C_XPRE},
-	{ALDP, C_NAUTO4K, C_NONE, C_NONE, C_PAIR, 74, 8, REGSP, 0, C_XPOST},
 	{ALDP, C_LAUTO, C_NONE, C_NONE, C_PAIR, 75, 12, REGSP, LFROM, 0},
-	{ALDP, C_LAUTO, C_NONE, C_NONE, C_PAIR, 75, 12, REGSP, LFROM, C_XPRE},
-	{ALDP, C_LAUTO, C_NONE, C_NONE, C_PAIR, 75, 12, REGSP, LFROM, C_XPOST},
 	{ALDP, C_NPOREG, C_NONE, C_NONE, C_PAIR, 66, 4, 0, 0, 0},
 	{ALDP, C_NPOREG, C_NONE, C_NONE, C_PAIR, 66, 4, 0, 0, C_XPRE},
 	{ALDP, C_NPOREG, C_NONE, C_NONE, C_PAIR, 66, 4, 0, 0, C_XPOST},
@@ -711,14 +745,8 @@
 	{ALDP, C_PPOREG, C_NONE, C_NONE, C_PAIR, 66, 4, 0, 0, C_XPRE},
 	{ALDP, C_PPOREG, C_NONE, C_NONE, C_PAIR, 66, 4, 0, 0, C_XPOST},
 	{ALDP, C_UOREG4K, C_NONE, C_NONE, C_PAIR, 74, 8, 0, 0, 0},
-	{ALDP, C_UOREG4K, C_NONE, C_NONE, C_PAIR, 74, 8, 0, 0, C_XPRE},
-	{ALDP, C_UOREG4K, C_NONE, C_NONE, C_PAIR, 74, 8, 0, 0, C_XPOST},
 	{ALDP, C_NOREG4K, C_NONE, C_NONE, C_PAIR, 74, 8, 0, 0, 0},
-	{ALDP, C_NOREG4K, C_NONE, C_NONE, C_PAIR, 74, 8, 0, 0, C_XPRE},
-	{ALDP, C_NOREG4K, C_NONE, C_NONE, C_PAIR, 74, 8, 0, 0, C_XPOST},
 	{ALDP, C_LOREG, C_NONE, C_NONE, C_PAIR, 75, 12, 0, LFROM, 0},
-	{ALDP, C_LOREG, C_NONE, C_NONE, C_PAIR, 75, 12, 0, LFROM, C_XPRE},
-	{ALDP, C_LOREG, C_NONE, C_NONE, C_PAIR, 75, 12, 0, LFROM, C_XPOST},
 	{ALDP, C_ADDR, C_NONE, C_NONE, C_PAIR, 88, 12, 0, 0, 0},
 
 	{ASTP, C_PAIR, C_NONE, C_NONE, C_NPAUTO, 67, 4, REGSP, 0, 0},
@@ -728,14 +756,8 @@
 	{ASTP, C_PAIR, C_NONE, C_NONE, C_PPAUTO, 67, 4, REGSP, 0, C_XPRE},
 	{ASTP, C_PAIR, C_NONE, C_NONE, C_PPAUTO, 67, 4, REGSP, 0, C_XPOST},
 	{ASTP, C_PAIR, C_NONE, C_NONE, C_UAUTO4K, 76, 8, REGSP, 0, 0},
-	{ASTP, C_PAIR, C_NONE, C_NONE, C_UAUTO4K, 76, 8, REGSP, 0, C_XPRE},
-	{ASTP, C_PAIR, C_NONE, C_NONE, C_UAUTO4K, 76, 8, REGSP, 0, C_XPOST},
 	{ASTP, C_PAIR, C_NONE, C_NONE, C_NAUTO4K, 76, 12, REGSP, 0, 0},
-	{ASTP, C_PAIR, C_NONE, C_NONE, C_NAUTO4K, 76, 12, REGSP, 0, C_XPRE},
-	{ASTP, C_PAIR, C_NONE, C_NONE, C_NAUTO4K, 76, 12, REGSP, 0, C_XPOST},
 	{ASTP, C_PAIR, C_NONE, C_NONE, C_LAUTO, 77, 12, REGSP, LTO, 0},
-	{ASTP, C_PAIR, C_NONE, C_NONE, C_LAUTO, 77, 12, REGSP, LTO, C_XPRE},
-	{ASTP, C_PAIR, C_NONE, C_NONE, C_LAUTO, 77, 12, REGSP, LTO, C_XPOST},
 	{ASTP, C_PAIR, C_NONE, C_NONE, C_NPOREG, 67, 4, 0, 0, 0},
 	{ASTP, C_PAIR, C_NONE, C_NONE, C_NPOREG, 67, 4, 0, 0, C_XPRE},
 	{ASTP, C_PAIR, C_NONE, C_NONE, C_NPOREG, 67, 4, 0, 0, C_XPOST},
@@ -743,14 +765,8 @@
 	{ASTP, C_PAIR, C_NONE, C_NONE, C_PPOREG, 67, 4, 0, 0, C_XPRE},
 	{ASTP, C_PAIR, C_NONE, C_NONE, C_PPOREG, 67, 4, 0, 0, C_XPOST},
 	{ASTP, C_PAIR, C_NONE, C_NONE, C_UOREG4K, 76, 8, 0, 0, 0},
-	{ASTP, C_PAIR, C_NONE, C_NONE, C_UOREG4K, 76, 8, 0, 0, C_XPRE},
-	{ASTP, C_PAIR, C_NONE, C_NONE, C_UOREG4K, 76, 8, 0, 0, C_XPOST},
 	{ASTP, C_PAIR, C_NONE, C_NONE, C_NOREG4K, 76, 8, 0, 0, 0},
-	{ASTP, C_PAIR, C_NONE, C_NONE, C_NOREG4K, 76, 8, 0, 0, C_XPRE},
-	{ASTP, C_PAIR, C_NONE, C_NONE, C_NOREG4K, 76, 8, 0, 0, C_XPOST},
 	{ASTP, C_PAIR, C_NONE, C_NONE, C_LOREG, 77, 12, 0, LTO, 0},
-	{ASTP, C_PAIR, C_NONE, C_NONE, C_LOREG, 77, 12, 0, LTO, C_XPRE},
-	{ASTP, C_PAIR, C_NONE, C_NONE, C_LOREG, 77, 12, 0, LTO, C_XPOST},
 	{ASTP, C_PAIR, C_NONE, C_NONE, C_ADDR, 87, 12, 0, 0, 0},
 
 	// differ from LDP/STP for C_NSAUTO_4/C_PSAUTO_4/C_NSOREG_4/C_PSOREG_4
@@ -761,14 +777,8 @@
 	{ALDPW, C_PSAUTO_4, C_NONE, C_NONE, C_PAIR, 66, 4, REGSP, 0, C_XPRE},
 	{ALDPW, C_PSAUTO_4, C_NONE, C_NONE, C_PAIR, 66, 4, REGSP, 0, C_XPOST},
 	{ALDPW, C_UAUTO4K, C_NONE, C_NONE, C_PAIR, 74, 8, REGSP, 0, 0},
-	{ALDPW, C_UAUTO4K, C_NONE, C_NONE, C_PAIR, 74, 8, REGSP, 0, C_XPRE},
-	{ALDPW, C_UAUTO4K, C_NONE, C_NONE, C_PAIR, 74, 8, REGSP, 0, C_XPOST},
 	{ALDPW, C_NAUTO4K, C_NONE, C_NONE, C_PAIR, 74, 8, REGSP, 0, 0},
-	{ALDPW, C_NAUTO4K, C_NONE, C_NONE, C_PAIR, 74, 8, REGSP, 0, C_XPRE},
-	{ALDPW, C_NAUTO4K, C_NONE, C_NONE, C_PAIR, 74, 8, REGSP, 0, C_XPOST},
 	{ALDPW, C_LAUTO, C_NONE, C_NONE, C_PAIR, 75, 12, REGSP, LFROM, 0},
-	{ALDPW, C_LAUTO, C_NONE, C_NONE, C_PAIR, 75, 12, REGSP, LFROM, C_XPRE},
-	{ALDPW, C_LAUTO, C_NONE, C_NONE, C_PAIR, 75, 12, REGSP, LFROM, C_XPOST},
 	{ALDPW, C_NSOREG_4, C_NONE, C_NONE, C_PAIR, 66, 4, 0, 0, 0},
 	{ALDPW, C_NSOREG_4, C_NONE, C_NONE, C_PAIR, 66, 4, 0, 0, C_XPRE},
 	{ALDPW, C_NSOREG_4, C_NONE, C_NONE, C_PAIR, 66, 4, 0, 0, C_XPOST},
@@ -776,14 +786,8 @@
 	{ALDPW, C_PSOREG_4, C_NONE, C_NONE, C_PAIR, 66, 4, 0, 0, C_XPRE},
 	{ALDPW, C_PSOREG_4, C_NONE, C_NONE, C_PAIR, 66, 4, 0, 0, C_XPOST},
 	{ALDPW, C_UOREG4K, C_NONE, C_NONE, C_PAIR, 74, 8, 0, 0, 0},
-	{ALDPW, C_UOREG4K, C_NONE, C_NONE, C_PAIR, 74, 8, 0, 0, C_XPRE},
-	{ALDPW, C_UOREG4K, C_NONE, C_NONE, C_PAIR, 74, 8, 0, 0, C_XPOST},
 	{ALDPW, C_NOREG4K, C_NONE, C_NONE, C_PAIR, 74, 8, 0, 0, 0},
-	{ALDPW, C_NOREG4K, C_NONE, C_NONE, C_PAIR, 74, 8, 0, 0, C_XPRE},
-	{ALDPW, C_NOREG4K, C_NONE, C_NONE, C_PAIR, 74, 8, 0, 0, C_XPOST},
 	{ALDPW, C_LOREG, C_NONE, C_NONE, C_PAIR, 75, 12, 0, LFROM, 0},
-	{ALDPW, C_LOREG, C_NONE, C_NONE, C_PAIR, 75, 12, 0, LFROM, C_XPRE},
-	{ALDPW, C_LOREG, C_NONE, C_NONE, C_PAIR, 75, 12, 0, LFROM, C_XPOST},
 	{ALDPW, C_ADDR, C_NONE, C_NONE, C_PAIR, 88, 12, 0, 0, 0},
 
 	{ASTPW, C_PAIR, C_NONE, C_NONE, C_NSAUTO_4, 67, 4, REGSP, 0, 0},
@@ -793,14 +797,8 @@
 	{ASTPW, C_PAIR, C_NONE, C_NONE, C_PSAUTO_4, 67, 4, REGSP, 0, C_XPRE},
 	{ASTPW, C_PAIR, C_NONE, C_NONE, C_PSAUTO_4, 67, 4, REGSP, 0, C_XPOST},
 	{ASTPW, C_PAIR, C_NONE, C_NONE, C_UAUTO4K, 76, 8, REGSP, 0, 0},
-	{ASTPW, C_PAIR, C_NONE, C_NONE, C_UAUTO4K, 76, 8, REGSP, 0, C_XPRE},
-	{ASTPW, C_PAIR, C_NONE, C_NONE, C_UAUTO4K, 76, 8, REGSP, 0, C_XPOST},
 	{ASTPW, C_PAIR, C_NONE, C_NONE, C_NAUTO4K, 76, 12, REGSP, 0, 0},
-	{ASTPW, C_PAIR, C_NONE, C_NONE, C_NAUTO4K, 76, 12, REGSP, 0, C_XPRE},
-	{ASTPW, C_PAIR, C_NONE, C_NONE, C_NAUTO4K, 76, 12, REGSP, 0, C_XPOST},
 	{ASTPW, C_PAIR, C_NONE, C_NONE, C_LAUTO, 77, 12, REGSP, LTO, 0},
-	{ASTPW, C_PAIR, C_NONE, C_NONE, C_LAUTO, 77, 12, REGSP, LTO, C_XPRE},
-	{ASTPW, C_PAIR, C_NONE, C_NONE, C_LAUTO, 77, 12, REGSP, LTO, C_XPOST},
 	{ASTPW, C_PAIR, C_NONE, C_NONE, C_NSOREG_4, 67, 4, 0, 0, 0},
 	{ASTPW, C_PAIR, C_NONE, C_NONE, C_NSOREG_4, 67, 4, 0, 0, C_XPRE},
 	{ASTPW, C_PAIR, C_NONE, C_NONE, C_NSOREG_4, 67, 4, 0, 0, C_XPOST},
@@ -808,14 +806,8 @@
 	{ASTPW, C_PAIR, C_NONE, C_NONE, C_PSOREG_4, 67, 4, 0, 0, C_XPRE},
 	{ASTPW, C_PAIR, C_NONE, C_NONE, C_PSOREG_4, 67, 4, 0, 0, C_XPOST},
 	{ASTPW, C_PAIR, C_NONE, C_NONE, C_UOREG4K, 76, 8, 0, 0, 0},
-	{ASTPW, C_PAIR, C_NONE, C_NONE, C_UOREG4K, 76, 8, 0, 0, C_XPRE},
-	{ASTPW, C_PAIR, C_NONE, C_NONE, C_UOREG4K, 76, 8, 0, 0, C_XPOST},
 	{ASTPW, C_PAIR, C_NONE, C_NONE, C_NOREG4K, 76, 8, 0, 0, 0},
-	{ASTPW, C_PAIR, C_NONE, C_NONE, C_NOREG4K, 76, 8, 0, 0, C_XPRE},
-	{ASTPW, C_PAIR, C_NONE, C_NONE, C_NOREG4K, 76, 8, 0, 0, C_XPOST},
 	{ASTPW, C_PAIR, C_NONE, C_NONE, C_LOREG, 77, 12, 0, LTO, 0},
-	{ASTPW, C_PAIR, C_NONE, C_NONE, C_LOREG, 77, 12, 0, LTO, C_XPRE},
-	{ASTPW, C_PAIR, C_NONE, C_NONE, C_LOREG, 77, 12, 0, LTO, C_XPOST},
 	{ASTPW, C_PAIR, C_NONE, C_NONE, C_ADDR, 87, 12, 0, 0, 0},
 
 	{ASWPD, C_REG, C_NONE, C_NONE, C_ZOREG, 47, 4, 0, 0, 0},        // RegTo2=C_REG
@@ -1276,12 +1268,27 @@
 	case C_ADDCON:
 		fallthrough
 
-	case C_ZAUTO,
-		C_PSAUTO,
+	case C_ADDCON2,
+		C_LCON,
+		C_VCON,
+		C_LACON,
+
+		C_ZAUTO,
+		C_NSAUTO_16,
+		C_NSAUTO_8,
+		C_NSAUTO_4,
+		C_NSAUTO,
+		C_NPAUTO_16,
+		C_NPAUTO,
+		C_NQAUTO_16,
+		C_NAUTO4K,
+		C_PSAUTO_16,
 		C_PSAUTO_8,
 		C_PSAUTO_4,
+		C_PSAUTO,
 		C_PPAUTO_16,
 		C_PPAUTO,
+		C_PQAUTO_16,
 		C_UAUTO4K_16,
 		C_UAUTO4K_8,
 		C_UAUTO4K_4,
@@ -1297,17 +1304,24 @@
 		C_UAUTO32K_16,
 		C_UAUTO32K,
 		C_UAUTO64K,
-		C_NSAUTO_8,
-		C_NSAUTO_4,
-		C_NSAUTO,
-		C_NPAUTO,
-		C_NAUTO4K,
 		C_LAUTO,
-		C_PSOREG,
+
+		C_ZOREG,
+		C_NSOREG_16,
+		C_NSOREG_8,
+		C_NSOREG_4,
+		C_NSOREG,
+		C_NPOREG_16,
+		C_NPOREG,
+		C_NQOREG_16,
+		C_NOREG4K,
+		C_PSOREG_16,
 		C_PSOREG_8,
 		C_PSOREG_4,
+		C_PSOREG,
 		C_PPOREG_16,
 		C_PPOREG,
+		C_PQOREG_16,
 		C_UOREG4K_16,
 		C_UOREG4K_8,
 		C_UOREG4K_4,
@@ -1323,16 +1337,7 @@
 		C_UOREG32K_16,
 		C_UOREG32K,
 		C_UOREG64K,
-		C_NSOREG_8,
-		C_NSOREG_4,
-		C_NSOREG,
-		C_NPOREG,
-		C_NOREG4K,
-		C_LOREG,
-		C_LACON,
-		C_ADDCON2,
-		C_LCON,
-		C_VCON:
+		C_LOREG:
 		if a.Name == obj.NAME_EXTERN {
 			fmt.Printf("addpool: %v in %v needs reloc\n", DRconv(cls), p)
 		}
@@ -1590,6 +1595,9 @@
 	}
 
 	if l < 0 {
+		if l >= -256 && (l&15) == 0 {
+			return C_NSAUTO_16
+		}
 		if l >= -256 && (l&7) == 0 {
 			return C_NSAUTO_8
 		}
@@ -1599,9 +1607,15 @@
 		if l >= -256 {
 			return C_NSAUTO
 		}
+		if l >= -512 && (l&15) == 0 {
+			return C_NPAUTO_16
+		}
 		if l >= -512 && (l&7) == 0 {
 			return C_NPAUTO
 		}
+		if l >= -1024 && (l&15) == 0 {
+			return C_NQAUTO_16
+		}
 		if l >= -4095 {
 			return C_NAUTO4K
 		}
@@ -1609,6 +1623,9 @@
 	}
 
 	if l <= 255 {
+		if (l & 15) == 0 {
+			return C_PSAUTO_16
+		}
 		if (l & 7) == 0 {
 			return C_PSAUTO_8
 		}
@@ -1625,6 +1642,11 @@
 			return C_PPAUTO
 		}
 	}
+	if l <= 1008 {
+		if l&15 == 0 {
+			return C_PQAUTO_16
+		}
+	}
 	if l <= 4095 {
 		if l&15 == 0 {
 			return C_UAUTO4K_16
@@ -2193,64 +2215,99 @@
 			return true
 		}
 
+	case C_NSAUTO_8:
+		if b == C_NSAUTO_16 {
+			return true
+		}
+
 	case C_NSAUTO_4:
-		if b == C_NSAUTO_8 {
+		if b == C_NSAUTO_16 || b == C_NSAUTO_8 {
 			return true
 		}
 
 	case C_NSAUTO:
 		switch b {
-		case C_NSAUTO_4, C_NSAUTO_8:
+		case C_NSAUTO_4, C_NSAUTO_8, C_NSAUTO_16:
+			return true
+		}
+
+	case C_NPAUTO_16:
+		switch b {
+		case C_NSAUTO_16:
 			return true
 		}
 
 	case C_NPAUTO:
 		switch b {
-		case C_NSAUTO_8:
+		case C_NSAUTO_16, C_NSAUTO_8, C_NPAUTO_16:
+			return true
+		}
+
+	case C_NQAUTO_16:
+		switch b {
+		case C_NSAUTO_16, C_NPAUTO_16:
 			return true
 		}
 
 	case C_NAUTO4K:
 		switch b {
-		case C_NSAUTO_8, C_NSAUTO_4, C_NSAUTO, C_NPAUTO:
+		case C_NSAUTO_16, C_NSAUTO_8, C_NSAUTO_4, C_NSAUTO, C_NPAUTO_16,
+			C_NPAUTO, C_NQAUTO_16:
+			return true
+		}
+
+	case C_PSAUTO_16:
+		if b == C_ZAUTO {
 			return true
 		}
 
 	case C_PSAUTO_8:
-		if b == C_ZAUTO {
+		if b == C_ZAUTO || b == C_PSAUTO_16 {
 			return true
 		}
 
 	case C_PSAUTO_4:
 		switch b {
-		case C_ZAUTO, C_PSAUTO_8:
+		case C_ZAUTO, C_PSAUTO_16, C_PSAUTO_8:
 			return true
 		}
 
 	case C_PSAUTO:
 		switch b {
-		case C_ZAUTO, C_PSAUTO_8, C_PSAUTO_4:
+		case C_ZAUTO, C_PSAUTO_16, C_PSAUTO_8, C_PSAUTO_4:
+			return true
+		}
+
+	case C_PPAUTO_16:
+		switch b {
+		case C_ZAUTO, C_PSAUTO_16:
 			return true
 		}
 
 	case C_PPAUTO:
 		switch b {
-		case C_ZAUTO, C_PSAUTO_8, C_PPAUTO_16:
+		case C_ZAUTO, C_PSAUTO_16, C_PSAUTO_8, C_PPAUTO_16:
+			return true
+		}
+
+	case C_PQAUTO_16:
+		switch b {
+		case C_ZAUTO, C_PSAUTO_16, C_PPAUTO_16:
 			return true
 		}
 
 	case C_UAUTO4K:
 		switch b {
-		case C_ZAUTO, C_PSAUTO, C_PSAUTO_4, C_PSAUTO_8,
-			C_PPAUTO, C_PPAUTO_16,
+		case C_ZAUTO, C_PSAUTO, C_PSAUTO_4, C_PSAUTO_8, C_PSAUTO_16,
+			C_PPAUTO, C_PPAUTO_16, C_PQAUTO_16,
 			C_UAUTO4K_2, C_UAUTO4K_4, C_UAUTO4K_8, C_UAUTO4K_16:
 			return true
 		}
 
 	case C_UAUTO8K:
 		switch b {
-		case C_ZAUTO, C_PSAUTO, C_PSAUTO_4, C_PSAUTO_8,
-			C_PPAUTO, C_PPAUTO_16,
+		case C_ZAUTO, C_PSAUTO, C_PSAUTO_4, C_PSAUTO_8, C_PSAUTO_16,
+			C_PPAUTO, C_PPAUTO_16, C_PQAUTO_16,
 			C_UAUTO4K_2, C_UAUTO4K_4, C_UAUTO4K_8, C_UAUTO4K_16,
 			C_UAUTO8K_4, C_UAUTO8K_8, C_UAUTO8K_16:
 			return true
@@ -2258,8 +2315,8 @@
 
 	case C_UAUTO16K:
 		switch b {
-		case C_ZAUTO, C_PSAUTO, C_PSAUTO_4, C_PSAUTO_8,
-			C_PPAUTO, C_PPAUTO_16,
+		case C_ZAUTO, C_PSAUTO, C_PSAUTO_4, C_PSAUTO_8, C_PSAUTO_16,
+			C_PPAUTO, C_PPAUTO_16, C_PQAUTO_16,
 			C_UAUTO4K_4, C_UAUTO4K_8, C_UAUTO4K_16,
 			C_UAUTO8K_4, C_UAUTO8K_8, C_UAUTO8K_16,
 			C_UAUTO16K_8, C_UAUTO16K_16:
@@ -2268,8 +2325,8 @@
 
 	case C_UAUTO32K:
 		switch b {
-		case C_ZAUTO, C_PSAUTO, C_PSAUTO_4, C_PSAUTO_8,
-			C_PPAUTO, C_PPAUTO_16,
+		case C_ZAUTO, C_PSAUTO, C_PSAUTO_4, C_PSAUTO_8, C_PSAUTO_16,
+			C_PPAUTO, C_PPAUTO_16, C_PQAUTO_16,
 			C_UAUTO4K_8, C_UAUTO4K_16,
 			C_UAUTO8K_8, C_UAUTO8K_16,
 			C_UAUTO16K_8, C_UAUTO16K_16,
@@ -2279,17 +2336,17 @@
 
 	case C_UAUTO64K:
 		switch b {
-		case C_ZAUTO, C_PSAUTO, C_PSAUTO_4, C_PSAUTO_8,
-			C_PPAUTO_16, C_UAUTO4K_16, C_UAUTO8K_16, C_UAUTO16K_16,
+		case C_ZAUTO, C_PSAUTO, C_PSAUTO_4, C_PSAUTO_8, C_PSAUTO_16,
+			C_PPAUTO_16, C_PQAUTO_16, C_UAUTO4K_16, C_UAUTO8K_16, C_UAUTO16K_16,
 			C_UAUTO32K_16:
 			return true
 		}
 
 	case C_LAUTO:
 		switch b {
-		case C_ZAUTO, C_NSAUTO, C_NSAUTO_4, C_NSAUTO_8, C_NPAUTO, C_NAUTO4K,
-			C_PSAUTO, C_PSAUTO_4, C_PSAUTO_8,
-			C_PPAUTO, C_PPAUTO_16,
+		case C_ZAUTO, C_NSAUTO, C_NSAUTO_4, C_NSAUTO_8, C_NSAUTO_16, C_NPAUTO_16, C_NPAUTO, C_NQAUTO_16, C_NAUTO4K,
+			C_PSAUTO, C_PSAUTO_4, C_PSAUTO_8, C_PSAUTO_16,
+			C_PPAUTO, C_PPAUTO_16, C_PQAUTO_16,
 			C_UAUTO4K, C_UAUTO4K_2, C_UAUTO4K_4, C_UAUTO4K_8, C_UAUTO4K_16,
 			C_UAUTO8K, C_UAUTO8K_4, C_UAUTO8K_8, C_UAUTO8K_16,
 			C_UAUTO16K, C_UAUTO16K_8, C_UAUTO16K_16,
@@ -2298,64 +2355,98 @@
 			return true
 		}
 
+	case C_NSOREG_8:
+		if b == C_NSOREG_16 {
+			return true
+		}
+
 	case C_NSOREG_4:
-		if b == C_NSOREG_8 {
+		if b == C_NSOREG_8 || b == C_NSOREG_16 {
 			return true
 		}
 
 	case C_NSOREG:
 		switch b {
-		case C_NSOREG_4, C_NSOREG_8:
+		case C_NSOREG_4, C_NSOREG_8, C_NSOREG_16:
+			return true
+		}
+
+	case C_NPOREG_16:
+		switch b {
+		case C_NSOREG_16:
 			return true
 		}
 
 	case C_NPOREG:
 		switch b {
-		case C_NSOREG_8:
+		case C_NSOREG_16, C_NSOREG_8, C_NPOREG_16:
+			return true
+		}
+
+	case C_NQOREG_16:
+		switch b {
+		case C_NSOREG_16, C_NPOREG_16:
 			return true
 		}
 
 	case C_NOREG4K:
 		switch b {
-		case C_NSOREG_8, C_NSOREG_4, C_NSOREG, C_NPOREG:
+		case C_NSOREG_16, C_NSOREG_8, C_NSOREG_4, C_NSOREG, C_NPOREG_16, C_NPOREG, C_NQOREG_16:
+			return true
+		}
+
+	case C_PSOREG_16:
+		if b == C_ZOREG {
 			return true
 		}
 
 	case C_PSOREG_8:
-		if b == C_ZOREG {
+		if b == C_ZOREG || b == C_PSOREG_16 {
 			return true
 		}
 
 	case C_PSOREG_4:
 		switch b {
-		case C_ZOREG, C_PSOREG_8:
+		case C_ZOREG, C_PSOREG_16, C_PSOREG_8:
 			return true
 		}
 
 	case C_PSOREG:
 		switch b {
-		case C_ZOREG, C_PSOREG_8, C_PSOREG_4:
+		case C_ZOREG, C_PSOREG_16, C_PSOREG_8, C_PSOREG_4:
+			return true
+		}
+
+	case C_PPOREG_16:
+		switch b {
+		case C_ZOREG, C_PSOREG_16:
 			return true
 		}
 
 	case C_PPOREG:
 		switch b {
-		case C_ZOREG, C_PSOREG_8, C_PPOREG_16:
+		case C_ZOREG, C_PSOREG_16, C_PSOREG_8, C_PPOREG_16:
+			return true
+		}
+
+	case C_PQOREG_16:
+		switch b {
+		case C_ZOREG, C_PSOREG_16, C_PPOREG_16:
 			return true
 		}
 
 	case C_UOREG4K:
 		switch b {
-		case C_ZOREG, C_PSOREG, C_PSOREG_4, C_PSOREG_8,
-			C_PPOREG, C_PPOREG_16,
+		case C_ZOREG, C_PSOREG, C_PSOREG_4, C_PSOREG_8, C_PSOREG_16,
+			C_PPOREG, C_PPOREG_16, C_PQOREG_16,
 			C_UOREG4K_2, C_UOREG4K_4, C_UOREG4K_8, C_UOREG4K_16:
 			return true
 		}
 
 	case C_UOREG8K:
 		switch b {
-		case C_ZOREG, C_PSOREG, C_PSOREG_4, C_PSOREG_8,
-			C_PPOREG, C_PPOREG_16,
+		case C_ZOREG, C_PSOREG, C_PSOREG_4, C_PSOREG_8, C_PSOREG_16,
+			C_PPOREG, C_PPOREG_16, C_PQOREG_16,
 			C_UOREG4K_2, C_UOREG4K_4, C_UOREG4K_8, C_UOREG4K_16,
 			C_UOREG8K_4, C_UOREG8K_8, C_UOREG8K_16:
 			return true
@@ -2363,8 +2454,8 @@
 
 	case C_UOREG16K:
 		switch b {
-		case C_ZOREG, C_PSOREG, C_PSOREG_4, C_PSOREG_8,
-			C_PPOREG, C_PPOREG_16,
+		case C_ZOREG, C_PSOREG, C_PSOREG_4, C_PSOREG_8, C_PSOREG_16,
+			C_PPOREG, C_PPOREG_16, C_PQOREG_16,
 			C_UOREG4K_4, C_UOREG4K_8, C_UOREG4K_16,
 			C_UOREG8K_4, C_UOREG8K_8, C_UOREG8K_16,
 			C_UOREG16K_8, C_UOREG16K_16:
@@ -2373,8 +2464,8 @@
 
 	case C_UOREG32K:
 		switch b {
-		case C_ZOREG, C_PSOREG, C_PSOREG_4, C_PSOREG_8,
-			C_PPOREG, C_PPOREG_16,
+		case C_ZOREG, C_PSOREG, C_PSOREG_4, C_PSOREG_8, C_PSOREG_16,
+			C_PPOREG, C_PPOREG_16, C_PQOREG_16,
 			C_UOREG4K_8, C_UOREG4K_16,
 			C_UOREG8K_8, C_UOREG8K_16,
 			C_UOREG16K_8, C_UOREG16K_16,
@@ -2384,17 +2475,17 @@
 
 	case C_UOREG64K:
 		switch b {
-		case C_ZOREG, C_PSOREG, C_PSOREG_4, C_PSOREG_8,
-			C_PPOREG_16, C_UOREG4K_16, C_UOREG8K_16, C_UOREG16K_16,
+		case C_ZOREG, C_PSOREG, C_PSOREG_4, C_PSOREG_8, C_PSOREG_16,
+			C_PPOREG_16, C_PQOREG_16, C_UOREG4K_16, C_UOREG8K_16, C_UOREG16K_16,
 			C_UOREG32K_16:
 			return true
 		}
 
 	case C_LOREG:
 		switch b {
-		case C_ZOREG, C_NSOREG, C_NSOREG_4, C_NSOREG_8, C_NPOREG, C_NOREG4K,
-			C_PSOREG, C_PSOREG_4, C_PSOREG_8,
-			C_PPOREG, C_PPOREG_16,
+		case C_ZOREG, C_NSOREG, C_NSOREG_4, C_NSOREG_8, C_NSOREG_16, C_NPOREG, C_NPOREG_16, C_NQOREG_16, C_NOREG4K,
+			C_PSOREG, C_PSOREG_4, C_PSOREG_8, C_PSOREG_16,
+			C_PPOREG, C_PPOREG_16, C_PQOREG_16,
 			C_UOREG4K, C_UOREG4K_2, C_UOREG4K_4, C_UOREG4K_8, C_UOREG4K_16,
 			C_UOREG8K, C_UOREG8K_4, C_UOREG8K_8, C_UOREG8K_16,
 			C_UOREG16K, C_UOREG16K_8, C_UOREG16K_16,
@@ -2722,6 +2813,10 @@
 			obj.ATEXT:
 			break
 
+		case AFLDPQ:
+			break
+		case AFSTPQ:
+			break
 		case ALDP:
 			oprangeset(AFLDPD, t)
 
@@ -7192,7 +7287,7 @@
 	return o
 }
 
-/* genrate instruction encoding for LDP/LDPW/LDPSW/STP/STPW */
+/* genrate instruction encoding for ldp and stp series */
 func (c *ctxt7) opldpstp(p *obj.Prog, o *Optab, vo int32, rbase, rl, rh, ldp uint32) uint32 {
 	wback := false
 	if o.scond == C_XPOST || o.scond == C_XPRE {
@@ -7205,30 +7300,36 @@
 		if wback == true {
 			c.checkUnpredictable(p, false, true, p.To.Reg, p.From.Reg, int16(p.From.Offset))
 		}
-	case AFLDPD, AFLDPS:
+	case AFLDPD, AFLDPQ, AFLDPS:
 		c.checkUnpredictable(p, true, false, p.From.Reg, p.To.Reg, int16(p.To.Offset))
 	}
 	var ret uint32
 	// check offset
 	switch p.As {
+	case AFLDPQ, AFSTPQ:
+		if vo < -1024 || vo > 1008 || vo%16 != 0 {
+			c.ctxt.Diag("invalid offset %v\n", p)
+		}
+		vo /= 16
+		ret = 2<<30 | 1<<26
 	case AFLDPD, AFSTPD:
 		if vo < -512 || vo > 504 || vo%8 != 0 {
 			c.ctxt.Diag("invalid offset %v\n", p)
 		}
 		vo /= 8
 		ret = 1<<30 | 1<<26
-	case ALDP, ASTP:
-		if vo < -512 || vo > 504 || vo%8 != 0 {
-			c.ctxt.Diag("invalid offset %v\n", p)
-		}
-		vo /= 8
-		ret = 2 << 30
 	case AFLDPS, AFSTPS:
 		if vo < -256 || vo > 252 || vo%4 != 0 {
 			c.ctxt.Diag("invalid offset %v\n", p)
 		}
 		vo /= 4
 		ret = 1 << 26
+	case ALDP, ASTP:
+		if vo < -512 || vo > 504 || vo%8 != 0 {
+			c.ctxt.Diag("invalid offset %v\n", p)
+		}
+		vo /= 8
+		ret = 2 << 30
 	case ALDPW, ASTPW:
 		if vo < -256 || vo > 252 || vo%4 != 0 {
 			c.ctxt.Diag("invalid offset %v\n", p)
@@ -7246,7 +7347,7 @@
 	}
 	// check register pair
 	switch p.As {
-	case AFLDPD, AFLDPS, AFSTPD, AFSTPS:
+	case AFLDPQ, AFLDPD, AFLDPS, AFSTPQ, AFSTPD, AFSTPS:
 		if rl < REG_F0 || REG_F31 < rl || rh < REG_F0 || REG_F31 < rh {
 			c.ctxt.Diag("invalid register pair %v\n", p)
 		}