cmd/asm: fix the error of checking the post-index offset of VLD[1-4]R instructions of arm64

The post-index offset of VLD[1-4]R instructions is decided by the
"size" field not "Q" field, the current assembler uses "Q" fileld
to check the correctness of post-index offset which is not correct.
This patch fixes it.

Fixes #40725

Change-Id: If1cde7f21c6b3ee0e491649eb567700bd1475c84
Reviewed-on: https://go-review.googlesource.com/c/go/+/249757
Reviewed-by: Cherry Zhang <cherryyz@google.com>
diff --git a/src/cmd/asm/internal/asm/testdata/arm64.s b/src/cmd/asm/internal/asm/testdata/arm64.s
index 5a6db05..f0c716a 100644
--- a/src/cmd/asm/internal/asm/testdata/arm64.s
+++ b/src/cmd/asm/internal/asm/testdata/arm64.s
@@ -359,18 +359,22 @@
 	VLD4	(R15), [V10.H4, V11.H4, V12.H4, V13.H4]         // ea05400c
 	VLD4.P	32(R24), [V31.B8, V0.B8, V1.B8, V2.B8]          // 1f03df0c
 	VLD4.P	(R13)(R9), [V14.S2, V15.S2, V16.S2, V17.S2]     // VLD4.P	(R13)(R9*1), [V14.S2,V15.S2,V16.S2,V17.S2] // ae09c90c
-	VLD1R	(R0), [V0.B16]					// 00c0404d
-	VLD1R.P	16(R0), [V0.B16]				// 00c0df4d
-	VLD1R.P	(R15)(R1), [V15.H4]				// VLD1R.P	(R15)(R1*1), [V15.H4] // efc5c10d
-	VLD2R	(R15), [V15.H4, V16.H4]				// efc5600d
-	VLD2R.P	32(R0), [V0.D2, V1.D2]				// 00ccff4d
-	VLD2R.P	(R0)(R5), [V31.D1, V0.D1]			// VLD2R.P	(R0)(R5*1), [V31.D1, V0.D1] // 1fcce50d
-	VLD3R	(RSP), [V31.S2, V0.S2, V1.S2]			// ffeb400d
-	VLD3R.P	24(R15), [V15.H4, V16.H4, V17.H4]		// efe5df0d
-	VLD3R.P	(R15)(R6), [V15.H8, V16.H8, V17.H8]		// VLD3R.P	(R15)(R6*1), [V15.H8, V16.H8, V17.H8] // efe5c64d
-	VLD4R	(R0), [V0.B8, V1.B8, V2.B8, V3.B8]		// 00e0600d
-	VLD4R.P	64(RSP), [V31.S4, V0.S4, V1.S4, V2.S4]		// ffebff4d
-	VLD4R.P	(R15)(R9), [V15.H4, V16.H4, V17.H4, V18.H4]	// VLD4R.P	(R15)(R9*1), [V15.H4, V16.H4, V17.H4, V18.H4] // efe5e90d
+	VLD1R	(R1), [V9.B8]                                   // 29c0400d
+	VLD1R.P	(R1), [V9.B8]                                   // 29c0df0d
+	VLD1R.P	1(R1), [V2.B8]                                  // 22c0df0d
+	VLD1R.P	2(R1), [V2.H4]                                  // 22c4df0d
+	VLD1R	(R0), [V0.B16]                                  // 00c0404d
+	VLD1R.P	(R0), [V0.B16]                                  // 00c0df4d
+	VLD1R.P	(R15)(R1), [V15.H4]                             // VLD1R.P	(R15)(R1*1), [V15.H4] // efc5c10d
+	VLD2R	(R15), [V15.H4, V16.H4]                         // efc5600d
+	VLD2R.P	16(R0), [V0.D2, V1.D2]                          // 00ccff4d
+	VLD2R.P	(R0)(R5), [V31.D1, V0.D1]                       // VLD2R.P	(R0)(R5*1), [V31.D1, V0.D1] // 1fcce50d
+	VLD3R	(RSP), [V31.S2, V0.S2, V1.S2]                   // ffeb400d
+	VLD3R.P	6(R15), [V15.H4, V16.H4, V17.H4]                // efe5df0d
+	VLD3R.P	(R15)(R6), [V15.H8, V16.H8, V17.H8]             // VLD3R.P	(R15)(R6*1), [V15.H8, V16.H8, V17.H8] // efe5c64d
+	VLD4R	(R0), [V0.B8, V1.B8, V2.B8, V3.B8]              // 00e0600d
+	VLD4R.P	16(RSP), [V31.S4, V0.S4, V1.S4, V2.S4]          // ffebff4d
+	VLD4R.P	(R15)(R9), [V15.H4, V16.H4, V17.H4, V18.H4]     // VLD4R.P	(R15)(R9*1), [V15.H4, V16.H4, V17.H4, V18.H4] // efe5e90d
 	VST1.P	[V24.S2], 8(R2)                                 // 58789f0c
 	VST1	[V29.S2, V30.S2], (R29)                         // bdab000c
 	VST1	[V14.H4, V15.H4, V16.H4], (R27)                 // 6e67000c
diff --git a/src/cmd/asm/internal/asm/testdata/arm64error.s b/src/cmd/asm/internal/asm/testdata/arm64error.s
index 0661a47..9f37781 100644
--- a/src/cmd/asm/internal/asm/testdata/arm64error.s
+++ b/src/cmd/asm/internal/asm/testdata/arm64error.s
@@ -339,4 +339,5 @@
 	MRS	ICV_EOIR1_EL1, R3                                // ERROR "system register is not readable"
 	MRS	PMSWINC_EL0, R3                                  // ERROR "system register is not readable"
 	MRS	OSLAR_EL1, R3                                    // ERROR "system register is not readable"
+	VLD3R.P	24(R15), [V15.H4,V16.H4,V17.H4]                  // ERROR "invalid post-increment offset"
 	RET
diff --git a/src/cmd/internal/obj/arm64/asm7.go b/src/cmd/internal/obj/arm64/asm7.go
index 65f7898..0b90e31 100644
--- a/src/cmd/internal/obj/arm64/asm7.go
+++ b/src/cmd/internal/obj/arm64/asm7.go
@@ -2898,6 +2898,7 @@
 	}
 	opcode := (list >> 12) & 15
 	q := (list >> 30) & 1
+	size := (list >> 10) & 3
 	if offset == 0 {
 		return
 	}
@@ -2913,8 +2914,16 @@
 	default:
 		c.ctxt.Diag("invalid register numbers in ARM64 register list: %v", p)
 	}
-	if !(q == 0 && offset == n*8) && !(q == 1 && offset == n*16) {
-		c.ctxt.Diag("invalid post-increment offset: %v", p)
+
+	switch as {
+	case AVLD1R, AVLD2R, AVLD3R, AVLD4R:
+		if offset != n*(1<<uint(size)) {
+			c.ctxt.Diag("invalid post-increment offset: %v", p)
+		}
+	default:
+		if !(q == 0 && offset == n*8) && !(q == 1 && offset == n*16) {
+			c.ctxt.Diag("invalid post-increment offset: %v", p)
+		}
 	}
 
 	switch as {