internal/bytealg: port bytealg functions to reg ABI on riscv64

This CL adds support for the reg ABI to the bytes functions for
riscv64. These are initially under control of the
GOEXPERIMENT macro until all changes are in.

Change-Id: I026295ae38e2aba055f7a51c77f92c1921e5ec97
Reviewed-on: https://go-review.googlesource.com/c/go/+/361916
Run-TryBot: mzh <mzh@golangcn.org>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Reviewed-by: Ian Lance Taylor <iant@google.com>
diff --git a/src/internal/bytealg/compare_riscv64.s b/src/internal/bytealg/compare_riscv64.s
index 0dc6251..7d2f8d6 100644
--- a/src/internal/bytealg/compare_riscv64.s
+++ b/src/internal/bytealg/compare_riscv64.s
@@ -5,161 +5,179 @@
 #include "go_asm.h"
 #include "textflag.h"
 
-TEXT ·Compare(SB),NOSPLIT|NOFRAME,$0-56
-	MOV	a_base+0(FP), X5
-	MOV	a_len+8(FP), X6
-	MOV	b_base+24(FP), X7
-	MOV	b_len+32(FP), X8
-	MOV	$ret+48(FP), X9
+TEXT ·Compare<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-56
+#ifndef GOEXPERIMENT_regabiargs
+	MOV	a_base+0(FP), X10
+	MOV	a_len+8(FP), X11
+	MOV	b_base+24(FP), X12
+	MOV	b_len+32(FP), X13
+	MOV	$ret+48(FP), X14
+#else
+	// X10 = a_base
+	// X11 = a_len
+	// X12 = a_cap (unused)
+	// X13 = b_base (want in X12)
+	// X14 = b_len (want in X13)
+	// X15 = b_cap (unused)
+	MOV	X13, X12
+	MOV	X14, X13
+#endif
 	JMP	compare<>(SB)
 
-TEXT runtime·cmpstring(SB),NOSPLIT|NOFRAME,$0-40
-	MOV	a_base+0(FP), X5
-	MOV	a_len+8(FP), X6
-	MOV	b_base+16(FP), X7
-	MOV	b_len+24(FP), X8
-	MOV	$ret+32(FP), X9
+TEXT runtime·cmpstring<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-40
+#ifndef GOEXPERIMENT_regabiargs
+	MOV	a_base+0(FP), X10
+	MOV	a_len+8(FP), X11
+	MOV	b_base+16(FP), X12
+	MOV	b_len+24(FP), X13
+	MOV	$ret+32(FP), X14
+#endif
+	// X10 = a_base
+	// X11 = a_len
+	// X12 = b_base
+	// X13 = b_len
 	JMP	compare<>(SB)
 
 // On entry:
-// X5 points to start of a
-// X6 length of a
-// X7 points to start of b
-// X8 length of b
-// X9 points to the address to store the return value (-1/0/1)
+// X10 points to start of a
+// X11 length of a
+// X12 points to start of b
+// X13 length of b
+// for non-regabi X14 points to the address to store the return value (-1/0/1)
+// for regabi the return value in X10
 TEXT compare<>(SB),NOSPLIT|NOFRAME,$0
-	BEQ	X5, X7, cmp_len
+	BEQ	X10, X12, cmp_len
 
-	MOV	X6, X10
-	BGE	X8, X10, use_a_len // X10 = min(len(a), len(b))
-	MOV	X8, X10
+	MOV	X11, X5
+	BGE	X13, X5, use_a_len // X5 = min(len(a), len(b))
+	MOV	X13, X5
 use_a_len:
-	BEQZ	X10, cmp_len
+	BEQZ	X5, cmp_len
 
-	MOV	$32, X11
-	BLT	X10, X11, loop4_check
+	MOV	$32, X6
+	BLT	X5, X6, loop4_check
 
 	// Check alignment - if alignment differs we have to do one byte at a time.
-	AND	$3, X5, X12
-	AND	$3, X7, X13
-	BNE	X12, X13, loop4_check
-	BEQZ	X12, loop32_check
+	AND	$3, X10, X7
+	AND	$3, X12, X8
+	BNE	X7, X8, loop4_check
+	BEQZ	X7, loop32_check
 
 	// Check one byte at a time until we reach 8 byte alignment.
-	SUB	X12, X10, X10
+	SUB	X7, X5, X5
 align:
-	ADD	$-1, X12
-	MOVBU	0(X5), X13
-	MOVBU	0(X7), X14
-	BNE	X13, X14, cmp
-	ADD	$1, X5
-	ADD	$1, X7
-	BNEZ	X12, align
+	ADD	$-1, X7
+	MOVBU	0(X10), X8
+	MOVBU	0(X12), X9
+	BNE	X8, X9, cmp
+	ADD	$1, X10
+	ADD	$1, X12
+	BNEZ	X7, align
 
 loop32_check:
-	MOV	$32, X12
-	BLT	X10, X12, loop16_check
+	MOV	$32, X7
+	BLT	X5, X7, loop16_check
 loop32:
-	MOV	0(X5), X15
-	MOV	0(X7), X16
-	MOV	8(X5), X17
-	MOV	8(X7), X18
+	MOV	0(X10), X15
+	MOV	0(X12), X16
+	MOV	8(X10), X17
+	MOV	8(X12), X18
 	BEQ	X15, X16, loop32a
 	JMP	cmp8a
 loop32a:
 	BEQ	X17, X18, loop32b
 	JMP	cmp8b
 loop32b:
-	MOV	16(X5), X15
-	MOV	16(X7), X16
-	MOV	24(X5), X17
-	MOV	24(X7), X18
+	MOV	16(X10), X15
+	MOV	16(X12), X16
+	MOV	24(X10), X17
+	MOV	24(X12), X18
 	BEQ	X15, X16, loop32c
 	JMP	cmp8a
 loop32c:
 	BEQ	X17, X18, loop32d
 	JMP	cmp8b
 loop32d:
-	ADD	$32, X5
-	ADD	$32, X7
-	ADD	$-32, X10
-	BGE	X10, X12, loop32
-	BEQZ	X10, cmp_len
+	ADD	$32, X10
+	ADD	$32, X12
+	ADD	$-32, X5
+	BGE	X5, X7, loop32
+	BEQZ	X5, cmp_len
 
 loop16_check:
-	MOV	$16, X11
-	BLT	X10, X11, loop4_check
+	MOV	$16, X6
+	BLT	X5, X6, loop4_check
 loop16:
-	MOV	0(X5), X15
-	MOV	0(X7), X16
-	MOV	8(X5), X17
-	MOV	8(X7), X18
+	MOV	0(X10), X15
+	MOV	0(X12), X16
+	MOV	8(X10), X17
+	MOV	8(X12), X18
 	BEQ	X15, X16, loop16a
 	JMP	cmp8a
 loop16a:
 	BEQ	X17, X18, loop16b
 	JMP	cmp8b
 loop16b:
-	ADD	$16, X5
-	ADD	$16, X7
-	ADD	$-16, X10
-	BGE	X10, X11, loop16
-	BEQZ	X10, cmp_len
+	ADD	$16, X10
+	ADD	$16, X12
+	ADD	$-16, X5
+	BGE	X5, X6, loop16
+	BEQZ	X5, cmp_len
 
 loop4_check:
-	MOV	$4, X11
-	BLT	X10, X11, loop1
+	MOV	$4, X6
+	BLT	X5, X6, loop1
 loop4:
-	MOVBU	0(X5), X13
-	MOVBU	0(X7), X14
-	MOVBU	1(X5), X15
-	MOVBU	1(X7), X16
-	BEQ	X13, X14, loop4a
-	SLTU	X14, X13, X10
-	SLTU	X13, X14, X11
+	MOVBU	0(X10), X8
+	MOVBU	0(X12), X9
+	MOVBU	1(X10), X15
+	MOVBU	1(X12), X16
+	BEQ	X8, X9, loop4a
+	SLTU	X9, X8, X5
+	SLTU	X8, X9, X6
 	JMP	cmp_ret
 loop4a:
 	BEQ	X15, X16, loop4b
-	SLTU	X16, X15, X10
-	SLTU	X15, X16, X11
+	SLTU	X16, X15, X5
+	SLTU	X15, X16, X6
 	JMP	cmp_ret
 loop4b:
-	MOVBU	2(X5), X21
-	MOVBU	2(X7), X22
-	MOVBU	3(X5), X23
-	MOVBU	3(X7), X24
+	MOVBU	2(X10), X21
+	MOVBU	2(X12), X22
+	MOVBU	3(X10), X23
+	MOVBU	3(X12), X24
 	BEQ	X21, X22, loop4c
-	SLTU	X22, X21, X10
-	SLTU	X21, X22, X11
+	SLTU	X22, X21, X5
+	SLTU	X21, X22, X6
 	JMP	cmp_ret
 loop4c:
 	BEQ	X23, X24, loop4d
-	SLTU	X24, X23, X10
-	SLTU	X23, X24, X11
+	SLTU	X24, X23, X5
+	SLTU	X23, X24, X6
 	JMP	cmp_ret
 loop4d:
-	ADD	$4, X5
-	ADD	$4, X7
-	ADD	$-4, X10
-	BGE	X10, X11, loop4
+	ADD	$4, X10
+	ADD	$4, X12
+	ADD	$-4, X5
+	BGE	X5, X6, loop4
 
 loop1:
-	BEQZ	X10, cmp_len
-	MOVBU	0(X5), X13
-	MOVBU	0(X7), X14
-	BNE	X13, X14, cmp
-	ADD	$1, X5
-	ADD	$1, X7
-	ADD	$-1, X10
+	BEQZ	X5, cmp_len
+	MOVBU	0(X10), X8
+	MOVBU	0(X12), X9
+	BNE	X8, X9, cmp
+	ADD	$1, X10
+	ADD	$1, X12
+	ADD	$-1, X5
 	JMP	loop1
 
 	// Compare 8 bytes of memory in X15/X16 that are known to differ.
 cmp8a:
 	MOV	$0xff, X19
 cmp8a_loop:
-	AND	X15, X19, X13
-	AND	X16, X19, X14
-	BNE	X13, X14, cmp
+	AND	X15, X19, X8
+	AND	X16, X19, X9
+	BNE	X8, X9, cmp
 	SLLI	$8, X19
 	JMP	cmp8a_loop
 
@@ -167,19 +185,21 @@
 cmp8b:
 	MOV	$0xff, X19
 cmp8b_loop:
-	AND	X17, X19, X13
-	AND	X18, X19, X14
-	BNE	X13, X14, cmp
+	AND	X17, X19, X8
+	AND	X18, X19, X9
+	BNE	X8, X9, cmp
 	SLLI	$8, X19
 	JMP	cmp8b_loop
 
 cmp_len:
-	MOV	X6, X13
-	MOV	X8, X14
+	MOV	X11, X8
+	MOV	X13, X9
 cmp:
-	SLTU	X14, X13, X10
-	SLTU	X13, X14, X11
+	SLTU	X9, X8, X5
+	SLTU	X8, X9, X6
 cmp_ret:
-	SUB	X10, X11, X12
-	MOV	X12, (X9)
+	SUB	X5, X6, X10
+#ifndef GOEXPERIMENT_regabiargs
+	MOV	X10, (X14)
+#endif
 	RET
diff --git a/src/internal/bytealg/count_riscv64.s b/src/internal/bytealg/count_riscv64.s
index 3f4eb23..1e081e2 100644
--- a/src/internal/bytealg/count_riscv64.s
+++ b/src/internal/bytealg/count_riscv64.s
@@ -5,40 +5,61 @@
 #include "go_asm.h"
 #include "textflag.h"
 
-TEXT ·Count(SB),NOSPLIT,$0-40
-	MOV	b_base+0(FP), A1
-	MOV	b_len+8(FP), A2
-	MOVBU	c+24(FP), A3	// byte to count
-	MOV	ZERO, A4	// count
-	ADD	A1, A2		// end
+TEXT ·Count<ABIInternal>(SB),NOSPLIT,$0-40
+#ifndef GOEXPERIMENT_regabiargs
+	MOV	b_base+0(FP), X10
+	MOV	b_len+8(FP), X11
+	MOVBU	c+24(FP), X12	// byte to count
+#else
+	// X10 = b_base
+	// X11 = b_len
+	// X12 = b_cap (unused)
+	// X13 = byte to count (want in X12)
+	MOV	X13, X12
+#endif
+	MOV	ZERO, X14	// count
+	ADD	X10, X11	// end
 
 loop:
-	BEQ	A1, A2, done
-	MOVBU	(A1), A5
-	ADD	$1, A1
-	BNE	A3, A5, loop
-	ADD	$1, A4
+	BEQ	X10, X11, done
+	MOVBU	(X10), X15
+	ADD	$1, X10
+	BNE	X12, X15, loop
+	ADD	$1, X14
 	JMP	loop
 
 done:
-	MOV	A4, ret+32(FP)
+#ifndef GOEXPERIMENT_regabiargs
+	MOV	X14, ret+32(FP)
+#else
+	MOV	X14, X10
+#endif
 	RET
 
-TEXT ·CountString(SB),NOSPLIT,$0-32
-	MOV	s_base+0(FP), A1
-	MOV	s_len+8(FP), A2
-	MOVBU	c+16(FP), A3	// byte to count
-	MOV	ZERO, A4	// count
-	ADD	A1, A2		// end
+TEXT ·CountString<ABIInternal>(SB),NOSPLIT,$0-32
+#ifndef GOEXPERIMENT_regabiargs
+	MOV	s_base+0(FP), X10
+	MOV	s_len+8(FP), X11
+	MOVBU	c+16(FP), X12	// byte to count
+#endif
+	// X10 = s_base
+	// X11 = s_len
+	// X12 = byte to count
+	MOV	ZERO, X14	// count
+	ADD	X10, X11	// end
 
 loop:
-	BEQ	A1, A2, done
-	MOVBU	(A1), A5
-	ADD	$1, A1
-	BNE	A3, A5, loop
-	ADD	$1, A4
+	BEQ	X10, X11, done
+	MOVBU	(X10), X15
+	ADD	$1, X10
+	BNE	X12, X15, loop
+	ADD	$1, X14
 	JMP	loop
 
 done:
-	MOV	A4, ret+24(FP)
+#ifndef GOEXPERIMENT_regabiargs
+	MOV	X14, ret+24(FP)
+#else
+	MOV	X14, X10
+#endif
 	RET
diff --git a/src/internal/bytealg/equal_riscv64.s b/src/internal/bytealg/equal_riscv64.s
index 5dd13be..77202d6 100644
--- a/src/internal/bytealg/equal_riscv64.s
+++ b/src/internal/bytealg/equal_riscv64.s
@@ -8,120 +8,137 @@
 #define	CTXT	S10
 
 // func memequal(a, b unsafe.Pointer, size uintptr) bool
-TEXT runtime·memequal(SB),NOSPLIT|NOFRAME,$0-25
-	MOV	a+0(FP), X5
-	MOV	b+8(FP), X6
-	MOV	size+16(FP), X7
-	MOV	$ret+24(FP), X19
+TEXT runtime·memequal<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-25
+#ifndef GOEXPERIMENT_regabiargs
+	MOV	a+0(FP), X10
+	MOV	b+8(FP), X11
+	MOV	size+16(FP), X12
+	MOV	$ret+24(FP), X13
+#endif
+	// X10 = a_base
+	// X11 = b_base
+	// X12 = size
 	JMP	memequal<>(SB)
 
 // func memequal_varlen(a, b unsafe.Pointer) bool
-TEXT runtime·memequal_varlen(SB),NOSPLIT|NOFRAME,$0-17
-	MOV	a+0(FP), X5
-	MOV	b+8(FP), X6
-	MOV	8(CTXT), X7    // compiler stores size at offset 8 in the closure
-	MOV	$ret+16(FP), X19
+TEXT runtime·memequal_varlen<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-17
+	MOV	8(CTXT), X12    // compiler stores size at offset 8 in the closure
+#ifndef GOEXPERIMENT_regabiargs
+	MOV	a+0(FP), X10
+	MOV	b+8(FP), X11
+	MOV	$ret+16(FP), X13
+#endif
+	// X10 = a_base
+	// X11 = b_base
 	JMP	memequal<>(SB)
 
-// On entry X5 and X6 contain pointers, X7 contains length.
-// X19 contains address for return value.
+// On entry X10 and X11 contain pointers, X12 contains length.
+// For non-regabi X13 contains address for return value.
+// For regabi return value in X10.
 TEXT memequal<>(SB),NOSPLIT|NOFRAME,$0
-	BEQ	X5, X6, eq
+	BEQ	X10, X11, eq
 
-	MOV	$32, X8
-	BLT	X7, X8, loop4_check
+	MOV	$32, X23
+	BLT	X12, X23, loop4_check
 
 	// Check alignment - if alignment differs we have to do one byte at a time.
-	AND	$3, X5, X9
-	AND	$3, X6, X10
-	BNE	X9, X10, loop4_check
+	AND	$3, X10, X9
+	AND	$3, X11, X19
+	BNE	X9, X19, loop4_check
 	BEQZ	X9, loop32_check
 
 	// Check one byte at a time until we reach 8 byte alignment.
-	SUB	X9, X7, X7
+	SUB	X9, X12, X12
 align:
 	ADD	$-1, X9
-	MOVBU	0(X5), X10
-	MOVBU	0(X6), X11
-	BNE	X10, X11, not_eq
-	ADD	$1, X5
-	ADD	$1, X6
+	MOVBU	0(X10), X19
+	MOVBU	0(X11), X20
+	BNE	X19, X20, not_eq
+	ADD	$1, X10
+	ADD	$1, X11
 	BNEZ	X9, align
 
 loop32_check:
 	MOV	$32, X9
-	BLT	X7, X9, loop16_check
+	BLT	X12, X9, loop16_check
 loop32:
-	MOV	0(X5), X10
-	MOV	0(X6), X11
-	MOV	8(X5), X12
-	MOV	8(X6), X13
-	BNE	X10, X11, not_eq
-	BNE	X12, X13, not_eq
-	MOV	16(X5), X14
-	MOV	16(X6), X15
-	MOV	24(X5), X16
-	MOV	24(X6), X17
+	MOV	0(X10), X19
+	MOV	0(X11), X20
+	MOV	8(X10), X21
+	MOV	8(X11), X22
+	BNE	X19, X20, not_eq
+	BNE	X21, X22, not_eq
+	MOV	16(X10), X14
+	MOV	16(X11), X15
+	MOV	24(X10), X16
+	MOV	24(X11), X17
 	BNE	X14, X15, not_eq
 	BNE	X16, X17, not_eq
-	ADD	$32, X5
-	ADD	$32, X6
-	ADD	$-32, X7
-	BGE	X7, X9, loop32
-	BEQZ	X7, eq
+	ADD	$32, X10
+	ADD	$32, X11
+	ADD	$-32, X12
+	BGE	X12, X9, loop32
+	BEQZ	X12, eq
 
 loop16_check:
-	MOV	$16, X8
-	BLT	X7, X8, loop4_check
+	MOV	$16, X23
+	BLT	X12, X23, loop4_check
 loop16:
-	MOV	0(X5), X10
-	MOV	0(X6), X11
-	MOV	8(X5), X12
-	MOV	8(X6), X13
-	BNE	X10, X11, not_eq
-	BNE	X12, X13, not_eq
-	ADD	$16, X5
-	ADD	$16, X6
-	ADD	$-16, X7
-	BGE	X7, X8, loop16
-	BEQZ	X7, eq
+	MOV	0(X10), X19
+	MOV	0(X11), X20
+	MOV	8(X10), X21
+	MOV	8(X11), X22
+	BNE	X19, X20, not_eq
+	BNE	X21, X22, not_eq
+	ADD	$16, X10
+	ADD	$16, X11
+	ADD	$-16, X12
+	BGE	X12, X23, loop16
+	BEQZ	X12, eq
 
 loop4_check:
-	MOV	$4, X8
-	BLT	X7, X8, loop1
+	MOV	$4, X23
+	BLT	X12, X23, loop1
 loop4:
-	MOVBU	0(X5), X10
-	MOVBU	0(X6), X11
-	MOVBU	1(X5), X12
-	MOVBU	1(X6), X13
-	BNE	X10, X11, not_eq
-	BNE	X12, X13, not_eq
-	MOVBU	2(X5), X14
-	MOVBU	2(X6), X15
-	MOVBU	3(X5), X16
-	MOVBU	3(X6), X17
+	MOVBU	0(X10), X19
+	MOVBU	0(X11), X20
+	MOVBU	1(X10), X21
+	MOVBU	1(X11), X22
+	BNE	X19, X20, not_eq
+	BNE	X21, X22, not_eq
+	MOVBU	2(X10), X14
+	MOVBU	2(X11), X15
+	MOVBU	3(X10), X16
+	MOVBU	3(X11), X17
 	BNE	X14, X15, not_eq
 	BNE	X16, X17, not_eq
-	ADD	$4, X5
-	ADD	$4, X6
-	ADD	$-4, X7
-	BGE	X7, X8, loop4
+	ADD	$4, X10
+	ADD	$4, X11
+	ADD	$-4, X12
+	BGE	X12, X23, loop4
 
 loop1:
-	BEQZ	X7, eq
-	MOVBU	0(X5), X10
-	MOVBU	0(X6), X11
-	BNE	X10, X11, not_eq
-	ADD	$1, X5
-	ADD	$1, X6
-	ADD	$-1, X7
+	BEQZ	X12, eq
+	MOVBU	0(X10), X19
+	MOVBU	0(X11), X20
+	BNE	X19, X20, not_eq
+	ADD	$1, X10
+	ADD	$1, X11
+	ADD	$-1, X12
 	JMP	loop1
 
 not_eq:
-	MOV	$0, X5
-	MOVB	X5, (X19)
+#ifndef GOEXPERIMENT_regabiargs
+	MOVB	ZERO, (X13)
+#else
+	MOVB	ZERO, X10
+#endif
 	RET
 eq:
-	MOV	$1, X5
-	MOVB	X5, (X19)
+#ifndef GOEXPERIMENT_regabiargs
+	MOV	$1, X10
+	MOVB	X10, (X13)
+#else
+	MOV	$1, X10
+#endif
 	RET
diff --git a/src/internal/bytealg/indexbyte_riscv64.s b/src/internal/bytealg/indexbyte_riscv64.s
index 156c303..e9d3e6b 100644
--- a/src/internal/bytealg/indexbyte_riscv64.s
+++ b/src/internal/bytealg/indexbyte_riscv64.s
@@ -5,48 +5,67 @@
 #include "go_asm.h"
 #include "textflag.h"
 
-TEXT ·IndexByte(SB),NOSPLIT,$0-40
-	MOV	b_base+0(FP), A1
-	MOV	b_len+8(FP), A2
-	MOVBU	c+24(FP), A3	// byte to find
-	MOV	A1, A4		// store base for later
-	ADD	A1, A2		// end
-	ADD	$-1, A1
+TEXT ·IndexByte<ABIInternal>(SB),NOSPLIT,$0-40
+#ifndef GOEXPERIMENT_regabiargs
+	MOV	b_base+0(FP), X10
+	MOV	b_len+8(FP), X11
+	MOVBU	c+24(FP), X13	// byte to find
+#endif
+	// X10 = b_base
+	// X11 = b_len
+	// X12 = b_cap (unused)
+	// X13 = byte to find
+	MOV	X10, X12		// store base for later
+	ADD	X10, X11		// end
+	ADD	$-1, X10
 
 loop:
-	ADD	$1, A1
-	BEQ	A1, A2, notfound
-	MOVBU	(A1), A5
-	BNE	A3, A5, loop
+	ADD	$1, X10
+	BEQ	X10, X11, notfound
+	MOVBU	(X10), X14
+	BNE	X13, X14, loop
 
-	SUB	A4, A1		// remove base
-	MOV	A1, ret+32(FP)
+	SUB	X12, X10		// remove base
+#ifndef GOEXPERIMENT_regabiargs
+	MOV	X10, ret+32(FP)
+#endif
 	RET
 
 notfound:
-	MOV	$-1, A1
-	MOV	A1, ret+32(FP)
+	MOV	$-1, X10
+#ifndef GOEXPERIMENT_regabiargs
+	MOV	X10, ret+32(FP)
+#endif
 	RET
 
-TEXT ·IndexByteString(SB),NOSPLIT,$0-32
-	MOV	s_base+0(FP), A1
-	MOV	s_len+8(FP), A2
-	MOVBU	c+16(FP), A3	// byte to find
-	MOV	A1, A4		// store base for later
-	ADD	A1, A2		// end
-	ADD	$-1, A1
+TEXT ·IndexByteString<ABIInternal>(SB),NOSPLIT,$0-32
+#ifndef GOEXPERIMENT_regabiargs
+	MOV	s_base+0(FP), X10
+	MOV	s_len+8(FP), X11
+	MOVBU	c+16(FP), X12	// byte to find
+#endif
+	// X10 = b_base
+	// X11 = b_len
+	// X12 = byte to find
+	MOV	X10, X13		// store base for later
+	ADD	X10, X11		// end
+	ADD	$-1, X10
 
 loop:
-	ADD	$1, A1
-	BEQ	A1, A2, notfound
-	MOVBU	(A1), A5
-	BNE	A3, A5, loop
+	ADD	$1, X10
+	BEQ	X10, X11, notfound
+	MOVBU	(X10), X14
+	BNE	X12, X14, loop
 
-	SUB	A4, A1		// remove base
-	MOV	A1, ret+24(FP)
+	SUB	X13, X10		// remove base
+#ifndef GOEXPERIMENT_regabiargs
+	MOV	X10, ret+24(FP)
+#endif
 	RET
 
 notfound:
-	MOV	$-1, A1
-	MOV	A1, ret+24(FP)
+	MOV	$-1, X10
+#ifndef GOEXPERIMENT_regabiargs
+	MOV	X10, ret+24(FP)
+#endif
 	RET