internal/bytealg: port bytes.Index and bytes.Count to reg ABI on ppc64x
This change adds support for the reg ABI to the Index and Count
functions for ppc64/ppc64le.
Most Index and Count benchmarks show improvement in performance on
POWER9 with this change. Similar numbers observed on POWER8 and POWER10.
name old time/op new time/op delta
Index/32 71.0ns ± 0% 67.9ns ± 0% -4.42% (p=0.001 n=7+6)
IndexEasy/10 17.5ns ± 0% 17.2ns ± 0% -1.30% (p=0.001 n=7+7)
name old time/op new time/op delta
Count/10 26.6ns ± 0% 25.0ns ± 1% -6.02% (p=0.001 n=7+7)
Count/32 78.6ns ± 0% 74.7ns ± 0% -4.97% (p=0.001 n=7+7)
Count/4K 5.03µs ± 0% 5.03µs ± 0% -0.07% (p=0.000 n=6+7)
CountEasy/10 26.9ns ± 0% 25.2ns ± 1% -6.31% (p=0.001 n=7+7)
CountSingle/32 11.8ns ± 0% 9.9ns ± 0% -15.70% (p=0.002 n=6+6)
Change-Id: Ibd146c04f8107291c55f9e6100b8264dfccc41ae
Reviewed-on: https://go-review.googlesource.com/c/go/+/355509
Reviewed-by: Cherry Mui <cherryyz@google.com>
Reviewed-by: Lynn Boger <laboger@linux.vnet.ibm.com>
Run-TryBot: Cherry Mui <cherryyz@google.com>
TryBot-Result: Go Bot <gobot@golang.org>
diff --git a/src/internal/bytealg/count_ppc64x.s b/src/internal/bytealg/count_ppc64x.s
index 94163cb..dbafd06 100644
--- a/src/internal/bytealg/count_ppc64x.s
+++ b/src/internal/bytealg/count_ppc64x.s
@@ -8,24 +8,37 @@
#include "go_asm.h"
#include "textflag.h"
-TEXT ·Count(SB), NOSPLIT|NOFRAME, $0-40
+TEXT ·Count<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-40
+#ifdef GOEXPERIMENT_regabiargs
+// R3 = byte array pointer
+// R4 = length
+ MOVBZ R6,R5 // R5 = byte
+#else
+
MOVD b_base+0(FP), R3 // R3 = byte array pointer
MOVD b_len+8(FP), R4 // R4 = length
MOVBZ c+24(FP), R5 // R5 = byte
MOVD $ret+32(FP), R14 // R14 = &ret
+#endif
BR countbytebody<>(SB)
-TEXT ·CountString(SB), NOSPLIT|NOFRAME, $0-32
+TEXT ·CountString<ABIInternal>(SB), NOSPLIT|NOFRAME, $0-32
+#ifdef GOEXPERIMENT_regabiargs
+// R3 = byte array pointer
+// R4 = length
+ MOVBZ R5,R5 // R5 = byte
+#else
MOVD s_base+0(FP), R3 // R3 = string
MOVD s_len+8(FP), R4 // R4 = length
MOVBZ c+16(FP), R5 // R5 = byte
MOVD $ret+24(FP), R14 // R14 = &ret
+#endif
BR countbytebody<>(SB)
// R3: addr of string
// R4: len of string
// R5: byte to count
-// R14: addr for return value
+// R14: addr for return value when not regabi
// endianness shouldn't matter since we are just counting and order
// is irrelevant
TEXT countbytebody<>(SB), NOSPLIT|NOFRAME, $0-0
@@ -94,5 +107,10 @@
BR small
done:
+#ifdef GOEXPERIMENT_regabiargs
+ MOVD R18, R3 // return count
+#else
MOVD R18, (R14) // return count
+#endif
+
RET
diff --git a/src/internal/bytealg/index_ppc64x.s b/src/internal/bytealg/index_ppc64x.s
index 3ed9442..f587a8a 100644
--- a/src/internal/bytealg/index_ppc64x.s
+++ b/src/internal/bytealg/index_ppc64x.s
@@ -46,12 +46,20 @@
GLOBL byteswap<>+0(SB), RODATA, $16
-TEXT ·Index(SB), NOSPLIT|NOFRAME, $0-56
+TEXT ·Index<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-56
+#ifdef GOEXPERIMENT_regabiargs
+// R3 = byte array pointer
+// R4 = length
+ MOVD R6,R5 // R5 = separator pointer
+ MOVD R7,R6 // R6 = separator length
+#else
MOVD a_base+0(FP), R3 // R3 = byte array pointer
MOVD a_len+8(FP), R4 // R4 = length
MOVD b_base+24(FP), R5 // R5 = separator pointer
MOVD b_len+32(FP), R6 // R6 = separator length
MOVD $ret+48(FP), R14 // R14 = &ret
+#endif
+
#ifdef GOARCH_ppc64le
MOVBZ internal∕cpu·PPC64+const_offsetPPC64HasPOWER9(SB), R7
@@ -63,12 +71,15 @@
power8:
BR indexbody<>(SB)
-TEXT ·IndexString(SB), NOSPLIT|NOFRAME, $0-40
+TEXT ·IndexString<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-40
+#ifndef GOEXPERIMENT_regabiargs
MOVD a_base+0(FP), R3 // R3 = string
MOVD a_len+8(FP), R4 // R4 = length
MOVD b_base+16(FP), R5 // R5 = separator pointer
MOVD b_len+24(FP), R6 // R6 = separator length
MOVD $ret+32(FP), R14 // R14 = &ret
+#endif
+
#ifdef GOARCH_ppc64le
MOVBZ internal∕cpu·PPC64+const_offsetPPC64HasPOWER9(SB), R7
@@ -420,8 +431,12 @@
BR index17to32loop // Continue
notfound:
+#ifdef GOEXPERIMENT_regabiargs
+ MOVD $-1, R3 // Return -1 if not found
+#else
MOVD $-1, R8 // Return -1 if not found
MOVD R8, (R14)
+#endif
RET
index33plus:
@@ -432,12 +447,20 @@
SRD $3, R25 // Convert from bits to bytes
ADD R25, R7 // Add to current string address
SUB R3, R7 // Subtract from start of string
+#ifdef GOEXPERIMENT_regabiargs
+ MOVD R7, R3 // Return byte where found
+#else
MOVD R7, (R14) // Return byte where found
+#endif
RET
found:
SUB R3, R7 // Return byte where found
+#ifdef GOEXPERIMENT_regabiargs
+ MOVD R7, R3
+#else
MOVD R7, (R14)
+#endif
RET
TEXT indexbodyp9<>(SB), NOSPLIT|NOFRAME, $0
@@ -746,8 +769,12 @@
BR index17to32loop // Continue
notfound:
+#ifdef GOEXPERIMENT_regabiargs
+ MOVD $-1, R3 // Return -1 if not found
+#else
MOVD $-1, R8 // Return -1 if not found
MOVD R8, (R14)
+#endif
RET
index33plus:
@@ -758,11 +785,19 @@
SRD $3, R25 // Convert from bits to bytes
ADD R25, R7 // Add to current string address
SUB R3, R7 // Subtract from start of string
+#ifdef GOEXPERIMENT_regabiargs
+ MOVD R7, R3 // Return byte where found
+#else
MOVD R7, (R14) // Return byte where found
+#endif
RET
found:
SUB R3, R7 // Return byte where found
+#ifdef GOEXPERIMENT_regabiargs
+ MOVD R7, R3
+#else
MOVD R7, (R14)
+#endif
RET