// blob: 3618501063b3123cd257f3b73e2b1b4c14c6900b [file] [log] [blame]
// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "go_asm.h"
#include "textflag.h"
// IndexByte entry point for a byte slice. It only normalizes the byte to
// search for and then tail-jumps to indexbytebody, which leaves the
// result (index of the first match, or -1) in R4.
//
// input:
//   R4 = b_base
//   R5 = b_len
//   R6 = b_cap (unused)
//   R7 = byte to find
TEXT ·IndexByte<ABIInternal>(SB),NOSPLIT,$0-40
	BSTRPICKV	$7, R7, $0, R7	// keep bits 7..0 only: R7 &= 0xff
	JMP	indexbytebody<>(SB)
// IndexByteString entry point for a string. The byte to find arrives in
// R6; it is zero-extended into R7, where indexbytebody expects it, and
// control is handed over with a tail-jump. indexbytebody leaves the
// result (index of the first match, or -1) in R4.
//
// input:
//   R4 = s_base
//   R5 = s_len
//   R6 = byte to find
TEXT ·IndexByteString<ABIInternal>(SB),NOSPLIT,$0-32
	BSTRPICKV	$7, R6, $0, R7	// R7 = R6 & 0xff, byte to find
	JMP	indexbytebody<>(SB)
// indexbytebody is the search loop shared by IndexByte and IndexByteString.
//
// input:
//   R4: b_base
//   R5: len
//   R7: byte to find (callers have already masked it to 8 bits)
// output:
//   R4: index of the first byte equal to R7, or -1 if there is none
// registers written besides R4:
//   R5  remaining length
//   R6  copy of the original base, used to turn a pointer into an index
//   R8  end pointer (base + len)
//   R9  loop threshold / scratch
//   R10 scratch (loaded data, bit position)
//   R11 scratch (extracted byte, offset of the matching chunk)
//   FCC0, and X0-X4 (LASX path) or V0-V4 (LSX path)
//
// Strategy: buffers of 32 bytes or more use LASX (32-byte vectors) when
// the CPU reports it, otherwise LSX (16-byte vectors) when available.
// Whatever is left, and all short buffers, is handled by a scalar loop
// that loads 8 bytes at a time followed by a byte-at-a-time loop.
TEXT indexbytebody<>(SB),NOSPLIT,$0
	BEQ	R5, notfound	// len == 0

	MOVV	R4, R6		// store base for later
	ADDV	R4, R5, R8	// end

	MOVV	$32, R9
	BGE	R5, R9, lasx	// len >= 32: try the vector paths

	// Scalar path. R4 points at the next unread byte, R5 is the number
	// of bytes left and R8 is the end pointer.
tail:
	MOVV	$8, R9
	BLT	R5, R9, lt_8
generic8_loop:
	MOVV	(R4), R10	// load 8 bytes, then test them one by one

	AND	$0xff, R10, R11
	BEQ	R7, R11, found

	BSTRPICKV	$15, R10, $8, R11
	BEQ	R7, R11, byte_1th

	BSTRPICKV	$23, R10, $16, R11
	BEQ	R7, R11, byte_2th

	BSTRPICKV	$31, R10, $24, R11
	BEQ	R7, R11, byte_3th

	BSTRPICKV	$39, R10, $32, R11
	BEQ	R7, R11, byte_4th

	BSTRPICKV	$47, R10, $40, R11
	BEQ	R7, R11, byte_5th

	BSTRPICKV	$55, R10, $48, R11
	BEQ	R7, R11, byte_6th

	BSTRPICKV	$63, R10, $56, R11
	BEQ	R7, R11, byte_7th

	ADDV	$8, R4
	ADDV	$-8, R5
	BGE	R5, R9, generic8_loop

	// Fewer than 8 bytes left: compare byte by byte until R4 reaches end.
lt_8:
	BEQ	R4, R8, notfound
	MOVBU	(R4), R10
	BEQ	R7, R10, found
	ADDV	$1, R4
	JMP	lt_8

	// byte_Nth: the match is N bytes past R4. Advance R4 to the match
	// and convert the pointer into an index relative to the saved base.
byte_1th:
	ADDV	$1, R4
	SUBV	R6, R4
	RET

byte_2th:
	ADDV	$2, R4
	SUBV	R6, R4
	RET

byte_3th:
	ADDV	$3, R4
	SUBV	R6, R4
	RET

byte_4th:
	ADDV	$4, R4
	SUBV	R6, R4
	RET

byte_5th:
	ADDV	$5, R4
	SUBV	R6, R4
	RET

byte_6th:
	ADDV	$6, R4
	SUBV	R6, R4
	RET

byte_7th:
	ADDV	$7, R4
	// fall through

	// R4 points at the matching byte; return its index.
found:
	SUBV	R6, R4
	RET

notfound:
	MOVV	$-1, R4
	RET

	// LASX path, 32-byte vectors. Entered only with len >= 32.
lasx:
	MOVBU	internal∕cpu·Loong64+const_offsetLOONG64HasLASX(SB), R9
	BEQ	R9, lsx		// no LASX: try LSX instead
	XVMOVQ	R7, X0.B32	// X0 = byte to find in all 32 byte lanes

	MOVV	$128, R9
	BLT	R5, R9, lasx32	// too short for the 128-byte unrolled loop
lasx128_loop:
	XVMOVQ	0(R4), X1
	XVMOVQ	32(R4), X2
	XVMOVQ	64(R4), X3
	XVMOVQ	96(R4), X4

	// Each compare result is written to X1 so that lasx_index_cal can
	// always read the match mask from the same register.
	XVSEQB	X1, X0, X1
	XVSETNEV	X1, FCC0
	BFPT	lasx_found_add_0

	XVSEQB	X2, X0, X1
	XVSETNEV	X1, FCC0
	BFPT	lasx_found_add_32

	XVSEQB	X3, X0, X1
	XVSETNEV	X1, FCC0
	BFPT	lasx_found_add_64

	XVSEQB	X4, X0, X1
	XVSETNEV	X1, FCC0
	BFPT	lasx_found_add_96

	ADDV	$128, R4
	ADDV	$-128, R5
	BGE	R5, R9, lasx128_loop
	BEQ	R5, notfound

	// R9 must hold the 32-byte chunk size for the loop-back test in
	// lasx32_loop, whichever way this point is reached. Leaving the
	// 128-byte threshold in R9 would end that loop after one iteration.
lasx32:
	MOVV	$32, R9
	BLT	R5, R9, tail
lasx32_loop:
	XVMOVQ	0(R4), X1

	XVSEQB	X1, X0, X1
	XVSETNEV	X1, FCC0
	BFPT	lasx_found_add_0

	ADDV	$32, R4
	ADDV	$-32, R5
	BGE	R5, R9, lasx32_loop
	BEQ	R5, notfound

	JMP	tail

	// lasx_found_add_N: the match lies in the 32-byte chunk at R4+N.
	// R11 carries N into the index computation.
lasx_found_add_0:
	MOVV	R0, R11
	JMP	lasx_index_cal

lasx_found_add_32:
	MOVV	$32, R11
	JMP	lasx_index_cal

lasx_found_add_64:
	MOVV	$64, R11
	JMP	lasx_index_cal

lasx_found_add_96:
	MOVV	$96, R11
	// fall through

	// Scan the four 64-bit lanes of the match mask in X1 from lane 0
	// upwards. A trailing-zero count of 64 means the lane is empty, so
	// move on and add 8 bytes to the offset in R11.
lasx_index_cal:
	MOVV	$64, R9
	XVMOVQ	X1.V[0], R10
	CTZV	R10, R10
	BNE	R10, R9, index_cal
	ADDV	$8, R11

	XVMOVQ	X1.V[1], R10
	CTZV	R10, R10
	BNE	R10, R9, index_cal
	ADDV	$8, R11

	XVMOVQ	X1.V[2], R10
	CTZV	R10, R10
	BNE	R10, R9, index_cal
	ADDV	$8, R11

	// The first three lanes were empty, so the match is in the last one.
	XVMOVQ	X1.V[3], R10
	CTZV	R10, R10
	JMP	index_cal

	// LSX path, 16-byte vectors. Reached from lasx, so len >= 32 here.
lsx:
	MOVBU	internal∕cpu·Loong64+const_offsetLOONG64HasLSX(SB), R9
	BEQ	R9, tail	// no vector support at all: scalar path
	VMOVQ	R7, V0.B16	// V0 = byte to find in all 16 byte lanes

	MOVV	$64, R9
	BLT	R5, R9, lsx16	// too short for the 64-byte unrolled loop
lsx64_loop:
	VMOVQ	0(R4), V1
	VMOVQ	16(R4), V2
	VMOVQ	32(R4), V3
	VMOVQ	48(R4), V4

	// As in the LASX loop, every compare result lands in V1.
	VSEQB	V1, V0, V1
	VSETNEV	V1, FCC0
	BFPT	lsx_found_add_0

	VSEQB	V2, V0, V1
	VSETNEV	V1, FCC0
	BFPT	lsx_found_add_16

	VSEQB	V3, V0, V1
	VSETNEV	V1, FCC0
	BFPT	lsx_found_add_32

	VSEQB	V4, V0, V1
	VSETNEV	V1, FCC0
	BFPT	lsx_found_add_48

	ADDV	$64, R4
	ADDV	$-64, R5
	BGE	R5, R9, lsx64_loop
	BEQ	R5, notfound

	// R9 must hold the 16-byte chunk size for the loop-back test in
	// lsx16_loop, whichever way this point is reached.
lsx16:
	MOVV	$16, R9
	BLT	R5, R9, tail
lsx16_loop:
	VMOVQ	0(R4), V1

	VSEQB	V1, V0, V1
	VSETNEV	V1, FCC0
	BFPT	lsx_found_add_0

	ADDV	$16, R4
	ADDV	$-16, R5
	BGE	R5, R9, lsx16_loop
	BEQ	R5, notfound

	JMP	tail

	// lsx_found_add_N: the match lies in the 16-byte chunk at R4+N.
lsx_found_add_0:
	MOVV	R0, R11
	JMP	lsx_index_cal

lsx_found_add_16:
	MOVV	$16, R11
	JMP	lsx_index_cal

lsx_found_add_32:
	MOVV	$32, R11
	JMP	lsx_index_cal

lsx_found_add_48:
	MOVV	$48, R11
	// fall through

	// Same lane scan as lasx_index_cal, over the two 64-bit lanes of V1.
lsx_index_cal:
	MOVV	$64, R9

	VMOVQ	V1.V[0], R10
	CTZV	R10, R10
	BNE	R10, R9, index_cal
	ADDV	$8, R11

	VMOVQ	V1.V[1], R10
	CTZV	R10, R10
	// fall through

	// R10 = bit position of the first set bit inside the matching lane,
	// R11 = byte offset of that lane from R4.
index_cal:
	SRLV	$3, R10		// bit position -> byte position
	ADDV	R11, R10
	ADDV	R10, R4		// R4 = address of the matching byte
	JMP	found