blob: 933053196449d541f57f4e0b3301281d86d5330e [file] [log] [blame]
// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "go_asm.h"
#include "textflag.h"
// Compare is the register-ABI entry point for comparing two byte slices.
// It only rearranges the incoming argument registers into the layout that
// cmpbody expects (a in R4/R5, b in R6/R7) and then tail-jumps to cmpbody,
// which leaves the -1/0/1 result in R4 and returns to Compare's caller.
TEXT ·Compare<ABIInternal>(SB),NOSPLIT,$0-56
// R4 = a_base
// R5 = a_len
// R6 = a_cap (unused)
// R7 = b_base (want in R6)
// R8 = b_len (want in R7)
// R9 = b_cap (unused)
// Order matters: R7 must be copied into R6 before R7 is overwritten with b_len.
MOVV R7, R6
MOVV R8, R7
// Tail jump (not a call): cmpbody's RET returns directly to our caller.
JMP cmpbody<>(SB)
// cmpstring is the register-ABI entry point for comparing two strings.
// The incoming registers already match the layout cmpbody expects, so this
// is a bare tail jump; cmpbody leaves the -1/0/1 result in R4.
TEXT runtime·cmpstring<ABIInternal>(SB),NOSPLIT,$0-40
// R4 = a_base
// R5 = a_len
// R6 = b_base
// R7 = b_len
// Tail jump (not a call): cmpbody's RET returns directly to our caller.
JMP cmpbody<>(SB)
// cmpbody is the shared three-way comparison used by Compare and cmpstring.
// It compares the first min(len(a), len(b)) bytes of a and b; the first
// differing byte decides the result (compared as unsigned values), and if
// no byte differs the result is decided by the lengths.
//
// input:
//    R4: points to the start of a
//    R5: length of a
//    R6: points to the start of b
//    R7: length of b
// for regabi the return value (-1/0/1) in R4
//
// Register use inside the body:
//    R4, R6:   cursors into a and b, advanced past bytes known to be equal
//    R5, R7:   original lengths, kept intact for cmp_len
//    R14:      number of bytes still to compare (starts as min(R5, R7))
//    R15:      chunk-size threshold of the loop currently running
//    R10, R11: loaded data words on the scalar paths; on the vector paths
//              R10 is a 64-bit lane of the equality mask (then a bit count)
//              and R11 is the byte offset of that lane from R4/R6
//    R16, R17: the pair of bytes handed to cmp_byte
//    R20:      the constant 64 (bit count meaning "this lane is all equal")
//    R8, R9:   scratch
//    X0/X1, V0/V1, FCC0: vector scratch on the LASX/LSX paths
TEXT cmpbody<>(SB),NOSPLIT|NOFRAME,$0
	BEQ	R4, R6, cmp_len	// same start of a and b, then compare lengths

	// R14 = min(a_len, b_len)
	SGTU	R5, R7, R9	// R9 = 1 if (a_len > b_len)
	BNE	R9, b_lt_a
	MOVV	R5, R14
	JMP	entry

b_lt_a:
	MOVV	R7, R14

entry:
	BEQ	R14, cmp_len	// minlength is 0

	// 32 bytes or more: try the wide (vector or 4x8-byte) paths first.
	MOVV	$32, R15
	BGE	R14, R15, lasx

	// Scalar tail: 8 bytes at a time, then one 4-, 2- and 1-byte step.
	// Invariant on entry: R14 > 0.
tail:
	MOVV	$8, R15
	BLT	R14, R15, lt_8
generic8_loop:
	MOVV	(R4), R10
	MOVV	(R6), R11
	BEQ	R10, R11, generic8_equal

	// R10 != R11: locate the first differing byte, lowest-order byte
	// first (the lowest-order byte is the lowest address). Also entered
	// from generic32_loop, always with R10 != R11, so one of the
	// branches below is always taken.
cmp8:
	AND	$0xff, R10, R16
	AND	$0xff, R11, R17
	BNE	R16, R17, cmp_byte

	BSTRPICKV	$15, R10, $8, R16
	BSTRPICKV	$15, R11, $8, R17
	BNE	R16, R17, cmp_byte

	BSTRPICKV	$23, R10, $16, R16
	BSTRPICKV	$23, R11, $16, R17
	BNE	R16, R17, cmp_byte

	BSTRPICKV	$31, R10, $24, R16
	BSTRPICKV	$31, R11, $24, R17
	BNE	R16, R17, cmp_byte

	BSTRPICKV	$39, R10, $32, R16
	BSTRPICKV	$39, R11, $32, R17
	BNE	R16, R17, cmp_byte

	BSTRPICKV	$47, R10, $40, R16
	BSTRPICKV	$47, R11, $40, R17
	BNE	R16, R17, cmp_byte

	BSTRPICKV	$55, R10, $48, R16
	BSTRPICKV	$55, R11, $48, R17
	BNE	R16, R17, cmp_byte

	BSTRPICKV	$63, R10, $56, R16
	BSTRPICKV	$63, R11, $56, R17
	BNE	R16, R17, cmp_byte

generic8_equal:
	ADDV	$-8, R14
	BEQ	R14, cmp_len
	ADDV	$8, R4
	ADDV	$8, R6
	BGE	R14, R15, generic8_loop

	// 1..7 bytes left.
lt_8:
	MOVV	$4, R15
	BLT	R14, R15, lt_4
	MOVWU	(R4), R10
	MOVWU	(R6), R11
	BEQ	R10, R11, lt_8_equal

	AND	$0xff, R10, R16
	AND	$0xff, R11, R17
	BNE	R16, R17, cmp_byte

	BSTRPICKV	$15, R10, $8, R16
	BSTRPICKV	$15, R11, $8, R17
	BNE	R16, R17, cmp_byte

	BSTRPICKV	$23, R10, $16, R16
	BSTRPICKV	$23, R11, $16, R17
	BNE	R16, R17, cmp_byte

	BSTRPICKV	$31, R10, $24, R16
	BSTRPICKV	$31, R11, $24, R17
	BNE	R16, R17, cmp_byte

lt_8_equal:
	ADDV	$-4, R14
	BEQ	R14, cmp_len
	ADDV	$4, R4
	ADDV	$4, R6

	// 1..3 bytes left.
lt_4:
	MOVV	$2, R15
	BLT	R14, R15, lt_2
	MOVHU	(R4), R10
	MOVHU	(R6), R11
	BEQ	R10, R11, lt_4_equal

	AND	$0xff, R10, R16
	AND	$0xff, R11, R17
	BNE	R16, R17, cmp_byte

	BSTRPICKV	$15, R10, $8, R16
	BSTRPICKV	$15, R11, $8, R17
	BNE	R16, R17, cmp_byte

lt_4_equal:
	ADDV	$-2, R14
	BEQ	R14, cmp_len
	ADDV	$2, R4
	ADDV	$2, R6

	// Exactly 1 byte left (every earlier step leaves via cmp_len when R14 hits 0).
lt_2:
	MOVBU	(R4), R16
	MOVBU	(R6), R17
	BNE	R16, R17, cmp_byte
	JMP	cmp_len

	// Compare 1 byte taken from R16/R17 that are known to differ.
cmp_byte:
	SGTU	R16, R17, R4	// R4 = 1 if (R16 > R17)
	BNE	R0, R4, ret
	MOVV	$-1, R4
	RET

	// All compared bytes are equal: the result is the sign of (a_len - b_len).
cmp_len:
	SGTU	R5, R7, R8	// R8 = 1 if (a_len > b_len)
	SGTU	R7, R5, R9	// R9 = 1 if (b_len > a_len)
	SUBV	R9, R8, R4	// R4 = R8 - R9

ret:
	RET

	// LASX path: 32-byte vectors. Reached only with R14 >= 32.
lasx:
	MOVV	$64, R20	// CTOV result for a fully-equal 64-bit lane
	MOVBU	internal∕cpu·Loong64+const_offsetLOONG64HasLASX(SB), R9
	BEQ	R9, lsx

	MOVV	$128, R15
	// Fewer than 128 bytes: skip the unrolled loop, but go through
	// lasx32_entry so that R15 is reloaded with 32 and the 32-byte loop
	// keeps iterating instead of giving up after one pass.
	BLT	R14, R15, lasx32_entry
lasx128_loop:
	// XVSEQB yields an all-ones byte where the inputs are equal and a zero
	// byte where they differ; XVSETANYEQB sets FCC0 if any byte is zero,
	// i.e. if this 32-byte chunk contains a mismatch.
	XVMOVQ	(R4), X0
	XVMOVQ	(R6), X1
	XVSEQB	X0, X1, X0
	XVSETANYEQB	X0, FCC0
	BFPT	lasx_found_0

	XVMOVQ	32(R4), X0
	XVMOVQ	32(R6), X1
	XVSEQB	X0, X1, X0
	XVSETANYEQB	X0, FCC0
	BFPT	lasx_found_32

	XVMOVQ	64(R4), X0
	XVMOVQ	64(R6), X1
	XVSEQB	X0, X1, X0
	XVSETANYEQB	X0, FCC0
	BFPT	lasx_found_64

	XVMOVQ	96(R4), X0
	XVMOVQ	96(R6), X1
	XVSEQB	X0, X1, X0
	XVSETANYEQB	X0, FCC0
	BFPT	lasx_found_96

	ADDV	$-128, R14
	BEQ	R14, cmp_len
	ADDV	$128, R4
	ADDV	$128, R6
	BGE	R14, R15, lasx128_loop

lasx32_entry:
	MOVV	$32, R15
	BLT	R14, R15, tail
lasx32_loop:
	XVMOVQ	(R4), X0
	XVMOVQ	(R6), X1
	XVSEQB	X0, X1, X0
	XVSETANYEQB	X0, FCC0
	BFPT	lasx_found_0

	ADDV	$-32, R14
	BEQ	R14, cmp_len
	ADDV	$32, R4
	ADDV	$32, R6
	BGE	R14, R15, lasx32_loop
	JMP	tail

	// R11 = byte offset (from R4/R6) of the 32-byte chunk whose mask is in X0.
lasx_found_0:
	MOVV	R0, R11
	JMP	lasx_find_byte

lasx_found_32:
	MOVV	$32, R11
	JMP	lasx_find_byte

lasx_found_64:
	MOVV	$64, R11
	JMP	lasx_find_byte

lasx_found_96:
	MOVV	$96, R11

	// Scan the four 64-bit lanes of the mask in X0. CTOV counts trailing
	// one bits, so a result of 64 (R20) means the whole lane compared equal
	// and the search moves on to the next lane (R11 += 8).
lasx_find_byte:
	XVMOVQ	X0.V[0], R10
	CTOV	R10, R10
	BNE	R10, R20, find_byte
	ADDV	$8, R11

	XVMOVQ	X0.V[1], R10
	CTOV	R10, R10
	BNE	R10, R20, find_byte
	ADDV	$8, R11

	XVMOVQ	X0.V[2], R10
	CTOV	R10, R10
	BNE	R10, R20, find_byte
	ADDV	$8, R11

	// The chunk is known to contain a mismatch, so it must be in the last lane.
	XVMOVQ	X0.V[3], R10
	CTOV	R10, R10
	JMP	find_byte

	// LSX path: 16-byte vectors. Reached only from lasx, so R14 >= 32 and
	// R20 = 64 here.
lsx:
	MOVBU	internal∕cpu·Loong64+const_offsetLOONG64HasLSX(SB), R9
	BEQ	R9, generic32_loop

	MOVV	$64, R15
	// Fewer than 64 bytes: skip the unrolled loop, but go through
	// lsx16_entry so that R15 is reloaded with 16 and the 16-byte loop
	// keeps iterating instead of giving up after one pass.
	BLT	R14, R15, lsx16_entry
lsx64_loop:
	VMOVQ	(R4), V0
	VMOVQ	(R6), V1
	VSEQB	V0, V1, V0
	VSETANYEQB	V0, FCC0
	BFPT	lsx_found_0

	VMOVQ	16(R4), V0
	VMOVQ	16(R6), V1
	VSEQB	V0, V1, V0
	VSETANYEQB	V0, FCC0
	BFPT	lsx_found_16

	VMOVQ	32(R4), V0
	VMOVQ	32(R6), V1
	VSEQB	V0, V1, V0
	VSETANYEQB	V0, FCC0
	BFPT	lsx_found_32

	VMOVQ	48(R4), V0
	VMOVQ	48(R6), V1
	VSEQB	V0, V1, V0
	VSETANYEQB	V0, FCC0
	BFPT	lsx_found_48

	ADDV	$-64, R14
	BEQ	R14, cmp_len
	ADDV	$64, R4
	ADDV	$64, R6
	BGE	R14, R15, lsx64_loop

lsx16_entry:
	MOVV	$16, R15
	BLT	R14, R15, tail
lsx16_loop:
	VMOVQ	(R4), V0
	VMOVQ	(R6), V1
	VSEQB	V0, V1, V0
	VSETANYEQB	V0, FCC0
	BFPT	lsx_found_0

	ADDV	$-16, R14
	BEQ	R14, cmp_len
	ADDV	$16, R4
	ADDV	$16, R6
	BGE	R14, R15, lsx16_loop
	JMP	tail

	// R11 = byte offset (from R4/R6) of the 16-byte chunk whose mask is in V0.
lsx_found_0:
	MOVV	R0, R11
	JMP	lsx_find_byte

lsx_found_16:
	MOVV	$16, R11
	JMP	lsx_find_byte

lsx_found_32:
	MOVV	$32, R11
	JMP	lsx_find_byte

lsx_found_48:
	MOVV	$48, R11

	// Same lane scan as lasx_find_byte, over the two 64-bit lanes of V0.
lsx_find_byte:
	VMOVQ	V0.V[0], R10
	CTOV	R10, R10
	BNE	R10, R20, find_byte
	ADDV	$8, R11

	VMOVQ	V0.V[1], R10
	CTOV	R10, R10

	// R10 = number of trailing one bits in the lane mask, i.e. 8 * (index of
	// the first differing byte within the lane); R11 = offset of the lane.
find_byte:
	SRLV	$3, R10	// bit count -> byte index
	ADDV	R10, R11
	ADDV	R11, R4
	ADDV	R11, R6
	// Zero-extending loads, matching lt_2: cmp_byte compares unsigned.
	MOVBU	(R4), R16
	MOVBU	(R6), R17
	JMP	cmp_byte

	// No vector unit: compare four 8-byte words per iteration.
	// Reached only with R14 >= 32.
generic32_loop:
	MOVV	(R4), R10
	MOVV	(R6), R11
	BNE	R10, R11, cmp8
	MOVV	8(R4), R10
	MOVV	8(R6), R11
	BNE	R10, R11, cmp8
	MOVV	16(R4), R10
	MOVV	16(R6), R11
	BNE	R10, R11, cmp8
	MOVV	24(R4), R10
	MOVV	24(R6), R11
	BNE	R10, R11, cmp8
	ADDV	$-32, R14
	BEQ	R14, cmp_len
	ADDV	$32, R4
	ADDV	$32, R6
	MOVV	$32, R15
	BGE	R14, R15, generic32_loop
	JMP	tail