|  | // Copyright 2022 The Go Authors. All rights reserved. | 
|  | // Use of this source code is governed by a BSD-style | 
|  | // license that can be found in the LICENSE file. | 
|  |  | 
|  | #include "go_asm.h" | 
|  | #include "textflag.h" | 
|  |  | 
|  | TEXT ·Compare<ABIInternal>(SB),NOSPLIT,$0-56 | 
|  | // Register ABI entry for comparing two byte slices a and b: | 
|  | //   a: base in R4, len in R5, cap in R6 (cap is ignored) | 
|  | //   b: base in R7, len in R8, cap in R9 (cap is ignored) | 
|  | // cmpbody takes b's base in R6 and b's length in R7, so slide b down | 
|  | // over the unused a_cap slot. R7 must be copied out before it is | 
|  | // overwritten with b's length. | 
|  | MOVV	R7, R6 | 
|  | MOVV	R8, R7 | 
|  | JMP	cmpbody<>(SB) | 
|  |  | 
|  | TEXT runtime·cmpstring<ABIInternal>(SB),NOSPLIT,$0-40 | 
|  | // Register ABI entry for comparing two strings a and b: | 
|  | //   a: base in R4, len in R5 | 
|  | //   b: base in R6, len in R7 | 
|  | // The registers already match what cmpbody takes, so jump straight to it. | 
|  | JMP	cmpbody<>(SB) | 
|  |  | 
|  | // cmpbody lexicographically compares the byte sequences a and b. | 
|  | // | 
|  | // input: | 
|  | //    R4: points to the start of a | 
|  | //    R5: length of a | 
|  | //    R6: points to the start of b | 
|  | //    R7: length of b | 
|  | // for regabi the return value (-1/0/1) in R4 | 
|  | // | 
|  | // register roles inside the body: | 
|  | //    R14: bytes still to compare, starts as min(R5, R7) | 
|  | //    R15: chunk size of the loop currently running | 
|  | //    R10, R11: scalar paths: words loaded from a and b | 
|  | //              vector paths: R10 = count of matching low bits in a 64-bit | 
|  | //              lane, R11 = byte offset of that lane from R4/R6 | 
|  | //    R16, R17: the pair of bytes handed to cmp_byte | 
|  | //    R20: constant 64 (a lane whose bits all matched) | 
|  | //    R8, R9: scratch | 
|  | // R5 and R7 are never written, so cmp_len always sees the original lengths. | 
|  | TEXT cmpbody<>(SB),NOSPLIT|NOFRAME,$0 | 
|  | BEQ	R4, R6, cmp_len	// same start of a and b, then compare lengths | 
|  |  | 
|  | // R14 = min(a_len, b_len) | 
|  | SGTU	R5, R7, R9 | 
|  | BNE	R9, b_lt_a | 
|  | MOVV	R5, R14 | 
|  | JMP	entry | 
|  |  | 
|  | b_lt_a: | 
|  | MOVV	R7, R14 | 
|  |  | 
|  | entry: | 
|  | BEQ	R14, cmp_len	// minlength is 0 | 
|  |  | 
|  | MOVV	$32, R15 | 
|  | BGE	R14, R15, lasx | 
|  |  | 
|  | // Scalar tail: every jump to tail arrives with R14 != 0. | 
|  | tail: | 
|  | MOVV	$8, R15 | 
|  | BLT	R14, R15, lt_8 | 
|  | generic8_loop: | 
|  | MOVV	(R4), R10 | 
|  | MOVV	(R6), R11 | 
|  | BEQ	R10, R11, generic8_equal | 
|  |  | 
|  | // R10 and R11 differ here. Walk the bytes from the low end of the word, | 
|  | // which is the lowest address, until the differing pair is found. | 
|  | cmp8: | 
|  | AND	$0xff, R10, R16 | 
|  | AND	$0xff, R11, R17 | 
|  | BNE	R16, R17, cmp_byte | 
|  |  | 
|  | BSTRPICKV	$15, R10, $8, R16 | 
|  | BSTRPICKV	$15, R11, $8, R17 | 
|  | BNE	R16, R17, cmp_byte | 
|  |  | 
|  | BSTRPICKV	$23, R10, $16, R16 | 
|  | BSTRPICKV	$23, R11, $16, R17 | 
|  | BNE	R16, R17, cmp_byte | 
|  |  | 
|  | BSTRPICKV	$31, R10, $24, R16 | 
|  | BSTRPICKV	$31, R11, $24, R17 | 
|  | BNE	R16, R17, cmp_byte | 
|  |  | 
|  | BSTRPICKV	$39, R10, $32, R16 | 
|  | BSTRPICKV	$39, R11, $32, R17 | 
|  | BNE	R16, R17, cmp_byte | 
|  |  | 
|  | BSTRPICKV	$47, R10, $40, R16 | 
|  | BSTRPICKV	$47, R11, $40, R17 | 
|  | BNE	R16, R17, cmp_byte | 
|  |  | 
|  | BSTRPICKV	$55, R10, $48, R16 | 
|  | BSTRPICKV	$55, R11, $48, R17 | 
|  | BNE	R16, R17, cmp_byte | 
|  |  | 
|  | BSTRPICKV	$63, R10, $56, R16 | 
|  | BSTRPICKV	$63, R11, $56, R17 | 
|  | BNE	R16, R17, cmp_byte | 
|  |  | 
|  | generic8_equal: | 
|  | ADDV	$-8, R14 | 
|  | BEQ	R14, cmp_len | 
|  | ADDV	$8, R4 | 
|  | ADDV	$8, R6 | 
|  | BGE	R14, R15, generic8_loop | 
|  |  | 
|  | // 1..7 bytes left: try a 4-byte, then a 2-byte, then a 1-byte compare. | 
|  | lt_8: | 
|  | MOVV	$4, R15 | 
|  | BLT	R14, R15, lt_4 | 
|  |  | 
|  | MOVWU	(R4), R10 | 
|  | MOVWU	(R6), R11 | 
|  | BEQ	R10, R11, lt_8_equal | 
|  |  | 
|  | AND	$0xff, R10, R16 | 
|  | AND	$0xff, R11, R17 | 
|  | BNE	R16, R17, cmp_byte | 
|  |  | 
|  | BSTRPICKV	$15, R10, $8, R16 | 
|  | BSTRPICKV	$15, R11, $8, R17 | 
|  | BNE	R16, R17, cmp_byte | 
|  |  | 
|  | BSTRPICKV	$23, R10, $16, R16 | 
|  | BSTRPICKV	$23, R11, $16, R17 | 
|  | BNE	R16, R17, cmp_byte | 
|  |  | 
|  | BSTRPICKV	$31, R10, $24, R16 | 
|  | BSTRPICKV	$31, R11, $24, R17 | 
|  | BNE	R16, R17, cmp_byte | 
|  |  | 
|  | lt_8_equal: | 
|  | ADDV	$-4, R14 | 
|  | BEQ	R14, cmp_len | 
|  | ADDV	$4, R4 | 
|  | ADDV	$4, R6 | 
|  |  | 
|  | lt_4: | 
|  | MOVV	$2, R15 | 
|  | BLT	R14, R15, lt_2 | 
|  |  | 
|  | MOVHU	(R4), R10 | 
|  | MOVHU	(R6), R11 | 
|  | BEQ	R10, R11, lt_4_equal | 
|  |  | 
|  | AND	$0xff, R10, R16 | 
|  | AND	$0xff, R11, R17 | 
|  | BNE	R16, R17, cmp_byte | 
|  |  | 
|  | BSTRPICKV	$15, R10, $8, R16 | 
|  | BSTRPICKV	$15, R11, $8, R17 | 
|  | BNE	R16, R17, cmp_byte | 
|  |  | 
|  | lt_4_equal: | 
|  | ADDV	$-2, R14 | 
|  | BEQ	R14, cmp_len | 
|  | ADDV	$2, R4 | 
|  | ADDV	$2, R6 | 
|  |  | 
|  | // Exactly one byte is left when control reaches lt_2. | 
|  | lt_2: | 
|  | MOVBU	(R4), R16 | 
|  | MOVBU	(R6), R17 | 
|  | BNE	R16, R17, cmp_byte | 
|  | JMP	cmp_len | 
|  |  | 
|  | // Compare 1 byte taken from R16/R17 that are known to differ. | 
|  | cmp_byte: | 
|  | SGTU	R16, R17, R4	// R4 = 1 if (R16 > R17) | 
|  | BNE	R0, R4, ret | 
|  | MOVV	$-1, R4 | 
|  | RET | 
|  |  | 
|  | // All of the common prefix matched: the result is decided by the lengths. | 
|  | // R4 = (a_len > b_len) - (b_len > a_len) | 
|  | cmp_len: | 
|  | SGTU	R5, R7, R8 | 
|  | SGTU	R7, R5, R9 | 
|  | SUBV	R9, R8, R4 | 
|  |  | 
|  | ret: | 
|  | RET | 
|  |  | 
|  | // At least 32 bytes to compare: pick the widest vector unit available. | 
|  | lasx: | 
|  | MOVV	$64, R20	// used by both lasx_find_byte and lsx_find_byte | 
|  | MOVBU	internal∕cpu·Loong64+const_offsetLOONG64HasLASX(SB), R9 | 
|  | BEQ	R9, lsx | 
|  |  | 
|  | MOVV	$128, R15 | 
|  | // Go through lasx32 so that R15 is reset to 32 before the 32-byte loop | 
|  | // starts; entering the loop with R15 = 128 would end it after one pass. | 
|  | BLT	R14, R15, lasx32 | 
|  | lasx128_loop: | 
|  | // XVSEQB leaves an all-zero byte wherever a and b differ, and | 
|  | // XVSETANYEQB raises FCC0 when such a byte exists. | 
|  | XVMOVQ	(R4), X0 | 
|  | XVMOVQ	(R6), X1 | 
|  | XVSEQB	X0, X1, X0 | 
|  | XVSETANYEQB	X0, FCC0 | 
|  | BFPT	lasx_found_0 | 
|  |  | 
|  | XVMOVQ	32(R4), X0 | 
|  | XVMOVQ	32(R6), X1 | 
|  | XVSEQB	X0, X1, X0 | 
|  | XVSETANYEQB	X0, FCC0 | 
|  | BFPT	lasx_found_32 | 
|  |  | 
|  | XVMOVQ	64(R4), X0 | 
|  | XVMOVQ	64(R6), X1 | 
|  | XVSEQB	X0, X1, X0 | 
|  | XVSETANYEQB	X0, FCC0 | 
|  | BFPT	lasx_found_64 | 
|  |  | 
|  | XVMOVQ	96(R4), X0 | 
|  | XVMOVQ	96(R6), X1 | 
|  | XVSEQB	X0, X1, X0 | 
|  | XVSETANYEQB	X0, FCC0 | 
|  | BFPT	lasx_found_96 | 
|  |  | 
|  | ADDV	$-128, R14 | 
|  | BEQ	R14, cmp_len | 
|  | ADDV	$128, R4 | 
|  | ADDV	$128, R6 | 
|  | BGE	R14, R15, lasx128_loop | 
|  |  | 
|  | lasx32: | 
|  | MOVV	$32, R15 | 
|  | BLT	R14, R15, tail | 
|  | lasx32_loop: | 
|  | XVMOVQ	(R4), X0 | 
|  | XVMOVQ	(R6), X1 | 
|  | XVSEQB	X0, X1, X0 | 
|  | XVSETANYEQB	X0, FCC0 | 
|  | BFPT	lasx_found_0 | 
|  |  | 
|  | ADDV	$-32, R14 | 
|  | BEQ	R14, cmp_len | 
|  | ADDV	$32, R4 | 
|  | ADDV	$32, R6 | 
|  | BGE	R14, R15, lasx32_loop | 
|  | JMP	tail | 
|  |  | 
|  | // R11 = offset from R4/R6 of the 32-byte chunk that holds the mismatch. | 
|  | lasx_found_0: | 
|  | MOVV	R0, R11 | 
|  | JMP	lasx_find_byte | 
|  |  | 
|  | lasx_found_32: | 
|  | MOVV	$32, R11 | 
|  | JMP	lasx_find_byte | 
|  |  | 
|  | lasx_found_64: | 
|  | MOVV	$64, R11 | 
|  | JMP	lasx_find_byte | 
|  |  | 
|  | lasx_found_96: | 
|  | MOVV	$96, R11 | 
|  |  | 
|  | // Scan the four 64-bit lanes of X0 for the first one that is not all | 
|  | // ones. CTOV counts the trailing one bits, so 64 (R20) means the lane | 
|  | // matched completely and the next lane, 8 bytes further on, is tried. | 
|  | lasx_find_byte: | 
|  | XVMOVQ	X0.V[0], R10 | 
|  | CTOV	R10, R10 | 
|  | BNE	R10, R20, find_byte | 
|  | ADDV	$8, R11 | 
|  |  | 
|  | XVMOVQ	X0.V[1], R10 | 
|  | CTOV	R10, R10 | 
|  | BNE	R10, R20, find_byte | 
|  | ADDV	$8, R11 | 
|  |  | 
|  | XVMOVQ	X0.V[2], R10 | 
|  | CTOV	R10, R10 | 
|  | BNE	R10, R20, find_byte | 
|  | ADDV	$8, R11 | 
|  |  | 
|  | // A mismatch is known to exist, so it must be in the last lane. | 
|  | XVMOVQ	X0.V[3], R10 | 
|  | CTOV	R10, R10 | 
|  | JMP	find_byte | 
|  |  | 
|  | lsx: | 
|  | MOVBU	internal∕cpu·Loong64+const_offsetLOONG64HasLSX(SB), R9 | 
|  | BEQ	R9, generic32_loop | 
|  |  | 
|  | MOVV	$64, R15 | 
|  | // Go through lsx16 so that R15 is reset to 16 before the 16-byte loop | 
|  | // starts; entering the loop with R15 = 64 would end it after one pass. | 
|  | BLT	R14, R15, lsx16 | 
|  | lsx64_loop: | 
|  | VMOVQ	(R4), V0 | 
|  | VMOVQ	(R6), V1 | 
|  | VSEQB	V0, V1, V0 | 
|  | VSETANYEQB	V0, FCC0 | 
|  | BFPT	lsx_found_0 | 
|  |  | 
|  | VMOVQ	16(R4), V0 | 
|  | VMOVQ	16(R6), V1 | 
|  | VSEQB	V0, V1, V0 | 
|  | VSETANYEQB	V0, FCC0 | 
|  | BFPT	lsx_found_16 | 
|  |  | 
|  | VMOVQ	32(R4), V0 | 
|  | VMOVQ	32(R6), V1 | 
|  | VSEQB	V0, V1, V0 | 
|  | VSETANYEQB	V0, FCC0 | 
|  | BFPT	lsx_found_32 | 
|  |  | 
|  | VMOVQ	48(R4), V0 | 
|  | VMOVQ	48(R6), V1 | 
|  | VSEQB	V0, V1, V0 | 
|  | VSETANYEQB	V0, FCC0 | 
|  | BFPT	lsx_found_48 | 
|  |  | 
|  | ADDV	$-64, R14 | 
|  | BEQ	R14, cmp_len | 
|  | ADDV	$64, R4 | 
|  | ADDV	$64, R6 | 
|  | BGE	R14, R15, lsx64_loop | 
|  |  | 
|  | lsx16: | 
|  | MOVV	$16, R15 | 
|  | BLT	R14, R15, tail | 
|  | lsx16_loop: | 
|  | VMOVQ	(R4), V0 | 
|  | VMOVQ	(R6), V1 | 
|  | VSEQB	V0, V1, V0 | 
|  | VSETANYEQB	V0, FCC0 | 
|  | BFPT	lsx_found_0 | 
|  |  | 
|  | ADDV	$-16, R14 | 
|  | BEQ	R14, cmp_len | 
|  | ADDV	$16, R4 | 
|  | ADDV	$16, R6 | 
|  | BGE	R14, R15, lsx16_loop | 
|  | JMP	tail | 
|  |  | 
|  | // R11 = offset from R4/R6 of the 16-byte chunk that holds the mismatch. | 
|  | lsx_found_0: | 
|  | MOVV	R0, R11 | 
|  | JMP	lsx_find_byte | 
|  |  | 
|  | lsx_found_16: | 
|  | MOVV	$16, R11 | 
|  | JMP	lsx_find_byte | 
|  |  | 
|  | lsx_found_32: | 
|  | MOVV	$32, R11 | 
|  | JMP	lsx_find_byte | 
|  |  | 
|  | lsx_found_48: | 
|  | MOVV	$48, R11 | 
|  |  | 
|  | // Same lane scan as lasx_find_byte, over the two 64-bit lanes of V0. | 
|  | lsx_find_byte: | 
|  | VMOVQ	V0.V[0], R10 | 
|  | CTOV	R10, R10 | 
|  | BNE	R10, R20, find_byte | 
|  | ADDV	$8, R11 | 
|  |  | 
|  | VMOVQ	V0.V[1], R10 | 
|  | CTOV	R10, R10 | 
|  |  | 
|  | // R10 = number of matching low bits in the lane, R11 = lane offset. | 
|  | // R10 >> 3 is the index of the first differing byte inside the lane. | 
|  | find_byte: | 
|  | SRLV	$3, R10 | 
|  | ADDV	R10, R11 | 
|  | ADDV	R11, R4 | 
|  | ADDV	R11, R6 | 
|  | // Zero-extend, like lt_2, because cmp_byte compares unsigned. | 
|  | MOVBU	(R4), R16 | 
|  | MOVBU	(R6), R17 | 
|  | JMP	cmp_byte | 
|  |  | 
|  | // No vector unit: compare 32 bytes per pass as four 8-byte words and let | 
|  | // cmp8 locate the differing byte inside the first unequal pair. | 
|  | generic32_loop: | 
|  | MOVV	(R4), R10 | 
|  | MOVV	(R6), R11 | 
|  | BNE	R10, R11, cmp8 | 
|  | MOVV	8(R4), R10 | 
|  | MOVV	8(R6), R11 | 
|  | BNE	R10, R11, cmp8 | 
|  | MOVV	16(R4), R10 | 
|  | MOVV	16(R6), R11 | 
|  | BNE	R10, R11, cmp8 | 
|  | MOVV	24(R4), R10 | 
|  | MOVV	24(R6), R11 | 
|  | BNE	R10, R11, cmp8 | 
|  | ADDV	$-32, R14 | 
|  | BEQ	R14, cmp_len | 
|  | ADDV	$32, R4 | 
|  | ADDV	$32, R6 | 
|  | MOVV	$32, R15 | 
|  | BGE	R14, R15, generic32_loop | 
|  | JMP	tail |