blob: 0dc62515a17d2f4780a44eea2e3d95210cd09380 [file] [log] [blame]
// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "go_asm.h"
#include "textflag.h"
// func Compare(a, b []byte) int
//
// ABI0 wrapper: frame is $0-56 (a slice at +0/+8/+16, b slice at
// +24/+32/+40, int result at +48). Loads both base/len pairs plus the
// address of the result slot, then tail-jumps to the shared compare<>
// routine, which stores -1/0/1 through X9.
TEXT ·Compare(SB),NOSPLIT|NOFRAME,$0-56
MOV a_base+0(FP), X5
MOV a_len+8(FP), X6
MOV b_base+24(FP), X7
MOV b_len+32(FP), X8
MOV $ret+48(FP), X9 // X9 = &ret, written by compare<>
JMP compare<>(SB)
// func cmpstring(a, b string) int
//
// ABI0 wrapper: frame is $0-40 (string a at +0/+8, string b at
// +16/+24, int result at +32). Same register contract as ·Compare;
// tail-jumps to the shared compare<> routine, which stores -1/0/1
// through X9.
TEXT runtime·cmpstring(SB),NOSPLIT|NOFRAME,$0-40
MOV a_base+0(FP), X5
MOV a_len+8(FP), X6
MOV b_base+16(FP), X7
MOV b_len+24(FP), X8
MOV $ret+32(FP), X9 // X9 = &ret, written by compare<>
JMP compare<>(SB)
// On entry:
// X5 points to start of a
// X6 length of a
// X7 points to start of b
// X8 length of b
// X9 points to the address to store the return value (-1/0/1)
//
// Compares the first min(len(a), len(b)) bytes; if all of those are
// equal, the shorter operand sorts first. Clobbers X10-X19, X21-X24.
TEXT compare<>(SB),NOSPLIT|NOFRAME,$0
	// Same backing pointer: the bytes cannot differ, so only the
	// lengths decide the result.
	BEQ	X5, X7, cmp_len

	MOV	X6, X10
	BGE	X8, X10, use_a_len // X10 = min(len(a), len(b))
	MOV	X8, X10
use_a_len:
	BEQZ	X10, cmp_len

	// Inputs shorter than 32 bytes are not worth the alignment work.
	MOV	$32, X11
	BLT	X10, X11, loop4_check

	// Check alignment - if alignment differs we have to do one byte at a time.
	AND	$3, X5, X12
	AND	$3, X7, X13
	BNE	X12, X13, loop4_check
	BEQZ	X12, loop32_check

	// Compare one byte at a time until we reach 4 byte alignment.
	// X12 = a_base & 3 is the misalignment, so 4-X12 bytes must be
	// consumed before the pointers are aligned. (Looping X12 times, as
	// this code previously did, leaves the pointers unaligned for odd
	// misalignments, making loop32 issue unaligned 8-byte loads.)
	SUB	X12, X0, X12
	ADD	$4, X12			// X12 = 4 - (a_base & 3), in [1,3]
	SUB	X12, X10, X10		// X10 >= 32 on entry, so X10 stays > 0
align:
	ADD	$-1, X12
	MOVBU	0(X5), X13
	MOVBU	0(X7), X14
	BNE	X13, X14, cmp
	ADD	$1, X5
	ADD	$1, X7
	BNEZ	X12, align

loop32_check:
	MOV	$32, X12
	BLT	X10, X12, loop16_check
loop32:
	// Compare 32 bytes as four 8-byte words; on a mismatching word,
	// fall out to cmp8a/cmp8b to locate the differing byte.
	MOV	0(X5), X15
	MOV	0(X7), X16
	MOV	8(X5), X17
	MOV	8(X7), X18
	BEQ	X15, X16, loop32a
	JMP	cmp8a
loop32a:
	BEQ	X17, X18, loop32b
	JMP	cmp8b
loop32b:
	MOV	16(X5), X15
	MOV	16(X7), X16
	MOV	24(X5), X17
	MOV	24(X7), X18
	BEQ	X15, X16, loop32c
	JMP	cmp8a
loop32c:
	BEQ	X17, X18, loop32d
	JMP	cmp8b
loop32d:
	ADD	$32, X5
	ADD	$32, X7
	ADD	$-32, X10
	BGE	X10, X12, loop32
	BEQZ	X10, cmp_len

loop16_check:
	MOV	$16, X11
	BLT	X10, X11, loop4_check
loop16:
	// One 16-byte step (two 8-byte words), same mismatch handling
	// as loop32.
	MOV	0(X5), X15
	MOV	0(X7), X16
	MOV	8(X5), X17
	MOV	8(X7), X18
	BEQ	X15, X16, loop16a
	JMP	cmp8a
loop16a:
	BEQ	X17, X18, loop16b
	JMP	cmp8b
loop16b:
	ADD	$16, X5
	ADD	$16, X7
	ADD	$-16, X10
	BGE	X10, X11, loop16
	BEQZ	X10, cmp_len

loop4_check:
	MOV	$4, X11
	BLT	X10, X11, loop1
loop4:
	// Compare 4 bytes; on the first mismatch materialize the
	// a<b / a>b flags into X10/X11 and jump straight to cmp_ret.
	MOVBU	0(X5), X13
	MOVBU	0(X7), X14
	MOVBU	1(X5), X15
	MOVBU	1(X7), X16
	BEQ	X13, X14, loop4a
	SLTU	X14, X13, X10		// X10 = 1 if a < b
	SLTU	X13, X14, X11		// X11 = 1 if a > b
	JMP	cmp_ret
loop4a:
	BEQ	X15, X16, loop4b
	SLTU	X16, X15, X10
	SLTU	X15, X16, X11
	JMP	cmp_ret
loop4b:
	MOVBU	2(X5), X21
	MOVBU	2(X7), X22
	MOVBU	3(X5), X23
	MOVBU	3(X7), X24
	BEQ	X21, X22, loop4c
	SLTU	X22, X21, X10
	SLTU	X21, X22, X11
	JMP	cmp_ret
loop4c:
	BEQ	X23, X24, loop4d
	SLTU	X24, X23, X10
	SLTU	X23, X24, X11
	JMP	cmp_ret
loop4d:
	ADD	$4, X5
	ADD	$4, X7
	ADD	$-4, X10
	BGE	X10, X11, loop4

loop1:
	// Tail: fewer than 4 bytes remain.
	BEQZ	X10, cmp_len
	MOVBU	0(X5), X13
	MOVBU	0(X7), X14
	BNE	X13, X14, cmp
	ADD	$1, X5
	ADD	$1, X7
	ADD	$-1, X10
	JMP	loop1

	// Compare 8 bytes of memory in X15/X16 that are known to differ.
	// Little-endian: the least significant byte is the lowest address,
	// so scan the mask upward to find the first differing byte.
cmp8a:
	MOV	$0xff, X19
cmp8a_loop:
	AND	X15, X19, X13
	AND	X16, X19, X14
	BNE	X13, X14, cmp		// masked bytes order the words correctly
	SLLI	$8, X19
	JMP	cmp8a_loop

	// Compare 8 bytes of memory in X17/X18 that are known to differ.
cmp8b:
	MOV	$0xff, X19
cmp8b_loop:
	AND	X17, X19, X13
	AND	X18, X19, X14
	BNE	X13, X14, cmp
	SLLI	$8, X19
	JMP	cmp8b_loop

cmp_len:
	// All compared bytes were equal - the lengths decide.
	MOV	X6, X13
	MOV	X8, X14
cmp:
	SLTU	X14, X13, X10		// X10 = 1 if a < b
	SLTU	X13, X14, X11		// X11 = 1 if a > b
cmp_ret:
	SUB	X10, X11, X12		// (a>b) - (a<b) = -1, 0 or +1
	MOV	X12, (X9)
	RET