// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "go_asm.h"
#include "textflag.h"

TEXT ·Compare<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-56
	// X10 = a_base
	// X11 = a_len
	// X12 = a_cap (unused)
	// X13 = b_base (want in X12)
	// X14 = b_len (want in X13)
	// X15 = b_cap (unused)
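	// Compare takes two slices, so b's base and length arrive in X13 and X14;
	// move them down to X12 and X13, which is where compare<> expects them.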
	MOV	X13, X12
	MOV	X14, X13
	JMP	compare<>(SB)

TEXT runtime·cmpstring<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-40
	// X10 = a_base
	// X11 = a_len
	// X12 = b_base
	// X13 = b_len
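	// Strings carry no cap word, so a and b are already in the registers
	// compare<> expects; jump straight to the common code.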
	JMP	compare<>(SB)

// On entry:
// X10 points to start of a
// X11 length of a
// X12 points to start of b
// X13 length of b
// for non-regabi, X14 points to the address at which to store the return value (-1/0/1)
// for regabi, the return value is in X10
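// The result is -1, 0 or +1. min(len(a), len(b)) bytes are compared: 32 bytes
// at a time once both pointers can be brought to 8-byte alignment, then in
// 16-, 8-, 4- and 1-byte steps. If every compared byte matches, the shorter
// input compares less.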
TEXT compare<>(SB),NOSPLIT|NOFRAME,$0
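	// If a and b share a base pointer the bytes are identical, so only the
	// lengths need to be compared.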
	BEQ	X10, X12, cmp_len

	MOV	X11, X5
	BGE	X13, X5, use_a_len // X5 = min(len(a), len(b))
	MOV	X13, X5
use_a_len:
	BEQZ	X5, cmp_len
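	// Short inputs (fewer than 32 bytes) skip the alignment handling and use
	// the unaligned loops below.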
	MOV	$32, X6
	BLT	X5, X6, check8_unaligned

	// Check alignment - if alignment differs we have to do one byte at a time.
	AND	$7, X10, X7
	AND	$7, X12, X8
	BNE	X7, X8, check8_unaligned
	BEQZ	X7, compare32
	// Check one byte at a time until we reach 8 byte alignment.
	SUB	X7, X0, X7
	ADD	$8, X7, X7
	SUB	X7, X5, X5
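	// X7 = 8 - (a_base & 7): the number of leading bytes to compare one at a
	// time until both pointers (which have equal low three bits here) are
	// 8-byte aligned. Deduct them from the remaining count in X5 up front.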
align:
	SUB	$1, X7
	MOVBU	0(X10), X8
	MOVBU	0(X12), X9
	BNE	X8, X9, cmp
	ADD	$1, X10
	ADD	$1, X12
	BNEZ	X7, align
check32:
	// X6 contains $32
	BLT	X5, X6, compare16
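	// Main aligned loop: compare 32 bytes per iteration as four 8-byte words.
	// A mismatching word pair is handed to cmp8a/cmp8b to locate the first
	// differing byte.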
compare32:
	MOV	0(X10), X15
	MOV	0(X12), X16
	MOV	8(X10), X17
	MOV	8(X12), X18
	BNE	X15, X16, cmp8a
	BNE	X17, X18, cmp8b
	MOV	16(X10), X15
	MOV	16(X12), X16
	MOV	24(X10), X17
	MOV	24(X12), X18
	BNE	X15, X16, cmp8a
	BNE	X17, X18, cmp8b
	ADD	$32, X10
	ADD	$32, X12
	SUB	$32, X5
	BGE	X5, X6, compare32
	BEQZ	X5, cmp_len
check16:
	MOV	$16, X6
	BLT	X5, X6, check8_unaligned
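	// A single remaining 16-byte block is handled the same way, without
	// looping back.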
compare16:
	MOV	0(X10), X15
	MOV	0(X12), X16
	MOV	8(X10), X17
	MOV	8(X12), X18
	BNE	X15, X16, cmp8a
	BNE	X17, X18, cmp8b
	ADD	$16, X10
	ADD	$16, X12
	SUB	$16, X5
	BEQZ	X5, cmp_len
check8_unaligned:
	MOV	$8, X6
	BLT	X5, X6, check4_unaligned
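	// Unaligned 8-byte step: load each byte separately so pointer alignment
	// does not matter, then compare the eight pairs in memory order; the first
	// pair that differs decides the result via cmp1a..cmp1h.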
compare8_unaligned:
	MOVBU	0(X10), X8
	MOVBU	1(X10), X15
	MOVBU	2(X10), X17
	MOVBU	3(X10), X19
	MOVBU	4(X10), X21
	MOVBU	5(X10), X23
	MOVBU	6(X10), X25
	MOVBU	7(X10), X29
	MOVBU	0(X12), X9
	MOVBU	1(X12), X16
	MOVBU	2(X12), X18
	MOVBU	3(X12), X20
	MOVBU	4(X12), X22
	MOVBU	5(X12), X24
	MOVBU	6(X12), X28
	MOVBU	7(X12), X30
	BNE	X8, X9, cmp1a
	BNE	X15, X16, cmp1b
	BNE	X17, X18, cmp1c
	BNE	X19, X20, cmp1d
	BNE	X21, X22, cmp1e
	BNE	X23, X24, cmp1f
	BNE	X25, X28, cmp1g
	BNE	X29, X30, cmp1h
	ADD	$8, X10
	ADD	$8, X12
	SUB	$8, X5
	BGE	X5, X6, compare8_unaligned
	BEQZ	X5, cmp_len
check4_unaligned:
	MOV	$4, X6
	BLT	X5, X6, compare1
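	// Same approach with four byte pairs per iteration.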
compare4_unaligned:
	MOVBU	0(X10), X8
	MOVBU	1(X10), X15
	MOVBU	2(X10), X17
	MOVBU	3(X10), X19
	MOVBU	0(X12), X9
	MOVBU	1(X12), X16
	MOVBU	2(X12), X18
	MOVBU	3(X12), X20
	BNE	X8, X9, cmp1a
	BNE	X15, X16, cmp1b
	BNE	X17, X18, cmp1c
	BNE	X19, X20, cmp1d
	ADD	$4, X10
	ADD	$4, X12
	SUB	$4, X5
	BGE	X5, X6, compare4_unaligned
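	// At most 3 bytes remain; compare them one at a time.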
compare1:
	BEQZ	X5, cmp_len
	MOVBU	0(X10), X8
	MOVBU	0(X12), X9
	BNE	X8, X9, cmp
	ADD	$1, X10
	ADD	$1, X12
	SUB	$1, X5
	JMP	compare1
	// Compare 8 bytes of memory in X15/X16 that are known to differ.
cmp8a:
	MOV	X15, X17
	MOV	X16, X18

	// Compare 8 bytes of memory in X17/X18 that are known to differ.
cmp8b:
	MOV	$0xff, X19
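	// The words were loaded little-endian, so the least significant byte holds
	// the byte at the lowest address. Slide a one-byte 0xff mask up through the
	// words; the first position where the masked values differ is the first
	// differing byte in memory order, and it decides the comparison.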
cmp8_loop:
	AND	X17, X19, X8
	AND	X18, X19, X9
	BNE	X8, X9, cmp
	SLLI	$8, X19
	JMP	cmp8_loop
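	// Each cmp1x below handles one differing byte pair held in a fixed pair of
	// registers: X5 is set to (a_byte < b_byte) and X6 to (b_byte < a_byte),
	// then cmp_ret turns them into the -1/0/+1 result.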
cmp1a:
	SLTU	X9, X8, X5
	SLTU	X8, X9, X6
	JMP	cmp_ret
cmp1b:
	SLTU	X16, X15, X5
	SLTU	X15, X16, X6
	JMP	cmp_ret
cmp1c:
	SLTU	X18, X17, X5
	SLTU	X17, X18, X6
	JMP	cmp_ret
cmp1d:
	SLTU	X20, X19, X5
	SLTU	X19, X20, X6
	JMP	cmp_ret
cmp1e:
	SLTU	X22, X21, X5
	SLTU	X21, X22, X6
	JMP	cmp_ret
cmp1f:
	SLTU	X24, X23, X5
	SLTU	X23, X24, X6
	JMP	cmp_ret
cmp1g:
	SLTU	X28, X25, X5
	SLTU	X25, X28, X6
	JMP	cmp_ret
cmp1h:
	SLTU	X30, X29, X5
	SLTU	X29, X30, X6
	JMP	cmp_ret
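	// Every compared byte matched, so the comparison is decided by the
	// lengths: the shorter input compares less.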
cmp_len:
	MOV	X11, X8
	MOV	X13, X9
cmp:
	SLTU	X9, X8, X5
	SLTU	X8, X9, X6
cmp_ret:
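	// X10 = X6 - X5, i.e. (b < a) - (a < b): +1 if a is greater, -1 if a is
	// less, 0 if both flags are clear.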
	SUB	X5, X6, X10
	RET