// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build !purego

#include "textflag.h"
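
// dst[i] = a[i] ^ b[i] for i in [0, n), for each of the three entry
// points below. Register conventions used throughout this file:
//
//	R4 = dst pointer, R5 = a pointer, R6 = b pointer
//	R7 = bytes remaining, R8 = scratch for the dispatch tests
//
// SGTU $k, R7, R8 sets R8 = 1 when R7 < k (unsigned), so BNE R8, label
// branches when fewer than k bytes remain.

// SMALL_TAIL dispatches lengths below 16 bytes straight into the matching
// block of SMALL: xor_1 for n < 2, xor_2 for n < 4, xor_4 for n < 8 and
// xor_8 for n < 16; lengths of 16 or more fall through to the wide loops.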
#define SMALL_TAIL \
SGTU $2, R7, R8; \
BNE R8, xor_1; \
SGTU $4, R7, R8; \
BNE R8, xor_2; \
SGTU $8, R7, R8; \
BNE R8, xor_4; \
SGTU $16, R7, R8; \
BNE R8, xor_8
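
// SMALL XORs a tail of fewer than 16 bytes with progressively narrower
// loads: 8 bytes (MOVV), then 4 (MOVW), then 2 (MOVH), then a final
// byte (MOVB). The wide loops fall through to xor_8_check; SMALL_TAIL
// jumps into the individual xor_* blocks directly.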
#define SMALL \
xor_8_check:; \
SGTU $8, R7, R8; \
BNE R8, xor_4_check; \
xor_8:; \
SUBV $8, R7; \
MOVV (R5), R10; \
MOVV (R6), R11; \
XOR R10, R11; \
MOVV R11, (R4); \
ADDV $8, R5; \
ADDV $8, R6; \
ADDV $8, R4; \
BEQ R7, R0, end; \
xor_4_check:; \
SGTU $4, R7, R8; \
BNE R8, xor_2_check; \
xor_4:; \
SUBV $4, R7; \
MOVW (R5), R10; \
MOVW (R6), R11; \
XOR R10, R11; \
MOVW R11, (R4); \
ADDV $4, R5; \
ADDV $4, R6; \
ADDV $4, R4; \
BEQ R7, R0, end; \
xor_2_check:; \
SGTU $2, R7, R8; \
BNE R8, xor_1; \
xor_2:; \
SUBV $2, R7; \
MOVH (R5), R10; \
MOVH (R6), R11; \
XOR R10, R11; \
MOVH R11, (R4); \
ADDV $2, R5; \
ADDV $2, R6; \
ADDV $2, R4; \
BEQ R7, R0, end; \
xor_1:; \
MOVB (R5), R10; \
MOVB (R6), R11; \
XOR R10, R11; \
MOVB R11, (R4)
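
// xorBytesBasic is the GPR-only fallback: a 64-byte unrolled main loop,
// then single 32- and 16-byte steps, then the SMALL tail.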
// func xorBytesBasic(dst, a, b *byte, n int)
TEXT ·xorBytesBasic(SB), NOSPLIT, $0
MOVV dst+0(FP), R4
MOVV a+8(FP), R5
MOVV b+16(FP), R6
MOVV n+24(FP), R7
SMALL_TAIL
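// Main loop: XOR 64 bytes per iteration using eight 8-byte loads from
// each input; it repeats while R8 == 0, i.e. while at least 64 bytes
// remain after the SUBV at the top.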
xor_64_check:
SGTU $64, R7, R8
BNE R8, xor_32_check
xor_64_loop:
SUBV $64, R7
MOVV (R5), R10
MOVV 8(R5), R11
MOVV 16(R5), R12
MOVV 24(R5), R13
MOVV (R6), R14
MOVV 8(R6), R15
MOVV 16(R6), R16
MOVV 24(R6), R17
XOR R10, R14
XOR R11, R15
XOR R12, R16
XOR R13, R17
MOVV R14, (R4)
MOVV R15, 8(R4)
MOVV R16, 16(R4)
MOVV R17, 24(R4)
MOVV 32(R5), R10
MOVV 40(R5), R11
MOVV 48(R5), R12
MOVV 56(R5), R13
MOVV 32(R6), R14
MOVV 40(R6), R15
MOVV 48(R6), R16
MOVV 56(R6), R17
XOR R10, R14
XOR R11, R15
XOR R12, R16
XOR R13, R17
MOVV R14, 32(R4)
MOVV R15, 40(R4)
MOVV R16, 48(R4)
MOVV R17, 56(R4)
SGTU $64, R7, R8
ADDV $64, R5
ADDV $64, R6
ADDV $64, R4
BEQ R8, xor_64_loop
BEQ R7, end
xor_32_check:
SGTU $32, R7, R8
BNE R8, xor_16_check
xor_32:
SUBV $32, R7
MOVV (R5), R10
MOVV 8(R5), R11
MOVV 16(R5), R12
MOVV 24(R5), R13
MOVV (R6), R14
MOVV 8(R6), R15
MOVV 16(R6), R16
MOVV 24(R6), R17
XOR R10, R14
XOR R11, R15
XOR R12, R16
XOR R13, R17
MOVV R14, (R4)
MOVV R15, 8(R4)
MOVV R16, 16(R4)
MOVV R17, 24(R4)
ADDV $32, R5
ADDV $32, R6
ADDV $32, R4
BEQ R7, R0, end
xor_16_check:
SGTU $16, R7, R8
BNE R8, xor_8_check
xor_16:
SUBV $16, R7
MOVV (R5), R10
MOVV 8(R5), R11
MOVV (R6), R12
MOVV 8(R6), R13
XOR R10, R12
XOR R11, R13
MOVV R12, (R4)
MOVV R13, 8(R4)
ADDV $16, R5
ADDV $16, R6
ADDV $16, R4
BEQ R7, R0, end
SMALL
end:
RET
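
// xorBytesLSX uses the 128-bit LSX vector unit (V registers, VMOVQ and
// VXORV): a 128-byte unrolled main loop, then 64-, 32- and 16-byte vector
// steps, then the shared SMALL tail.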
// func xorBytesLSX(dst, a, b *byte, n int)
TEXT ·xorBytesLSX(SB), NOSPLIT, $0
MOVV dst+0(FP), R4
MOVV a+8(FP), R5
MOVV b+16(FP), R6
MOVV n+24(FP), R7
SMALL_TAIL
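// Main loop: XOR 128 bytes per iteration using eight 16-byte VMOVQ
// loads from each input, repeating while at least 128 bytes remain.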
xor_128_lsx_check:
SGTU $128, R7, R8
BNE R8, xor_64_lsx_check
xor_128_lsx_loop:
SUBV $128, R7
VMOVQ (R5), V0
VMOVQ 16(R5), V1
VMOVQ 32(R5), V2
VMOVQ 48(R5), V3
VMOVQ 64(R5), V4
VMOVQ 80(R5), V5
VMOVQ 96(R5), V6
VMOVQ 112(R5), V7
VMOVQ (R6), V8
VMOVQ 16(R6), V9
VMOVQ 32(R6), V10
VMOVQ 48(R6), V11
VMOVQ 64(R6), V12
VMOVQ 80(R6), V13
VMOVQ 96(R6), V14
VMOVQ 112(R6), V15
VXORV V0, V8, V8
VXORV V1, V9, V9
VXORV V2, V10, V10
VXORV V3, V11, V11
VXORV V4, V12, V12
VXORV V5, V13, V13
VXORV V6, V14, V14
VXORV V7, V15, V15
VMOVQ V8, (R4)
VMOVQ V9, 16(R4)
VMOVQ V10, 32(R4)
VMOVQ V11, 48(R4)
VMOVQ V12, 64(R4)
VMOVQ V13, 80(R4)
VMOVQ V14, 96(R4)
VMOVQ V15, 112(R4)
SGTU $128, R7, R8
ADDV $128, R5
ADDV $128, R6
ADDV $128, R4
BEQ R8, xor_128_lsx_loop
BEQ R7, end
xor_64_lsx_check:
SGTU $64, R7, R8
BNE R8, xor_32_lsx_check
xor_64_lsx:
SUBV $64, R7
VMOVQ (R5), V0
VMOVQ 16(R5), V1
VMOVQ 32(R5), V2
VMOVQ 48(R5), V3
VMOVQ (R6), V4
VMOVQ 16(R6), V5
VMOVQ 32(R6), V6
VMOVQ 48(R6), V7
VXORV V0, V4, V4
VXORV V1, V5, V5
VXORV V2, V6, V6
VXORV V3, V7, V7
VMOVQ V4, (R4)
VMOVQ V5, 16(R4)
VMOVQ V6, 32(R4)
VMOVQ V7, 48(R4)
ADDV $64, R5
ADDV $64, R6
ADDV $64, R4
BEQ R7, end
xor_32_lsx_check:
SGTU $32, R7, R8
BNE R8, xor_16_lsx_check
xor_32_lsx:
SUBV $32, R7
VMOVQ (R5), V0
VMOVQ 16(R5), V1
VMOVQ (R6), V2
VMOVQ 16(R6), V3
VXORV V0, V2, V2
VXORV V1, V3, V3
VMOVQ V2, (R4)
VMOVQ V3, 16(R4)
ADDV $32, R5
ADDV $32, R6
ADDV $32, R4
BEQ R7, end
xor_16_lsx_check:
SGTU $16, R7, R8
BNE R8, xor_8_check
xor_16_lsx:
SUBV $16, R7
VMOVQ (R5), V0
VMOVQ (R6), V1
VXORV V0, V1, V1
VMOVQ V1, (R4)
ADDV $16, R5
ADDV $16, R6
ADDV $16, R4
BEQ R7, end
SMALL
end:
RET
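
// xorBytesLASX uses the 256-bit LASX vector unit (X registers, XVMOVQ and
// XVXORV): a 256-byte unrolled main loop, then 128-, 64- and 32-byte steps,
// one 16-byte LSX step, then the shared SMALL tail.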
// func xorBytesLASX(dst, a, b *byte, n int)
TEXT ·xorBytesLASX(SB), NOSPLIT, $0
MOVV dst+0(FP), R4
MOVV a+8(FP), R5
MOVV b+16(FP), R6
MOVV n+24(FP), R7
SMALL_TAIL
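// Main loop: XOR 256 bytes per iteration using eight 32-byte XVMOVQ
// loads from each input, repeating while at least 256 bytes remain.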
xor_256_lasx_check:
SGTU $256, R7, R8
BNE R8, xor_128_lasx_check
xor_256_lasx_loop:
SUBV $256, R7
XVMOVQ (R5), X0
XVMOVQ 32(R5), X1
XVMOVQ 64(R5), X2
XVMOVQ 96(R5), X3
XVMOVQ 128(R5), X4
XVMOVQ 160(R5), X5
XVMOVQ 192(R5), X6
XVMOVQ 224(R5), X7
XVMOVQ (R6), X8
XVMOVQ 32(R6), X9
XVMOVQ 64(R6), X10
XVMOVQ 96(R6), X11
XVMOVQ 128(R6), X12
XVMOVQ 160(R6), X13
XVMOVQ 192(R6), X14
XVMOVQ 224(R6), X15
XVXORV X0, X8, X8
XVXORV X1, X9, X9
XVXORV X2, X10, X10
XVXORV X3, X11, X11
XVXORV X4, X12, X12
XVXORV X5, X13, X13
XVXORV X6, X14, X14
XVXORV X7, X15, X15
XVMOVQ X8, (R4)
XVMOVQ X9, 32(R4)
XVMOVQ X10, 64(R4)
XVMOVQ X11, 96(R4)
XVMOVQ X12, 128(R4)
XVMOVQ X13, 160(R4)
XVMOVQ X14, 192(R4)
XVMOVQ X15, 224(R4)
SGTU $256, R7, R8
ADDV $256, R5
ADDV $256, R6
ADDV $256, R4
BEQ R8, xor_256_lasx_loop
BEQ R7, end
xor_128_lasx_check:
SGTU $128, R7, R8
BNE R8, xor_64_lasx_check
xor_128_lasx:
SUBV $128, R7
XVMOVQ (R5), X0
XVMOVQ 32(R5), X1
XVMOVQ 64(R5), X2
XVMOVQ 96(R5), X3
XVMOVQ (R6), X4
XVMOVQ 32(R6), X5
XVMOVQ 64(R6), X6
XVMOVQ 96(R6), X7
XVXORV X0, X4, X4
XVXORV X1, X5, X5
XVXORV X2, X6, X6
XVXORV X3, X7, X7
XVMOVQ X4, (R4)
XVMOVQ X5, 32(R4)
XVMOVQ X6, 64(R4)
XVMOVQ X7, 96(R4)
ADDV $128, R5
ADDV $128, R6
ADDV $128, R4
BEQ R7, end
xor_64_lasx_check:
SGTU $64, R7, R8
BNE R8, xor_32_lasx_check
xor_64_lasx:
SUBV $64, R7
XVMOVQ (R5), X0
XVMOVQ 32(R5), X1
XVMOVQ (R6), X2
XVMOVQ 32(R6), X3
XVXORV X0, X2, X2
XVXORV X1, X3, X3
XVMOVQ X2, (R4)
XVMOVQ X3, 32(R4)
ADDV $64, R5
ADDV $64, R6
ADDV $64, R4
BEQ R7, end
xor_32_lasx_check:
SGTU $32, R7, R8
BNE R8, xor_16_lasx_check
xor_32_lasx:
SUBV $32, R7
XVMOVQ (R5), X0
XVMOVQ (R6), X1
XVXORV X0, X1, X1
XVMOVQ X1, (R4)
ADDV $32, R5
ADDV $32, R6
ADDV $32, R4
BEQ R7, end
xor_16_lasx_check:
SGTU $16, R7, R8
BNE R8, xor_8_check
xor_16_lasx:
SUBV $16, R7
VMOVQ (R5), V0
VMOVQ (R6), V1
VXORV V0, V1, V1
VMOVQ V1, (R4)
ADDV $16, R5
ADDV $16, R6
ADDV $16, R4
BEQ R7, end
SMALL
end:
RET
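
// The Go-side dispatcher lives outside this file. As a rough sketch of how
// these entry points would be selected (the cpu.Loong64.HasLASX and
// cpu.Loong64.HasLSX feature bits are assumptions here, not taken from
// this package):
//
//	//go:noescape
//	func xorBytesBasic(dst, a, b *byte, n int)
//
//	//go:noescape
//	func xorBytesLSX(dst, a, b *byte, n int)
//
//	//go:noescape
//	func xorBytesLASX(dst, a, b *byte, n int)
//
//	// xorBytes picks the widest kernel the CPU supports.
//	func xorBytes(dst, a, b *byte, n int) {
//		switch {
//		case cpu.Loong64.HasLASX:
//			xorBytesLASX(dst, a, b, n)
//		case cpu.Loong64.HasLSX:
//			xorBytesLSX(dst, a, b, n)
//		default:
//			xorBytesBasic(dst, a, b, n)
//		}
//	}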