// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build !purego

#include "textflag.h"

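// Register conventions shared by all three implementations below:
//
//	R4 = dst pointer, R5 = a pointer, R6 = b pointer
//	R7 = remaining byte count n, R8 = comparison scratch
//
// The code assumes n > 0: with n == 0, SMALL_TAIL would still branch to
// xor_1 and write one byte, so callers must filter out empty inputs.
//
// SMALL_TAIL dispatches lengths below 16 straight to the matching rung
// of the SMALL ladder, so the wide loops only ever start with n >= 16.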
#define SMALL_TAIL \
	SGTU	$2, R7, R8; \
	BNE	R8, xor_1; \
	SGTU	$4, R7, R8; \
	BNE	R8, xor_2; \
	SGTU	$8, R7, R8; \
	BNE	R8, xor_4; \
	SGTU	$16, R7, R8; \
	BNE	R8, xor_8

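// SMALL is the shared tail for all three implementations: a fall-through
// ladder that XORs the last 8-, 4-, 2-, and 1-byte chunks. Each rung
// subtracts its width from R7, advances all three pointers, and jumps to
// end once R7 reaches zero; SMALL_TAIL enters the ladder directly at
// xor_1/xor_2/xor_4/xor_8.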
#define SMALL \
xor_8_check:; \
	SGTU	$8, R7, R8; \
	BNE	R8, xor_4_check; \
xor_8:; \
	SUBV	$8, R7; \
	MOVV	(R5), R10; \
	MOVV	(R6), R11; \
	XOR	R10, R11; \
	MOVV	R11, (R4); \
	ADDV	$8, R5; \
	ADDV	$8, R6; \
	ADDV	$8, R4; \
	BEQ	R7, R0, end; \
xor_4_check:; \
	SGTU	$4, R7, R8; \
	BNE	R8, xor_2_check; \
xor_4:; \
	SUBV	$4, R7; \
	MOVW	(R5), R10; \
	MOVW	(R6), R11; \
	XOR	R10, R11; \
	MOVW	R11, (R4); \
	ADDV	$4, R5; \
	ADDV	$4, R6; \
	ADDV	$4, R4; \
	BEQ	R7, R0, end; \
xor_2_check:; \
	SGTU	$2, R7, R8; \
	BNE	R8, xor_1; \
xor_2:; \
	SUBV	$2, R7; \
	MOVH	(R5), R10; \
	MOVH	(R6), R11; \
	XOR	R10, R11; \
	MOVH	R11, (R4); \
	ADDV	$2, R5; \
	ADDV	$2, R6; \
	ADDV	$2, R4; \
	BEQ	R7, R0, end; \
xor_1:; \
	MOVB	(R5), R10; \
	MOVB	(R6), R11; \
	XOR	R10, R11; \
	MOVB	R11, (R4)

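// xorBytesBasic XORs a[0:n] and b[0:n] into dst using only integer
// registers: a 64-byte unrolled loop, then single 32- and 16-byte steps,
// then the SMALL ladder for the last 1-15 bytes.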
// func xorBytesBasic(dst, a, b *byte, n int)
TEXT ·xorBytesBasic(SB), NOSPLIT, $0
	MOVV	dst+0(FP), R4
	MOVV	a+8(FP), R5
	MOVV	b+16(FP), R6
	MOVV	n+24(FP), R7

	SMALL_TAIL

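// Bulk loop: 64 bytes per iteration, using eight 64-bit loads from each
// input (in two groups of four) and eight stores.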
xor_64_check:
	SGTU	$64, R7, R8	// R8 = 1 if n < 64
	BNE	R8, xor_32_check
xor_64_loop:
	SUBV	$64, R7
	MOVV	(R5), R10
	MOVV	8(R5), R11
	MOVV	16(R5), R12
	MOVV	24(R5), R13
	MOVV	(R6), R14
	MOVV	8(R6), R15
	MOVV	16(R6), R16
	MOVV	24(R6), R17
	XOR	R10, R14
	XOR	R11, R15
	XOR	R12, R16
	XOR	R13, R17
	MOVV	R14, (R4)
	MOVV	R15, 8(R4)
	MOVV	R16, 16(R4)
	MOVV	R17, 24(R4)
	MOVV	32(R5), R10
	MOVV	40(R5), R11
	MOVV	48(R5), R12
	MOVV	56(R5), R13
	MOVV	32(R6), R14
	MOVV	40(R6), R15
	MOVV	48(R6), R16
	MOVV	56(R6), R17
	XOR	R10, R14
	XOR	R11, R15
	XOR	R12, R16
	XOR	R13, R17
	MOVV	R14, 32(R4)
	MOVV	R15, 40(R4)
	MOVV	R16, 48(R4)
	MOVV	R17, 56(R4)
	SGTU	$64, R7, R8	// recompute the loop condition before advancing
	ADDV	$64, R5
	ADDV	$64, R6
	ADDV	$64, R4
	BEQ	R8, xor_64_loop	// loop while n >= 64
	BEQ	R7, end		// n == 0: done

xor_32_check:
	SGTU	$32, R7, R8
	BNE	R8, xor_16_check
xor_32:
	SUBV	$32, R7
	MOVV	(R5), R10
	MOVV	8(R5), R11
	MOVV	16(R5), R12
	MOVV	24(R5), R13
	MOVV	(R6), R14
	MOVV	8(R6), R15
	MOVV	16(R6), R16
	MOVV	24(R6), R17
	XOR	R10, R14
	XOR	R11, R15
	XOR	R12, R16
	XOR	R13, R17
	MOVV	R14, (R4)
	MOVV	R15, 8(R4)
	MOVV	R16, 16(R4)
	MOVV	R17, 24(R4)
	ADDV	$32, R5
	ADDV	$32, R6
	ADDV	$32, R4
	BEQ	R7, R0, end

xor_16_check:
	SGTU	$16, R7, R8
	BNE	R8, xor_8_check
xor_16:
	SUBV	$16, R7
	MOVV	(R5), R10
	MOVV	8(R5), R11
	MOVV	(R6), R12
	MOVV	8(R6), R13
	XOR	R10, R12
	XOR	R11, R13
	MOVV	R12, (R4)
	MOVV	R13, 8(R4)
	ADDV	$16, R5
	ADDV	$16, R6
	ADDV	$16, R4
	BEQ	R7, R0, end

	SMALL
end:
	RET

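// xorBytesLSX is the same routine built on 128-bit LSX vector registers:
// a 128-byte unrolled loop, then 64/32/16-byte vector steps, then SMALL.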
// func xorBytesLSX(dst, a, b *byte, n int)
TEXT ·xorBytesLSX(SB), NOSPLIT, $0
	MOVV	dst+0(FP), R4
	MOVV	a+8(FP), R5
	MOVV	b+16(FP), R6
	MOVV	n+24(FP), R7

	SMALL_TAIL

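// Bulk loop: 128 bytes per iteration, using eight 16-byte VMOVQ loads
// from each input and eight VXORV operations.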
xor_128_lsx_check:
	SGTU	$128, R7, R8	// R8 = 1 if n < 128
	BNE	R8, xor_64_lsx_check
xor_128_lsx_loop:
	SUBV	$128, R7
	VMOVQ	(R5), V0
	VMOVQ	16(R5), V1
	VMOVQ	32(R5), V2
	VMOVQ	48(R5), V3
	VMOVQ	64(R5), V4
	VMOVQ	80(R5), V5
	VMOVQ	96(R5), V6
	VMOVQ	112(R5), V7
	VMOVQ	(R6), V8
	VMOVQ	16(R6), V9
	VMOVQ	32(R6), V10
	VMOVQ	48(R6), V11
	VMOVQ	64(R6), V12
	VMOVQ	80(R6), V13
	VMOVQ	96(R6), V14
	VMOVQ	112(R6), V15
	VXORV	V0, V8, V8
	VXORV	V1, V9, V9
	VXORV	V2, V10, V10
	VXORV	V3, V11, V11
	VXORV	V4, V12, V12
	VXORV	V5, V13, V13
	VXORV	V6, V14, V14
	VXORV	V7, V15, V15
	VMOVQ	V8, (R4)
	VMOVQ	V9, 16(R4)
	VMOVQ	V10, 32(R4)
	VMOVQ	V11, 48(R4)
	VMOVQ	V12, 64(R4)
	VMOVQ	V13, 80(R4)
	VMOVQ	V14, 96(R4)
	VMOVQ	V15, 112(R4)
	SGTU	$128, R7, R8	// recompute the loop condition before advancing
	ADDV	$128, R5
	ADDV	$128, R6
	ADDV	$128, R4
	BEQ	R8, xor_128_lsx_loop	// loop while n >= 128
	BEQ	R7, end			// n == 0: done

xor_64_lsx_check:
	SGTU	$64, R7, R8
	BNE	R8, xor_32_lsx_check
xor_64_lsx:
	SUBV	$64, R7
	VMOVQ	(R5), V0
	VMOVQ	16(R5), V1
	VMOVQ	32(R5), V2
	VMOVQ	48(R5), V3
	VMOVQ	(R6), V4
	VMOVQ	16(R6), V5
	VMOVQ	32(R6), V6
	VMOVQ	48(R6), V7
	VXORV	V0, V4, V4
	VXORV	V1, V5, V5
	VXORV	V2, V6, V6
	VXORV	V3, V7, V7
	VMOVQ	V4, (R4)
	VMOVQ	V5, 16(R4)
	VMOVQ	V6, 32(R4)
	VMOVQ	V7, 48(R4)
	ADDV	$64, R5
	ADDV	$64, R6
	ADDV	$64, R4
	BEQ	R7, end

xor_32_lsx_check:
	SGTU	$32, R7, R8
	BNE	R8, xor_16_lsx_check
xor_32_lsx:
	SUBV	$32, R7
	VMOVQ	(R5), V0
	VMOVQ	16(R5), V1
	VMOVQ	(R6), V2
	VMOVQ	16(R6), V3
	VXORV	V0, V2, V2
	VXORV	V1, V3, V3
	VMOVQ	V2, (R4)
	VMOVQ	V3, 16(R4)
	ADDV	$32, R5
	ADDV	$32, R6
	ADDV	$32, R4
	BEQ	R7, end

xor_16_lsx_check:
	SGTU	$16, R7, R8
	BNE	R8, xor_8_check
xor_16_lsx:
	SUBV	$16, R7
	VMOVQ	(R5), V0
	VMOVQ	(R6), V1
	VXORV	V0, V1, V1
	VMOVQ	V1, (R4)
	ADDV	$16, R5
	ADDV	$16, R6
	ADDV	$16, R4
	BEQ	R7, end

	SMALL
end:
	RET

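// xorBytesLASX is the same routine built on 256-bit LASX vector
// registers: a 256-byte unrolled loop, then 128/64/32-byte LASX steps,
// a 16-byte LSX step, then SMALL.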
// func xorBytesLASX(dst, a, b *byte, n int)
TEXT ·xorBytesLASX(SB), NOSPLIT, $0
	MOVV	dst+0(FP), R4
	MOVV	a+8(FP), R5
	MOVV	b+16(FP), R6
	MOVV	n+24(FP), R7

	SMALL_TAIL

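// Bulk loop: 256 bytes per iteration, using eight 32-byte XVMOVQ loads
// from each input and eight XVXORV operations.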
xor_256_lasx_check:
	SGTU	$256, R7, R8	// R8 = 1 if n < 256
	BNE	R8, xor_128_lasx_check
xor_256_lasx_loop:
	SUBV	$256, R7
	XVMOVQ	(R5), X0
	XVMOVQ	32(R5), X1
	XVMOVQ	64(R5), X2
	XVMOVQ	96(R5), X3
	XVMOVQ	128(R5), X4
	XVMOVQ	160(R5), X5
	XVMOVQ	192(R5), X6
	XVMOVQ	224(R5), X7
	XVMOVQ	(R6), X8
	XVMOVQ	32(R6), X9
	XVMOVQ	64(R6), X10
	XVMOVQ	96(R6), X11
	XVMOVQ	128(R6), X12
	XVMOVQ	160(R6), X13
	XVMOVQ	192(R6), X14
	XVMOVQ	224(R6), X15
	XVXORV	X0, X8, X8
	XVXORV	X1, X9, X9
	XVXORV	X2, X10, X10
	XVXORV	X3, X11, X11
	XVXORV	X4, X12, X12
	XVXORV	X5, X13, X13
	XVXORV	X6, X14, X14
	XVXORV	X7, X15, X15
	XVMOVQ	X8, (R4)
	XVMOVQ	X9, 32(R4)
	XVMOVQ	X10, 64(R4)
	XVMOVQ	X11, 96(R4)
	XVMOVQ	X12, 128(R4)
	XVMOVQ	X13, 160(R4)
	XVMOVQ	X14, 192(R4)
	XVMOVQ	X15, 224(R4)
	SGTU	$256, R7, R8	// recompute the loop condition before advancing
	ADDV	$256, R5
	ADDV	$256, R6
	ADDV	$256, R4
	BEQ	R8, xor_256_lasx_loop	// loop while n >= 256
	BEQ	R7, end			// n == 0: done

xor_128_lasx_check:
	SGTU	$128, R7, R8
	BNE	R8, xor_64_lasx_check
xor_128_lasx:
	SUBV	$128, R7
	XVMOVQ	(R5), X0
	XVMOVQ	32(R5), X1
	XVMOVQ	64(R5), X2
	XVMOVQ	96(R5), X3
	XVMOVQ	(R6), X4
	XVMOVQ	32(R6), X5
	XVMOVQ	64(R6), X6
	XVMOVQ	96(R6), X7
	XVXORV	X0, X4, X4
	XVXORV	X1, X5, X5
	XVXORV	X2, X6, X6
	XVXORV	X3, X7, X7
	XVMOVQ	X4, (R4)
	XVMOVQ	X5, 32(R4)
	XVMOVQ	X6, 64(R4)
	XVMOVQ	X7, 96(R4)
	ADDV	$128, R5
	ADDV	$128, R6
	ADDV	$128, R4
	BEQ	R7, end

xor_64_lasx_check:
	SGTU	$64, R7, R8
	BNE	R8, xor_32_lasx_check
xor_64_lasx:
	SUBV	$64, R7
	XVMOVQ	(R5), X0
	XVMOVQ	32(R5), X1
	XVMOVQ	(R6), X2
	XVMOVQ	32(R6), X3
	XVXORV	X0, X2, X2
	XVXORV	X1, X3, X3
	XVMOVQ	X2, (R4)
	XVMOVQ	X3, 32(R4)
	ADDV	$64, R5
	ADDV	$64, R6
	ADDV	$64, R4
	BEQ	R7, end

xor_32_lasx_check:
	SGTU	$32, R7, R8
	BNE	R8, xor_16_lasx_check
xor_32_lasx:
	SUBV	$32, R7
	XVMOVQ	(R5), X0
	XVMOVQ	(R6), X1
	XVXORV	X0, X1, X1
	XVMOVQ	X1, (R4)
	ADDV	$32, R5
	ADDV	$32, R6
	ADDV	$32, R4
	BEQ	R7, end

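// The final 16-byte chunk is narrower than a 256-bit register, so it
// falls back to a 128-bit LSX operation (hardware with LASX also
// implements LSX).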
xor_16_lasx_check:
	SGTU	$16, R7, R8
	BNE	R8, xor_8_check
xor_16_lasx:
	SUBV	$16, R7
	VMOVQ	(R5), V0
	VMOVQ	(R6), V1
	VXORV	V0, V1, V1
	VMOVQ	V1, (R4)
	ADDV	$16, R5
	ADDV	$16, R6
	ADDV	$16, R4
	BEQ	R7, end

	SMALL
end:
	RET