| // Copyright 2024 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| #include "textflag.h" |
| |
| // castagnoliUpdate folds the bytes of p into the running, non-inverted |
| // CRC-32C value crc and returns the new value. The CRCC.W.*.W instructions |
| // do the work, fed 8 bytes at a time once the read cursor is 8-byte aligned. |
| // |
| // Register roles: |
| // R4 (a0) = CRC accumulator |
| // R5 (a1) = read cursor into p |
| // R6 (a2) = bytes still to process |
| // R12 (t0) = condition scratch |
| // R13 (t1) = condition scratch / loaded data |
| // |
| // func castagnoliUpdate(crc uint32, p []byte) uint32 |
| TEXT ·castagnoliUpdate(SB),NOSPLIT,$0-36 |
| MOVV p_len+16(FP), R6 // a2 = len(p) |
| MOVV p+8(FP), R5 // a1 = data pointer |
| MOVWU crc+0(FP), R4 // a0 = CRC value |
| |
| // Inputs shorter than 8 bytes go straight to the tail. |
| SGT $8, R6, R12 |
| BNE R12, tail |
| // An already aligned cursor skips the head. |
| AND $7, R5, R12 |
| BEQ R12, check_8 |
| |
| // Head: at least 8 bytes remain and the cursor is misaligned. |
| // Consume 1, then 2, then 4 bytes, each step only when the matching |
| // low bit of the cursor is set. Every step clears its bit, so the |
| // cursor is 8-byte aligned once all three have been considered. |
| AND $1, R5, R13 |
| BEQ R13, head_2 |
| MOVB (R5), R13 |
| CRCCWBW R4, R13, R4 |
| ADDV $-1, R6 |
| ADDV $1, R5 |
| |
| head_2: |
| AND $2, R5, R13 |
| BEQ R13, head_4 |
| MOVH (R5), R13 |
| CRCCWHW R4, R13, R4 |
| ADDV $-2, R6 |
| ADDV $2, R5 |
| |
| head_4: |
| AND $4, R5, R13 |
| BEQ R13, check_8 |
| MOVW (R5), R13 |
| CRCCWWW R4, R13, R4 |
| ADDV $-4, R6 |
| ADDV $4, R5 |
| |
| check_8: |
| // The cursor is 8-byte aligned here. The head may have used up to 7 |
| // bytes, so make sure a full 8-byte chunk is still available. |
| SGT $8, R6, R12 |
| BNE R12, tail |
| |
| loop_8: |
| // Invariant on entry: R6 >= 8. |
| MOVV (R5), R13 |
| CRCCWVW R4, R13, R4 |
| ADDV $-8, R6 |
| ADDV $8, R5 |
| SGT $8, R6, R12 |
| BEQ R12, loop_8 // keep going while 8 or more bytes remain |
| |
| tail: |
| // Fewer than 8 bytes remain, so bits 2, 1 and 0 of the remaining |
| // length select a final 4-byte, 2-byte and 1-byte step. |
| AND $4, R6, R12 |
| BEQ R12, tail_2 |
| MOVW (R5), R13 |
| CRCCWWW R4, R13, R4 |
| ADDV $-4, R6 |
| ADDV $4, R5 |
| |
| tail_2: |
| AND $2, R6, R12 |
| BEQ R12, tail_1 |
| MOVH (R5), R13 |
| CRCCWHW R4, R13, R4 |
| ADDV $-2, R6 |
| ADDV $2, R5 |
| |
| tail_1: |
| // R6 is now either 0 or 1. |
| BEQ R6, finish |
| MOVB (R5), R13 |
| CRCCWBW R4, R13, R4 |
| |
| finish: |
| MOVW R4, ret+32(FP) |
| RET |
| |
| // ieeeUpdate folds the bytes of p into the running, non-inverted CRC-32 |
| // value crc and returns the new value. The CRC.W.*.W instructions do the |
| // work, fed 8 bytes at a time once the read cursor is 8-byte aligned. |
| // |
| // Register roles: |
| // R4 (a0) = CRC accumulator |
| // R5 (a1) = read cursor into p |
| // R6 (a2) = bytes still to process |
| // R12 (t0) = condition scratch |
| // R13 (t1) = condition scratch / loaded data |
| // |
| // func ieeeUpdate(crc uint32, p []byte) uint32 |
| TEXT ·ieeeUpdate(SB),NOSPLIT,$0-36 |
| MOVV p_len+16(FP), R6 // a2 = len(p) |
| MOVV p+8(FP), R5 // a1 = data pointer |
| MOVWU crc+0(FP), R4 // a0 = CRC value |
| |
| // Inputs shorter than 8 bytes go straight to the tail. |
| SGT $8, R6, R12 |
| BNE R12, tail |
| // An already aligned cursor skips the head. |
| AND $7, R5, R12 |
| BEQ R12, check_8 |
| |
| // Head: at least 8 bytes remain and the cursor is misaligned. |
| // Consume 1, then 2, then 4 bytes, each step only when the matching |
| // low bit of the cursor is set. Every step clears its bit, so the |
| // cursor is 8-byte aligned once all three have been considered. |
| AND $1, R5, R13 |
| BEQ R13, head_2 |
| MOVB (R5), R13 |
| CRCWBW R4, R13, R4 |
| ADDV $-1, R6 |
| ADDV $1, R5 |
| |
| head_2: |
| AND $2, R5, R13 |
| BEQ R13, head_4 |
| MOVH (R5), R13 |
| CRCWHW R4, R13, R4 |
| ADDV $-2, R6 |
| ADDV $2, R5 |
| |
| head_4: |
| AND $4, R5, R13 |
| BEQ R13, check_8 |
| MOVW (R5), R13 |
| CRCWWW R4, R13, R4 |
| ADDV $-4, R6 |
| ADDV $4, R5 |
| |
| check_8: |
| // The cursor is 8-byte aligned here. The head may have used up to 7 |
| // bytes, so make sure a full 8-byte chunk is still available. |
| SGT $8, R6, R12 |
| BNE R12, tail |
| |
| loop_8: |
| // Invariant on entry: R6 >= 8. |
| MOVV (R5), R13 |
| CRCWVW R4, R13, R4 |
| ADDV $-8, R6 |
| ADDV $8, R5 |
| SGT $8, R6, R12 |
| BEQ R12, loop_8 // keep going while 8 or more bytes remain |
| |
| tail: |
| // Fewer than 8 bytes remain, so bits 2, 1 and 0 of the remaining |
| // length select a final 4-byte, 2-byte and 1-byte step. |
| AND $4, R6, R12 |
| BEQ R12, tail_2 |
| MOVW (R5), R13 |
| CRCWWW R4, R13, R4 |
| ADDV $-4, R6 |
| ADDV $4, R5 |
| |
| tail_2: |
| AND $2, R6, R12 |
| BEQ R12, tail_1 |
| MOVH (R5), R13 |
| CRCWHW R4, R13, R4 |
| ADDV $-2, R6 |
| ADDV $2, R5 |
| |
| tail_1: |
| // R6 is now either 0 or 1. |
| BEQ R6, finish |
| MOVB (R5), R13 |
| CRCWBW R4, R13, R4 |
| |
| finish: |
| MOVW R4, ret+32(FP) |
| RET |
| |