// blob: 66c17a5d44d13c8bd208c13aa1eb5d3a41f3ddd5 [file] [log] [blame] [edit]
// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "textflag.h"
// castagnoliUpdate folds the bytes of p into crc using the hardware
// CRCC* (CRC-32C) instructions and returns the new value. crc is the
// non-inverted running value; any bit inversion is left to the caller.
//
// Register roles:
//	R4	running CRC
//	R5	cursor into p
//	R6	number of bytes still to consume
//	R12	scratch for comparisons and bit tests
//	R13	scratch for bit tests and loaded data
//
// func castagnoliUpdate(crc uint32, p []byte) uint32
TEXT ·castagnoliUpdate(SB),NOSPLIT,$0-36
	MOVV	p_len+16(FP), R6
	MOVV	p+8(FP), R5
	MOVWU	crc+0(FP), R4

	// Inputs shorter than 8 bytes skip both the alignment prologue and
	// the 8-byte loop.
	SGT	$8, R6, R12		// R12 = 1 when len(p) < 8
	BNE	R12, tail

	AND	$7, R5, R12		// R12 = cursor misalignment k (0..7)
	BEQ	R12, words

	// Turn k into the number of leading bytes to consume, 8 - k,
	// computed as (k - 1) ^ 7. Bits 0, 1 and 2 of that count select a
	// 1-, 2- and 4-byte step respectively. At least 8 bytes are
	// available here, so these steps never run past the end of p.
	SUB	$1, R12
	XOR	$7, R12

	AND	$1, R12, R13
	BEQ	R13, head_half
	MOVB	(R5), R13
	CRCCWBW	R4, R13, R4
	ADDV	$-1, R6
	ADDV	$1, R5

head_half:
	AND	$2, R12, R13
	BEQ	R13, head_word
	MOVH	(R5), R13
	CRCCWHW	R4, R13, R4
	ADDV	$-2, R6
	ADDV	$2, R5

head_word:
	AND	$4, R12, R13
	BEQ	R13, words
	MOVW	(R5), R13
	CRCCWWW	R4, R13, R4
	ADDV	$-4, R6
	ADDV	$4, R5

words:
	// The cursor is 8-byte aligned from here on. Enter the loop only
	// if a full 8-byte chunk is still available.
	SGT	$8, R6, R12
	BNE	R12, tail

loop8:
	MOVV	(R5), R13
	CRCCWVW	R4, R13, R4
	ADDV	$-8, R6
	ADDV	$8, R5
	SGT	$8, R6, R12		// R12 = 1 once fewer than 8 bytes remain
	BEQ	R12, loop8

tail:
	// Fewer than 8 bytes remain. Bits 2 and 1 of the count select a
	// 4-byte and a 2-byte step; whatever is left afterwards is at most
	// one byte.
	AND	$4, R6, R12
	BEQ	R12, tail_half
	MOVW	(R5), R13
	CRCCWWW	R4, R13, R4
	ADDV	$-4, R6
	ADDV	$4, R5

tail_half:
	AND	$2, R6, R12
	BEQ	R12, tail_byte
	MOVH	(R5), R13
	CRCCWHW	R4, R13, R4
	ADDV	$-2, R6
	ADDV	$2, R5

tail_byte:
	BEQ	R6, store
	MOVB	(R5), R13
	CRCCWBW	R4, R13, R4

store:
	MOVW	R4, ret+32(FP)
	RET
// ieeeUpdate folds the bytes of p into crc using the hardware CRCW*
// (IEEE CRC-32) instructions and returns the new value. crc is the
// non-inverted running value; any bit inversion is left to the caller.
//
// Register roles:
//	R4	running CRC
//	R5	cursor into p
//	R6	number of bytes still to consume
//	R12	scratch for comparisons and bit tests
//	R13	scratch for bit tests and loaded data
//
// func ieeeUpdate(crc uint32, p []byte) uint32
TEXT ·ieeeUpdate(SB),NOSPLIT,$0-36
	MOVV	p_len+16(FP), R6
	MOVV	p+8(FP), R5
	MOVWU	crc+0(FP), R4

	// Inputs shorter than 8 bytes skip both the alignment prologue and
	// the 8-byte loop.
	SGT	$8, R6, R12		// R12 = 1 when len(p) < 8
	BNE	R12, tail

	AND	$7, R5, R12		// R12 = cursor misalignment k (0..7)
	BEQ	R12, words

	// Turn k into the number of leading bytes to consume, 8 - k,
	// computed as (k - 1) ^ 7. Bits 0, 1 and 2 of that count select a
	// 1-, 2- and 4-byte step respectively. At least 8 bytes are
	// available here, so these steps never run past the end of p.
	SUB	$1, R12
	XOR	$7, R12

	AND	$1, R12, R13
	BEQ	R13, head_half
	MOVB	(R5), R13
	CRCWBW	R4, R13, R4
	ADDV	$-1, R6
	ADDV	$1, R5

head_half:
	AND	$2, R12, R13
	BEQ	R13, head_word
	MOVH	(R5), R13
	CRCWHW	R4, R13, R4
	ADDV	$-2, R6
	ADDV	$2, R5

head_word:
	AND	$4, R12, R13
	BEQ	R13, words
	MOVW	(R5), R13
	CRCWWW	R4, R13, R4
	ADDV	$-4, R6
	ADDV	$4, R5

words:
	// The cursor is 8-byte aligned from here on. Enter the loop only
	// if a full 8-byte chunk is still available.
	SGT	$8, R6, R12
	BNE	R12, tail

loop8:
	MOVV	(R5), R13
	CRCWVW	R4, R13, R4
	ADDV	$-8, R6
	ADDV	$8, R5
	SGT	$8, R6, R12		// R12 = 1 once fewer than 8 bytes remain
	BEQ	R12, loop8

tail:
	// Fewer than 8 bytes remain. Bits 2 and 1 of the count select a
	// 4-byte and a 2-byte step; whatever is left afterwards is at most
	// one byte.
	AND	$4, R6, R12
	BEQ	R12, tail_half
	MOVW	(R5), R13
	CRCWWW	R4, R13, R4
	ADDV	$-4, R6
	ADDV	$4, R5

tail_half:
	AND	$2, R6, R12
	BEQ	R12, tail_byte
	MOVH	(R5), R13
	CRCWHW	R4, R13, R4
	ADDV	$-2, R6
	ADDV	$2, R5

tail_byte:
	BEQ	R6, store
	MOVB	(R5), R13
	CRCWBW	R4, R13, R4

store:
	MOVW	R4, ret+32(FP)
	RET