// src/hash/crc32/crc32_loong64.s - go - Git at Google

 // Copyright 2024 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

 #include "textflag.h"

 // castagnoliUpdate feeds p into the non-inverted CRC value crc using the
 // CRCCW*W instructions and returns the updated, still non-inverted, value.
 // No inversion is performed here.
 //
 // Registers:
 //	R4 = running CRC
 //	R5 = read cursor into p
 //	R6 = bytes still to process
 //	R7 = scratch for tests and the alignment count
 //	R8 = scratch for tests and loaded data
 //
 // func castagnoliUpdate(crc uint32, p []byte) uint32
 TEXT ·castagnoliUpdate(SB),NOSPLIT,$0-36
 	MOVWU	crc+0(FP), R4
 	MOVV	p+8(FP), R5
 	MOVV	p_len+16(FP), R6

 	// Short inputs (len < 8) bypass both alignment and the 8-byte loop.
 	SGT	$8, R6, R7		// R7 = 1 when 8 > len
 	BNE	R7, tail
 	AND	$7, R5, R7		// R7 = cursor mod 8
 	BEQ	R7, wide

 	// R7 is 1..7 here. Turn it into 8 - R7, the number of bytes up to the
 	// next 8-byte boundary, and consume that many bytes as an optional
 	// 1-byte, 2-byte and 4-byte piece chosen by bits 0, 1 and 2 of R7.
 	SUB	$1, R7
 	XOR	$7, R7

 	AND	$1, R7, R8
 	BEQ	R8, head_half
 	MOVB	(R5), R8
 	CRCCWBW	R4, R8, R4
 	ADDV	$1, R5, R5
 	ADDV	$-1, R6, R6

 head_half:
 	AND	$2, R7, R8
 	BEQ	R8, head_word
 	MOVH	(R5), R8
 	CRCCWHW	R4, R8, R4
 	ADDV	$2, R5, R5
 	ADDV	$-2, R6, R6

 head_word:
 	AND	$4, R7, R8
 	BEQ	R8, wide
 	MOVW	(R5), R8
 	CRCCWWW	R4, R8, R4
 	ADDV	$4, R5, R5
 	ADDV	$-4, R6, R6

 wide:
 	// The cursor is 8-byte aligned from here on. The alignment step may
 	// have left fewer than 8 bytes, so test before entering the loop.
 	SGT	$8, R6, R7
 	BNE	R7, tail

 wide_loop:
 	// Consume 8 bytes per iteration while at least 8 remain.
 	MOVV	(R5), R8
 	CRCCWVW	R4, R8, R4
 	ADDV	$8, R5, R5
 	ADDV	$-8, R6, R6
 	SGT	$8, R6, R7
 	BEQ	R7, wide_loop

 tail:
 	// Fewer than 8 bytes remain; bits 2, 1 and 0 of R6 select a trailing
 	// 4-byte, 2-byte and 1-byte piece, handled in that order.
 	AND	$4, R6, R7
 	BEQ	R7, tail_half
 	MOVW	(R5), R8
 	CRCCWWW	R4, R8, R4
 	ADDV	$4, R5, R5
 	ADDV	$-4, R6, R6

 tail_half:
 	AND	$2, R6, R7
 	BEQ	R7, tail_byte
 	MOVH	(R5), R8
 	CRCCWHW	R4, R8, R4
 	ADDV	$2, R5, R5
 	ADDV	$-2, R6, R6

 tail_byte:
 	// R6 is now 0 or 1.
 	BEQ	R6, out
 	MOVB	(R5), R8
 	CRCCWBW	R4, R8, R4

 out:
 	MOVW	R4, ret+32(FP)
 	RET

 // ieeeUpdate feeds p into the non-inverted CRC value crc using the
 // CRCW*W instructions and returns the updated, still non-inverted, value.
 // No inversion is performed here.
 //
 // Registers:
 //	R4 = running CRC
 //	R5 = read cursor into p
 //	R6 = bytes still to process
 //	R7 = scratch for tests and the alignment count
 //	R8 = scratch for tests and loaded data
 //
 // func ieeeUpdate(crc uint32, p []byte) uint32
 TEXT ·ieeeUpdate(SB),NOSPLIT,$0-36
 	MOVWU	crc+0(FP), R4
 	MOVV	p+8(FP), R5
 	MOVV	p_len+16(FP), R6

 	// Short inputs (len < 8) bypass both alignment and the 8-byte loop.
 	SGT	$8, R6, R7		// R7 = 1 when 8 > len
 	BNE	R7, tail
 	AND	$7, R5, R7		// R7 = cursor mod 8
 	BEQ	R7, wide

 	// R7 is 1..7 here. Turn it into 8 - R7, the number of bytes up to the
 	// next 8-byte boundary, and consume that many bytes as an optional
 	// 1-byte, 2-byte and 4-byte piece chosen by bits 0, 1 and 2 of R7.
 	SUB	$1, R7
 	XOR	$7, R7

 	AND	$1, R7, R8
 	BEQ	R8, head_half
 	MOVB	(R5), R8
 	CRCWBW	R4, R8, R4
 	ADDV	$1, R5, R5
 	ADDV	$-1, R6, R6

 head_half:
 	AND	$2, R7, R8
 	BEQ	R8, head_word
 	MOVH	(R5), R8
 	CRCWHW	R4, R8, R4
 	ADDV	$2, R5, R5
 	ADDV	$-2, R6, R6

 head_word:
 	AND	$4, R7, R8
 	BEQ	R8, wide
 	MOVW	(R5), R8
 	CRCWWW	R4, R8, R4
 	ADDV	$4, R5, R5
 	ADDV	$-4, R6, R6

 wide:
 	// The cursor is 8-byte aligned from here on. The alignment step may
 	// have left fewer than 8 bytes, so test before entering the loop.
 	SGT	$8, R6, R7
 	BNE	R7, tail

 wide_loop:
 	// Consume 8 bytes per iteration while at least 8 remain.
 	MOVV	(R5), R8
 	CRCWVW	R4, R8, R4
 	ADDV	$8, R5, R5
 	ADDV	$-8, R6, R6
 	SGT	$8, R6, R7
 	BEQ	R7, wide_loop

 tail:
 	// Fewer than 8 bytes remain; bits 2, 1 and 0 of R6 select a trailing
 	// 4-byte, 2-byte and 1-byte piece, handled in that order.
 	AND	$4, R6, R7
 	BEQ	R7, tail_half
 	MOVW	(R5), R8
 	CRCWWW	R4, R8, R4
 	ADDV	$4, R5, R5
 	ADDV	$-4, R6, R6

 tail_half:
 	AND	$2, R6, R7
 	BEQ	R7, tail_byte
 	MOVH	(R5), R8
 	CRCWHW	R4, R8, R4
 	ADDV	$2, R5, R5
 	ADDV	$-2, R6, R6

 tail_byte:
 	// R6 is now 0 or 1.
 	BEQ	R6, out
 	MOVB	(R5), R8
 	CRCWBW	R4, R8, R4

 out:
 	MOVW	R4, ret+32(FP)
 	RET
	// Copyright 2024 The Go Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style
	// license that can be found in the LICENSE file.

	#include "textflag.h"

	// castagnoliUpdate feeds p into the non-inverted CRC value crc using the
	// CRCCW*W instructions and returns the updated, still non-inverted, value.
	// No inversion is performed here.
	//
	// NOTE(review): a TEXT block for ·castagnoliUpdate also appears earlier in
	// this file; confirm that only one copy is meant to be assembled.
	//
	// Registers:
	//	R4 = running CRC
	//	R5 = read cursor into p
	//	R6 = bytes still to process
	//	R7 = scratch for tests and the alignment count
	//	R8 = scratch for tests and loaded data
	//
	// func castagnoliUpdate(crc uint32, p []byte) uint32
	TEXT ·castagnoliUpdate(SB),NOSPLIT,$0-36
		MOVWU	crc+0(FP), R4
		MOVV	p+8(FP), R5
		MOVV	p_len+16(FP), R6

		// Short inputs (len < 8) bypass both alignment and the 8-byte loop.
		SGT	$8, R6, R7		// R7 = 1 when 8 > len
		BNE	R7, tail
		AND	$7, R5, R7		// R7 = cursor mod 8
		BEQ	R7, wide

		// R7 is 1..7 here. Turn it into 8 - R7, the number of bytes up to the
		// next 8-byte boundary, and consume that many bytes as an optional
		// 1-byte, 2-byte and 4-byte piece chosen by bits 0, 1 and 2 of R7.
		SUB	$1, R7
		XOR	$7, R7

		AND	$1, R7, R8
		BEQ	R8, head_half
		MOVB	(R5), R8
		CRCCWBW	R4, R8, R4
		ADDV	$1, R5, R5
		ADDV	$-1, R6, R6

	head_half:
		AND	$2, R7, R8
		BEQ	R8, head_word
		MOVH	(R5), R8
		CRCCWHW	R4, R8, R4
		ADDV	$2, R5, R5
		ADDV	$-2, R6, R6

	head_word:
		AND	$4, R7, R8
		BEQ	R8, wide
		MOVW	(R5), R8
		CRCCWWW	R4, R8, R4
		ADDV	$4, R5, R5
		ADDV	$-4, R6, R6

	wide:
		// The cursor is 8-byte aligned from here on. The alignment step may
		// have left fewer than 8 bytes, so test before entering the loop.
		SGT	$8, R6, R7
		BNE	R7, tail

	wide_loop:
		// Consume 8 bytes per iteration while at least 8 remain.
		MOVV	(R5), R8
		CRCCWVW	R4, R8, R4
		ADDV	$8, R5, R5
		ADDV	$-8, R6, R6
		SGT	$8, R6, R7
		BEQ	R7, wide_loop

	tail:
		// Fewer than 8 bytes remain; bits 2, 1 and 0 of R6 select a trailing
		// 4-byte, 2-byte and 1-byte piece, handled in that order.
		AND	$4, R6, R7
		BEQ	R7, tail_half
		MOVW	(R5), R8
		CRCCWWW	R4, R8, R4
		ADDV	$4, R5, R5
		ADDV	$-4, R6, R6

	tail_half:
		AND	$2, R6, R7
		BEQ	R7, tail_byte
		MOVH	(R5), R8
		CRCCWHW	R4, R8, R4
		ADDV	$2, R5, R5
		ADDV	$-2, R6, R6

	tail_byte:
		// R6 is now 0 or 1.
		BEQ	R6, out
		MOVB	(R5), R8
		CRCCWBW	R4, R8, R4

	out:
		MOVW	R4, ret+32(FP)
		RET

	// ieeeUpdate feeds p into the non-inverted CRC value crc using the
	// CRCW*W instructions and returns the updated, still non-inverted, value.
	// No inversion is performed here.
	//
	// NOTE(review): a TEXT block for ·ieeeUpdate also appears earlier in this
	// file; confirm that only one copy is meant to be assembled.
	//
	// Registers:
	//	R4 = running CRC
	//	R5 = read cursor into p
	//	R6 = bytes still to process
	//	R7 = scratch for tests and the alignment count
	//	R8 = scratch for tests and loaded data
	//
	// func ieeeUpdate(crc uint32, p []byte) uint32
	TEXT ·ieeeUpdate(SB),NOSPLIT,$0-36
		MOVWU	crc+0(FP), R4
		MOVV	p+8(FP), R5
		MOVV	p_len+16(FP), R6

		// Short inputs (len < 8) bypass both alignment and the 8-byte loop.
		SGT	$8, R6, R7		// R7 = 1 when 8 > len
		BNE	R7, tail
		AND	$7, R5, R7		// R7 = cursor mod 8
		BEQ	R7, wide

		// R7 is 1..7 here. Turn it into 8 - R7, the number of bytes up to the
		// next 8-byte boundary, and consume that many bytes as an optional
		// 1-byte, 2-byte and 4-byte piece chosen by bits 0, 1 and 2 of R7.
		SUB	$1, R7
		XOR	$7, R7

		AND	$1, R7, R8
		BEQ	R8, head_half
		MOVB	(R5), R8
		CRCWBW	R4, R8, R4
		ADDV	$1, R5, R5
		ADDV	$-1, R6, R6

	head_half:
		AND	$2, R7, R8
		BEQ	R8, head_word
		MOVH	(R5), R8
		CRCWHW	R4, R8, R4
		ADDV	$2, R5, R5
		ADDV	$-2, R6, R6

	head_word:
		AND	$4, R7, R8
		BEQ	R8, wide
		MOVW	(R5), R8
		CRCWWW	R4, R8, R4
		ADDV	$4, R5, R5
		ADDV	$-4, R6, R6

	wide:
		// The cursor is 8-byte aligned from here on. The alignment step may
		// have left fewer than 8 bytes, so test before entering the loop.
		SGT	$8, R6, R7
		BNE	R7, tail

	wide_loop:
		// Consume 8 bytes per iteration while at least 8 remain.
		MOVV	(R5), R8
		CRCWVW	R4, R8, R4
		ADDV	$8, R5, R5
		ADDV	$-8, R6, R6
		SGT	$8, R6, R7
		BEQ	R7, wide_loop

	tail:
		// Fewer than 8 bytes remain; bits 2, 1 and 0 of R6 select a trailing
		// 4-byte, 2-byte and 1-byte piece, handled in that order.
		AND	$4, R6, R7
		BEQ	R7, tail_half
		MOVW	(R5), R8
		CRCWWW	R4, R8, R4
		ADDV	$4, R5, R5
		ADDV	$-4, R6, R6

	tail_half:
		AND	$2, R6, R7
		BEQ	R7, tail_byte
		MOVH	(R5), R8
		CRCWHW	R4, R8, R4
		ADDV	$2, R5, R5
		ADDV	$-2, R6, R6

	tail_byte:
		// R6 is now 0 or 1.
		BEQ	R6, out
		MOVB	(R5), R8
		CRCWBW	R4, R8, R4

	out:
		MOVW	R4, ret+32(FP)
		RET