// blob: 638b03ed40e0ce22bbe09fc3d88886c19a51b633
// Copyright 2025 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Code generated by 'go generate' (with ./internal/asmgen). DO NOT EDIT.
//go:build !math_big_pure_go
#include "textflag.h"
// func addVV(z, x, y []Word) (c Word)
//
// addVV computes z[i] = x[i] + y[i] + carry for i = 0 .. len(z)-1, one
// 4-byte word at a time, and stores the carry out of the last word (0 or 1)
// in c. Only z's length is read; x and y are walked for the same number of
// words without any length check of their own.
//
// Register roles:
//	R0               count of 4-word iterations (len(z) >> 2)
//	R1               cursor into x (post-incremented); reused for c at the end
//	R2               cursor into y (post-incremented)
//	R3               cursor into z (post-incremented)
//	R4               count of single-word iterations (len(z) & 3),
//	                 then a data word inside the 4X loop
//	R5-R9, R11, R12  data words
//
// The CPU carry flag holds the running carry from one ADC.S to the next.
// The loop bookkeeping between them uses SUB without the .S suffix and TEQ
// against the constant 0, which the code relies on to leave that flag alone.
TEXT ·addVV(SB), NOSPLIT, $0
MOVW z_len+4(FP), R0
MOVW x_base+12(FP), R1
MOVW y_base+24(FP), R2
MOVW z_base+0(FP), R3
// compute unrolled loop lengths
// R4 = len(z) & 3 words handled one at a time, R0 = len(z) >> 2 groups of four.
AND $3, R0, R4
MOVW R0>>2, R0
// Adding zero cannot produce a carry, so this starts the chain with carry = 0.
ADD.S $0, R0 // clear carry
loop1:
// Skip the single-word loop entirely when there are no leftover words.
TEQ $0, R4; BEQ loop1done
loop1cont:
// unroll 1X
// R5 = *x++, R6 = *y++, R5 += R6 + carry, *z++ = R5.
MOVW.P 4(R1), R5
MOVW.P 4(R2), R6
ADC.S R6, R5
MOVW.P R5, 4(R3)
SUB $1, R4
TEQ $0, R4; BNE loop1cont
loop1done:
loop4:
// Skip the 4X loop when fewer than four words remain.
TEQ $0, R0; BEQ loop4done
loop4cont:
// unroll 4X
// Load four words of x into R4-R7 and four words of y into R8, R9, R11, R12.
MOVW.P 4(R1), R4
MOVW.P 4(R1), R5
MOVW.P 4(R1), R6
MOVW.P 4(R1), R7
MOVW.P 4(R2), R8
MOVW.P 4(R2), R9
MOVW.P 4(R2), R11
MOVW.P 4(R2), R12
// Add the four pairs in order, lowest word first, chaining the carry.
ADC.S R8, R4
ADC.S R9, R5
ADC.S R11, R6
ADC.S R12, R7
// Store the four sums to z.
MOVW.P R4, 4(R3)
MOVW.P R5, 4(R3)
MOVW.P R6, 4(R3)
MOVW.P R7, 4(R3)
SUB $1, R0
TEQ $0, R0; BNE loop4cont
loop4done:
// Materialize the carry flag as a word: R1 - R1 - (1 - carry) is 0 when the
// flag is set and -1 when it is clear; adding 1 turns that into 1 or 0.
SBC R1, R1 // save carry
ADD $1, R1 // convert add carry
MOVW R1, c+36(FP)
RET
// func subVV(z, x, y []Word) (c Word)
//
// subVV computes z[i] = x[i] - y[i] - borrow for i = 0 .. len(z)-1, one
// 4-byte word at a time, and stores the borrow out of the last word (0 or 1)
// in c. Only z's length is read; x and y are walked for the same number of
// words without any length check of their own.
//
// Register roles:
//	R0               count of 4-word iterations (len(z) >> 2)
//	R1               cursor into x (post-incremented); reused for c at the end
//	R2               cursor into y (post-incremented)
//	R3               cursor into z (post-incremented)
//	R4               count of single-word iterations (len(z) & 3),
//	                 then a data word inside the 4X loop
//	R5-R9, R11, R12  data words
//
// The CPU carry flag holds the running borrow state from one SBC.S to the
// next. The loop bookkeeping between them uses SUB without the .S suffix and
// TEQ against the constant 0, which the code relies on to leave that flag
// alone.
TEXT ·subVV(SB), NOSPLIT, $0
MOVW z_len+4(FP), R0
MOVW x_base+12(FP), R1
MOVW y_base+24(FP), R2
MOVW z_base+0(FP), R3
// compute unrolled loop lengths
// R4 = len(z) & 3 words handled one at a time, R0 = len(z) >> 2 groups of four.
AND $3, R0, R4
MOVW R0>>2, R0
// Subtracting zero cannot borrow, so this puts the flag in the "no borrow"
// state that the first SBC.S expects (on ARM that is the carry flag being set).
SUB.S $0, R0 // clear carry
loop1:
// Skip the single-word loop entirely when there are no leftover words.
TEQ $0, R4; BEQ loop1done
loop1cont:
// unroll 1X
// R5 = *x++, R6 = *y++, R5 -= R6 + borrow, *z++ = R5.
MOVW.P 4(R1), R5
MOVW.P 4(R2), R6
SBC.S R6, R5
MOVW.P R5, 4(R3)
SUB $1, R4
TEQ $0, R4; BNE loop1cont
loop1done:
loop4:
// Skip the 4X loop when fewer than four words remain.
TEQ $0, R0; BEQ loop4done
loop4cont:
// unroll 4X
// Load four words of x into R4-R7 and four words of y into R8, R9, R11, R12.
MOVW.P 4(R1), R4
MOVW.P 4(R1), R5
MOVW.P 4(R1), R6
MOVW.P 4(R1), R7
MOVW.P 4(R2), R8
MOVW.P 4(R2), R9
MOVW.P 4(R2), R11
MOVW.P 4(R2), R12
// Subtract the four pairs in order, lowest word first, chaining the borrow.
SBC.S R8, R4
SBC.S R9, R5
SBC.S R11, R6
SBC.S R12, R7
// Store the four differences to z.
MOVW.P R4, 4(R3)
MOVW.P R5, 4(R3)
MOVW.P R6, 4(R3)
MOVW.P R7, 4(R3)
SUB $1, R0
TEQ $0, R0; BNE loop4cont
loop4done:
// Materialize the borrow as a word: R1 - R1 - borrow is 0 when there was no
// borrow and -1 when there was; negating it (0 - R1) turns that into 0 or 1.
SBC R1, R1 // save carry
RSB $0, R1, R1 // convert sub carry
MOVW R1, c+36(FP)
RET
// func lshVU(z, x []Word, s uint) (c Word)
//
// lshVU writes x shifted left by s bits into z, treating the len(z) words as
// one multi-word value with word 0 least significant, and returns in c the
// bits shifted out of the top word: c = x[len(z)-1] >> (32 - s).
// When len(z) is 0 nothing is read or written and c is set to 0.
// Only z's length is read; x is walked for the same number of words.
// NOTE(review): the code does not range-check s; presumably callers keep it
// below the 32-bit word size — confirm against the Go-side callers.
//
// Register roles:
//	R0     word count, then count of 4-word iterations
//	R1     s (left-shift amount)
//	R2     cursor into x, starts one past the last word and is pre-decremented
//	R3     cursor into z, starts one past the last word and is pre-decremented
//	R4     pending output word: the previous input word << s
//	R5     32 - s (right-shift amount for the bits that cross a word boundary)
//	R6     c while it is computed, then count of single-word iterations,
//	       then a data word inside the 4X loop
//	R7-R9  data words
TEXT ·lshVU(SB), NOSPLIT, $0
MOVW z_len+4(FP), R0
// Empty vector: just return c = 0.
TEQ $0, R0; BEQ ret0
MOVW s+24(FP), R1
MOVW x_base+12(FP), R2
MOVW z_base+0(FP), R3
// run loop backward
// Advance both cursors by len(z)*4 bytes so they point just past the end.
ADD R0<<2, R2, R2
ADD R0<<2, R3, R3
// shift first word into carry
// R4 = top word of x; R6 = its high bits (the result c); R4 = its low bits
// moved up by s, waiting to be combined with bits from the next lower word.
MOVW.W -4(R2), R4
MOVW $32, R5
SUB R1, R5
MOVW R4>>R5, R6
MOVW R4<<R1, R4
// c is stored right away, which frees R6 for use as a loop counter.
MOVW R6, c+28(FP)
// shift remaining words
// One word has been consumed already, so len(z)-1 words are left to load.
SUB $1, R0
// compute unrolled loop lengths
AND $3, R0, R6
MOVW R0>>2, R0
loop1:
TEQ $0, R6; BEQ loop1done
loop1cont:
// unroll 1X
// Load the next lower word, OR its top s bits into the pending word, store
// the completed word, and start a new pending word from the rest.
MOVW.W -4(R2), R7
ORR R7>>R5, R4
MOVW.W R4, -4(R3)
MOVW R7<<R1, R4
SUB $1, R6
TEQ $0, R6; BNE loop1cont
loop1done:
loop4:
TEQ $0, R0; BEQ loop4done
loop4cont:
// unroll 4X
// Load four words (descending addresses) into R6-R9, then repeat the
// combine/store/restart step of the 1X loop once per loaded word.
MOVW.W -4(R2), R6
MOVW.W -4(R2), R7
MOVW.W -4(R2), R8
MOVW.W -4(R2), R9
ORR R6>>R5, R4
MOVW.W R4, -4(R3)
MOVW R6<<R1, R4
ORR R7>>R5, R4
MOVW.W R4, -4(R3)
MOVW R7<<R1, R4
ORR R8>>R5, R4
MOVW.W R4, -4(R3)
MOVW R8<<R1, R4
ORR R9>>R5, R4
MOVW.W R4, -4(R3)
MOVW R9<<R1, R4
SUB $1, R0
TEQ $0, R0; BNE loop4cont
loop4done:
// store final shifted bits
// The pending word now holds the lowest input word << s; it becomes z[0].
MOVW.W R4, -4(R3)
RET
ret0:
// len(z) == 0: the result is c = 0.
MOVW $0, R1
MOVW R1, c+28(FP)
RET
// func rshVU(z, x []Word, s uint) (c Word)
//
// rshVU writes x shifted right by s bits into z, treating the len(z) words as
// one multi-word value with word 0 least significant, and returns in c the
// bits shifted out of the bottom word: c = x[0] << (32 - s).
// When len(z) is 0 nothing is read or written and c is set to 0.
// Only z's length is read; x is walked for the same number of words.
// NOTE(review): the code does not range-check s; presumably callers keep it
// below the 32-bit word size — confirm against the Go-side callers.
//
// Register roles:
//	R0     word count, then count of 4-word iterations
//	R1     s (right-shift amount)
//	R2     cursor into x (post-incremented)
//	R3     cursor into z (post-incremented)
//	R4     pending output word: the previous input word >> s
//	R5     32 - s (left-shift amount for the bits that cross a word boundary)
//	R6     c while it is computed, then count of single-word iterations,
//	       then a data word inside the 4X loop
//	R7-R9  data words
TEXT ·rshVU(SB), NOSPLIT, $0
MOVW z_len+4(FP), R0
// Empty vector: just return c = 0.
TEQ $0, R0; BEQ ret0
MOVW s+24(FP), R1
MOVW x_base+12(FP), R2
MOVW z_base+0(FP), R3
// shift first word into carry
// R4 = x[0]; R6 = its low bits moved to the top (the result c); R4 = its high
// bits moved down by s, waiting to be combined with bits from the next word.
MOVW.P 4(R2), R4
MOVW $32, R5
SUB R1, R5
MOVW R4<<R5, R6
MOVW R4>>R1, R4
// c is stored right away, which frees R6 for use as a loop counter.
MOVW R6, c+28(FP)
// shift remaining words
// One word has been consumed already, so len(z)-1 words are left to load.
SUB $1, R0
// compute unrolled loop lengths
AND $3, R0, R6
MOVW R0>>2, R0
loop1:
TEQ $0, R6; BEQ loop1done
loop1cont:
// unroll 1X
// Load the next higher word, OR its low s bits into the top of the pending
// word, store the completed word, and start a new pending word from the rest.
MOVW.P 4(R2), R7
ORR R7<<R5, R4
MOVW.P R4, 4(R3)
MOVW R7>>R1, R4
SUB $1, R6
TEQ $0, R6; BNE loop1cont
loop1done:
loop4:
TEQ $0, R0; BEQ loop4done
loop4cont:
// unroll 4X
// Load four words (ascending addresses) into R6-R9, then repeat the
// combine/store/restart step of the 1X loop once per loaded word.
MOVW.P 4(R2), R6
MOVW.P 4(R2), R7
MOVW.P 4(R2), R8
MOVW.P 4(R2), R9
ORR R6<<R5, R4
MOVW.P R4, 4(R3)
MOVW R6>>R1, R4
ORR R7<<R5, R4
MOVW.P R4, 4(R3)
MOVW R7>>R1, R4
ORR R8<<R5, R4
MOVW.P R4, 4(R3)
MOVW R8>>R1, R4
ORR R9<<R5, R4
MOVW.P R4, 4(R3)
MOVW R9>>R1, R4
SUB $1, R0
TEQ $0, R0; BNE loop4cont
loop4done:
// store final shifted bits
// The pending word now holds the highest input word >> s; it becomes the
// last word of z.
MOVW.P R4, 4(R3)
RET
ret0:
// len(z) == 0: the result is c = 0.
MOVW $0, R1
MOVW R1, c+28(FP)
RET
// func mulAddVWW(z, x []Word, m, a Word) (c Word)
//
// mulAddVWW computes z = x*m + a over len(z) 4-byte words: for each i in
// increasing order it forms the 64-bit product m*x[i], adds the running carry
// word (initially a), stores the low word to z[i], and keeps the high word as
// the next carry. The final carry word is returned in c.
// Only z's length is read; x is walked for the same number of words.
//
// Register roles:
//	R0      m
//	R1      running carry word (starts as a, ends as c)
//	R2      word count, then count of 4-word iterations
//	R3      cursor into x (post-incremented)
//	R4      cursor into z (post-incremented)
//	R5      count of single-word iterations, then a data word in the 4X loop
//	R6      data word / low half of a product
//	R7, R8  high halves of products
TEXT ·mulAddVWW(SB), NOSPLIT, $0
MOVW m+24(FP), R0
MOVW a+28(FP), R1
MOVW z_len+4(FP), R2
MOVW x_base+12(FP), R3
MOVW z_base+0(FP), R4
// compute unrolled loop lengths
// R5 = len(z) & 3 words handled one at a time, R2 = len(z) >> 2 groups of four.
AND $3, R2, R5
MOVW R2>>2, R2
loop1:
TEQ $0, R5; BEQ loop1done
loop1cont:
// unroll 1X
MOVW.P 4(R3), R6
// multiply
// (R7:R6) = m * x[i]; add the incoming carry word to the low half and fold
// the resulting carry flag into the high half, which becomes the new R1.
MULLU R0, R6, (R7, R6)
ADD.S R1, R6
ADC $0, R7, R1
MOVW.P R6, 4(R4)
SUB $1, R5
TEQ $0, R5; BNE loop1cont
loop1done:
loop4:
TEQ $0, R2; BEQ loop4done
loop4cont:
// unroll 4X in batches of 2
// First pair of words.
MOVW.P 4(R3), R5
MOVW.P 4(R3), R6
// multiply
// (R7:R5) = m * first word; R5 += carry word, setting the carry flag.
// (R8:R6) = m * second word; MULLU carries no .S suffix, so the flag from the
// ADD.S is still live for the ADC.S that adds the first product's high half.
// R1 = second product's high half plus the last carry flag.
MULLU R0, R5, (R7, R5)
ADD.S R1, R5
MULLU R0, R6, (R8, R6)
ADC.S R7, R6
ADC $0, R8, R1
MOVW.P R5, 4(R4)
MOVW.P R6, 4(R4)
// Second pair of words, same sequence as the first pair.
MOVW.P 4(R3), R5
MOVW.P 4(R3), R6
// multiply
MULLU R0, R5, (R7, R5)
ADD.S R1, R5
MULLU R0, R6, (R8, R6)
ADC.S R7, R6
ADC $0, R8, R1
MOVW.P R5, 4(R4)
MOVW.P R6, 4(R4)
SUB $1, R2
TEQ $0, R2; BNE loop4cont
loop4done:
// The carry word left over after the last element is the result.
MOVW R1, c+32(FP)
RET
// func addMulVVWW(z, x, y []Word, m, a Word) (c Word)
//
// addMulVVWW computes z = x + y*m + a over len(z) 4-byte words: for each i in
// increasing order it forms the 64-bit product m*y[i], adds the running carry
// word (initially a) and x[i], stores the low word to z[i], and keeps the
// accumulated high word as the next carry. The final carry word is returned
// in c. Only z's length is read; x and y are walked for the same number of
// words without any length check of their own.
//
// Register roles:
//	R0        m
//	R1        running carry word (starts as a, ends as c)
//	R2        word count, then count of 4-word iterations
//	R3        cursor into x (post-incremented)
//	R4        cursor into y (post-incremented)
//	R5        cursor into z (post-incremented)
//	R6        count of single-word iterations, then an x word in the 4X loop
//	R7        x word
//	R8, R9    y words / low halves of products / results
//	R11, R12  high halves of products (R9 in the 1X loop)
TEXT ·addMulVVWW(SB), NOSPLIT, $0
MOVW m+36(FP), R0
MOVW a+40(FP), R1
MOVW z_len+4(FP), R2
MOVW x_base+12(FP), R3
MOVW y_base+24(FP), R4
MOVW z_base+0(FP), R5
// compute unrolled loop lengths
// R6 = len(z) & 3 words handled one at a time, R2 = len(z) >> 2 groups of four.
AND $3, R2, R6
MOVW R2>>2, R2
loop1:
TEQ $0, R6; BEQ loop1done
loop1cont:
// unroll 1X
MOVW.P 4(R3), R7
MOVW.P 4(R4), R8
// multiply
// (R9:R8) = m * y[i]; add the incoming carry word to the low half and fold
// the resulting carry flag into the high half, which becomes the new R1.
MULLU R0, R8, (R9, R8)
ADD.S R1, R8
ADC $0, R9, R1
// add
// Add x[i] to the low half and fold that carry flag into R1 as well.
ADD.S R7, R8
ADC $0, R1
MOVW.P R8, 4(R5)
SUB $1, R6
TEQ $0, R6; BNE loop1cont
loop1done:
loop4:
TEQ $0, R2; BEQ loop4done
loop4cont:
// unroll 4X in batches of 2
// First pair: two words of x into R6, R7 and two words of y into R8, R9.
MOVW.P 4(R3), R6
MOVW.P 4(R3), R7
MOVW.P 4(R4), R8
MOVW.P 4(R4), R9
// multiply
// (R11:R8) = m * first y word; R8 += carry word, setting the carry flag.
// (R12:R9) = m * second y word; MULLU carries no .S suffix, so the flag from
// the ADD.S is still live for the ADC.S that adds the first product's high
// half. R1 = second product's high half plus the last carry flag.
MULLU R0, R8, (R11, R8)
ADD.S R1, R8
MULLU R0, R9, (R12, R9)
ADC.S R11, R9
ADC $0, R12, R1
// add
// Add the two x words as a second carry chain and fold its final carry
// flag into R1.
ADD.S R6, R8
ADC.S R7, R9
ADC $0, R1
MOVW.P R8, 4(R5)
MOVW.P R9, 4(R5)
// Second pair of words, same sequence as the first pair.
MOVW.P 4(R3), R6
MOVW.P 4(R3), R7
MOVW.P 4(R4), R8
MOVW.P 4(R4), R9
// multiply
MULLU R0, R8, (R11, R8)
ADD.S R1, R8
MULLU R0, R9, (R12, R9)
ADC.S R11, R9
ADC $0, R12, R1
// add
ADD.S R6, R8
ADC.S R7, R9
ADC $0, R1
MOVW.P R8, 4(R5)
MOVW.P R9, 4(R5)
SUB $1, R2
TEQ $0, R2; BNE loop4cont
loop4done:
// The carry word left over after the last element is the result.
MOVW R1, c+44(FP)
RET