// blob: b81ed92480b6353f7a53425a93cb9438041194d2 [file] [edit]
// Copyright 2025 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Code generated by 'go generate' (with ./internal/asmgen). DO NOT EDIT.
//go:build !math_big_pure_go
#include "textflag.h"
// func addVV(z, x, y []Word) (c Word)
//
// addVV stores x[i] + y[i] (plus the carry from the previous word) into z[i]
// for every i in [0, len(z)) and returns the final carry, 0 or 1, in c.
// Only len(z) is read. NOTE(review): x and y are presumably at least len(z)
// words long - confirm against callers.
//
// When ·hasVX is nonzero, control jumps to ·addVVvec (defined outside this
// section); otherwise the scalar loops below run.
//
// Register use on the scalar path:
//	R0      constant zero
//	R1      count of 4-word iterations (len(z) >> 2)
//	R2, R3  read pointers into x and y
//	R4      write pointer into z
//	R5      count of 1-word iterations (len(z) & 3), later scratch
//	R5-R8   scratch for the words being added
//
// The carry lives in the machine carry flag between ADDE instructions.
// Pointers and counters are advanced with LAY (the equivalent ADD is shown in
// the trailing comments) so that the pending carry reaches the next ADDE.
TEXT ·addVV(SB), NOSPLIT, $0
// Select the implementation at run time based on ·hasVX.
MOVB ·hasVX(SB), R1
CMPBEQ R1, $0, novec
JMP ·addVVvec(SB)
novec:
// R0 = 0 is used as a zero operand by ADDC/ADDE below.
MOVD $0, R0
MOVD z_len+8(FP), R1
MOVD x_base+24(FP), R2
MOVD y_base+48(FP), R3
MOVD z_base+0(FP), R4
// compute unrolled loop lengths
// R5 = len(z) % 4 single-word steps, R1 = len(z) / 4 four-word steps.
MOVD R1, R5
AND $3, R5
SRD $2, R1
// Adding zero cannot carry out, so this starts the chain with carry = 0.
ADDC R0, R1 // clear carry
loop1:
CMPBEQ R5, $0, loop1done
loop1cont:
// unroll 1X
// z[i] = x[i] + y[i] + carry
MOVD 0(R2), R6
MOVD 0(R3), R7
ADDE R7, R6
MOVD R6, 0(R4)
LAY 8(R2), R2 // ADD $8, R2
LAY 8(R3), R3 // ADD $8, R3
LAY 8(R4), R4 // ADD $8, R4
LAY -1(R5), R5 // ADD $-1, R5
CMPBNE R5, $0, loop1cont
loop1done:
loop4:
CMPBEQ R1, $0, loop4done
loop4cont:
// unroll 4X in batches of 2
// First pair: words at offsets 0 and 8. R5 is free for reuse as scratch
// because the single-word loop has finished.
MOVD 0(R2), R5
MOVD 8(R2), R6
MOVD 0(R3), R7
MOVD 8(R3), R8
ADDE R7, R5
ADDE R8, R6
MOVD R5, 0(R4)
MOVD R6, 8(R4)
// Second pair: words at offsets 16 and 24.
MOVD 16(R2), R5
MOVD 24(R2), R6
MOVD 16(R3), R7
MOVD 24(R3), R8
ADDE R7, R5
ADDE R8, R6
MOVD R5, 16(R4)
MOVD R6, 24(R4)
LAY 32(R2), R2 // ADD $32, R2
LAY 32(R3), R3 // ADD $32, R3
LAY 32(R4), R4 // ADD $32, R4
LAY -1(R1), R1 // ADD $-1, R1
CMPBNE R1, $0, loop4cont
loop4done:
// R2 = 0 + 0 + carry, i.e. the final carry as a 0/1 word.
ADDE R0, R0, R2 // save & convert add carry
MOVD R2, c+72(FP)
RET
// func subVV(z, x, y []Word) (c Word)
//
// subVV stores x[i] - y[i] (less the borrow from the previous word) into z[i]
// for every i in [0, len(z)) and returns the final borrow, 0 or 1, in c.
// Only len(z) is read. NOTE(review): x and y are presumably at least len(z)
// words long - confirm against callers.
//
// When ·hasVX is nonzero, control jumps to ·subVVvec (defined outside this
// section); otherwise the scalar loops below run.
//
// Register use on the scalar path:
//	R0      constant zero
//	R1      count of 4-word iterations (len(z) >> 2)
//	R2, R3  read pointers into x and y (R2 is reused for the result at the end)
//	R4      write pointer into z
//	R5      count of 1-word iterations (len(z) & 3), later scratch
//	R5-R8   scratch for the words being subtracted
//
// The borrow lives in the machine carry flag between SUBE instructions.
// Pointers and counters are advanced with LAY (the equivalent ADD is shown in
// the trailing comments) so that the pending borrow reaches the next SUBE.
TEXT ·subVV(SB), NOSPLIT, $0
// Select the implementation at run time based on ·hasVX.
MOVB ·hasVX(SB), R1
CMPBEQ R1, $0, novec
JMP ·subVVvec(SB)
novec:
// R0 = 0 is used as a zero operand by SUBC below.
MOVD $0, R0
MOVD z_len+8(FP), R1
MOVD x_base+24(FP), R2
MOVD y_base+48(FP), R3
MOVD z_base+0(FP), R4
// compute unrolled loop lengths
// R5 = len(z) % 4 single-word steps, R1 = len(z) / 4 four-word steps.
MOVD R1, R5
AND $3, R5
SRD $2, R1
// Subtracting zero cannot borrow, so this starts the chain with no borrow.
SUBC R0, R1 // clear carry
loop1:
CMPBEQ R5, $0, loop1done
loop1cont:
// unroll 1X
// z[i] = x[i] - y[i] - borrow
MOVD 0(R2), R6
MOVD 0(R3), R7
SUBE R7, R6
MOVD R6, 0(R4)
LAY 8(R2), R2 // ADD $8, R2
LAY 8(R3), R3 // ADD $8, R3
LAY 8(R4), R4 // ADD $8, R4
LAY -1(R5), R5 // ADD $-1, R5
CMPBNE R5, $0, loop1cont
loop1done:
loop4:
CMPBEQ R1, $0, loop4done
loop4cont:
// unroll 4X in batches of 2
// First pair: words at offsets 0 and 8. R5 is free for reuse as scratch
// because the single-word loop has finished.
MOVD 0(R2), R5
MOVD 8(R2), R6
MOVD 0(R3), R7
MOVD 8(R3), R8
SUBE R7, R5
SUBE R8, R6
MOVD R5, 0(R4)
MOVD R6, 8(R4)
// Second pair: words at offsets 16 and 24.
MOVD 16(R2), R5
MOVD 24(R2), R6
MOVD 16(R3), R7
MOVD 24(R3), R8
SUBE R7, R5
SUBE R8, R6
MOVD R5, 16(R4)
MOVD R6, 24(R4)
LAY 32(R2), R2 // ADD $32, R2
LAY 32(R3), R3 // ADD $32, R3
LAY 32(R4), R4 // ADD $32, R4
LAY -1(R1), R1 // ADD $-1, R1
CMPBNE R1, $0, loop4cont
loop4done:
// R2 - R2 - borrow leaves 0 or -1 in R2; the x pointer is no longer needed.
SUBE R2, R2 // save carry
// Negate so the returned borrow is 0 or 1.
NEG R2 // convert sub carry
MOVD R2, c+72(FP)
RET
// func lshVU(z, x []Word, s uint) (c Word)
//
// lshVU shifts the len(z)-word vector x left by s bits into z and returns in
// c the bits shifted out of the top word, x[len(z)-1] >> (64 - s).
// For i > 0, z[i] = x[i]<<s | x[i-1]>>(64-s); z[0] = x[0]<<s.
// If len(z) is 0, nothing is written and c is 0.
// NOTE(review): the code forms the complementary count 64 - s without any
// check, so s is presumably required to be in 1..63 - confirm with callers.
//
// The vector is processed from the highest word down to the lowest.
//
// Register use:
//	R0  constant zero
//	R1  len(z), then count of 4-word iterations over the remaining words
//	R2  s
//	R3  pointer just past the next unread word of x (moves downward)
//	R4  pointer just past the next unwritten word of z (moves downward)
//	R5  high part of the next result word, already shifted left by s
//	R6  64 - s
//	R7  count of 1-word iterations, later scratch
//	R8, R9  scratch
TEXT ·lshVU(SB), NOSPLIT, $0
MOVD $0, R0
MOVD z_len+8(FP), R1
CMPBEQ R1, $0, ret0
MOVD s+48(FP), R2
MOVD x_base+24(FP), R3
MOVD z_base+0(FP), R4
// run loop backward
// Advance both pointers by 8*len(z) bytes so they point one past the end.
SLD $3, R1, R5
LAY (R5)(R3), R3 // ADD R5, R3
SLD $3, R1, R5
LAY (R5)(R4), R4 // ADD R5, R4
// shift first word into carry
// R7 = top word >> (64-s) is the return value; R5 = top word << s is kept
// as the pending high part of z's top word.
MOVD -8(R3), R5
MOVD $64, R6
SUBC R2, R6
SRD R6, R5, R7
SLD R2, R5
MOVD R7, c+56(FP)
// shift remaining words
// One word has been consumed, so len(z)-1 words remain to be read.
SUBC $1, R1
// compute unrolled loop lengths
MOVD R1, R7
AND $3, R7
SRD $2, R1
loop1:
CMPBEQ R7, $0, loop1done
loop1cont:
// unroll 1X
// Read the next lower word, combine its top bits with the pending high
// part, store the finished word, and keep its low bits << s as the new
// pending part.
MOVD -16(R3), R8
SRD R6, R8, R9
OR R5, R9
SLD R2, R8, R5
MOVD R9, -8(R4)
LAY -8(R3), R3 // ADD $-8, R3
LAY -8(R4), R4 // ADD $-8, R4
LAY -1(R7), R7 // ADD $-1, R7
CMPBNE R7, $0, loop1cont
loop1done:
loop4:
CMPBEQ R1, $0, loop4done
loop4cont:
// unroll 4X in batches of 2
// Same step as above, applied to four words per iteration. R7 is free for
// reuse as scratch because the single-word loop has finished.
MOVD -16(R3), R7
MOVD -24(R3), R8
SRD R6, R7, R9
OR R5, R9
SLD R2, R7, R5
SRD R6, R8, R7
OR R5, R7
SLD R2, R8, R5
MOVD R9, -8(R4)
MOVD R7, -16(R4)
MOVD -32(R3), R7
MOVD -40(R3), R8
SRD R6, R7, R9
OR R5, R9
SLD R2, R7, R5
SRD R6, R8, R7
OR R5, R7
SLD R2, R8, R5
MOVD R9, -24(R4)
MOVD R7, -32(R4)
LAY -32(R3), R3 // ADD $-32, R3
LAY -32(R4), R4 // ADD $-32, R4
LAY -1(R1), R1 // ADD $-1, R1
CMPBNE R1, $0, loop4cont
loop4done:
// store final shifted bits
// The pending part is the lowest input word << s; it becomes the lowest
// output word.
MOVD R5, -8(R4)
RET
ret0:
// Empty vector: return c = 0.
MOVD R0, c+56(FP)
RET
// func rshVU(z, x []Word, s uint) (c Word)
//
// rshVU shifts the len(z)-word vector x right by s bits into z and returns in
// c the bits shifted out of the bottom word, x[0] << (64 - s).
// For i < len(z)-1, z[i] = x[i]>>s | x[i+1]<<(64-s);
// z[len(z)-1] = x[len(z)-1]>>s.
// If len(z) is 0, nothing is written and c is 0.
// NOTE(review): the code forms the complementary count 64 - s without any
// check, so s is presumably required to be in 1..63 - confirm with callers.
//
// The vector is processed from the lowest word up to the highest.
//
// Register use:
//	R0  constant zero
//	R1  len(z), then count of 4-word iterations over the remaining words
//	R2  s
//	R3  pointer to the most recently read word of x (moves upward)
//	R4  pointer to the next unwritten word of z (moves upward)
//	R5  low part of the next result word, already shifted right by s
//	R6  64 - s
//	R7  count of 1-word iterations, later scratch
//	R8, R9  scratch
TEXT ·rshVU(SB), NOSPLIT, $0
MOVD $0, R0
MOVD z_len+8(FP), R1
CMPBEQ R1, $0, ret0
MOVD s+48(FP), R2
MOVD x_base+24(FP), R3
MOVD z_base+0(FP), R4
// shift first word into carry
// R7 = x[0] << (64-s) is the return value; R5 = x[0] >> s is kept as the
// pending low part of z[0].
MOVD 0(R3), R5
MOVD $64, R6
SUBC R2, R6
SLD R6, R5, R7
SRD R2, R5
MOVD R7, c+56(FP)
// shift remaining words
// One word has been consumed, so len(z)-1 words remain to be read.
SUBC $1, R1
// compute unrolled loop lengths
MOVD R1, R7
AND $3, R7
SRD $2, R1
loop1:
CMPBEQ R7, $0, loop1done
loop1cont:
// unroll 1X
// Read the next higher word, combine its low bits with the pending low
// part, store the finished word, and keep its high bits >> s as the new
// pending part.
MOVD 8(R3), R8
SLD R6, R8, R9
OR R5, R9
SRD R2, R8, R5
MOVD R9, 0(R4)
LAY 8(R3), R3 // ADD $8, R3
LAY 8(R4), R4 // ADD $8, R4
LAY -1(R7), R7 // ADD $-1, R7
CMPBNE R7, $0, loop1cont
loop1done:
loop4:
CMPBEQ R1, $0, loop4done
loop4cont:
// unroll 4X in batches of 2
// Same step as above, applied to four words per iteration. R7 is free for
// reuse as scratch because the single-word loop has finished.
MOVD 8(R3), R7
MOVD 16(R3), R8
SLD R6, R7, R9
OR R5, R9
SRD R2, R7, R5
SLD R6, R8, R7
OR R5, R7
SRD R2, R8, R5
MOVD R9, 0(R4)
MOVD R7, 8(R4)
MOVD 24(R3), R7
MOVD 32(R3), R8
SLD R6, R7, R9
OR R5, R9
SRD R2, R7, R5
SLD R6, R8, R7
OR R5, R7
SRD R2, R8, R5
MOVD R9, 16(R4)
MOVD R7, 24(R4)
LAY 32(R3), R3 // ADD $32, R3
LAY 32(R4), R4 // ADD $32, R4
LAY -1(R1), R1 // ADD $-1, R1
CMPBNE R1, $0, loop4cont
loop4done:
// store final shifted bits
// The pending part is the highest input word >> s; it becomes the highest
// output word.
MOVD R5, 0(R4)
RET
ret0:
// Empty vector: return c = 0.
MOVD R0, c+56(FP)
RET
// func mulAddVWW(z, x []Word, m, a Word) (c Word)
//
// mulAddVWW computes z = x*m + a over len(z) words: for each i it forms the
// 128-bit value x[i]*m + carry, stores the low word in z[i] and carries the
// high word into the next step. The carry starts as a and its final value is
// returned in c. If len(z) is 0, c is a.
// Only len(z) is read. NOTE(review): x is presumably at least len(z) words
// long - confirm against callers.
//
// Register use:
//	R0        constant zero
//	R1        multiplier m
//	R2        running carry (initially a)
//	R3        count of 4-word iterations (len(z) >> 2)
//	R4        read pointer into x
//	R5        write pointer into z
//	R6        count of 1-word iterations (len(z) & 3)
//	R10, R11  product pair: MLGR multiplies R11 by R1 and, as the following
//	          ADDC/ADDE rely on, leaves the low word in R11 and the high
//	          word in R10
TEXT ·mulAddVWW(SB), NOSPLIT, $0
MOVD $0, R0
MOVD m+48(FP), R1
MOVD a+56(FP), R2
MOVD z_len+8(FP), R3
MOVD x_base+24(FP), R4
MOVD z_base+0(FP), R5
// compute unrolled loop lengths
MOVD R3, R6
AND $3, R6
SRD $2, R3
loop1:
CMPBEQ R6, $0, loop1done
loop1cont:
// unroll 1X in batches of 1
MOVD 0(R4), R11
// multiply
// R10:R11 = x[i] * m; add the incoming carry to the low word, then fold
// the resulting carry bit into the high word to form the outgoing carry.
MLGR R1, R10
ADDC R2, R11
ADDE R0, R10, R2
MOVD R11, 0(R5)
LAY 8(R4), R4 // ADD $8, R4
LAY 8(R5), R5 // ADD $8, R5
LAY -1(R6), R6 // ADD $-1, R6
CMPBNE R6, $0, loop1cont
loop1done:
loop4:
CMPBEQ R3, $0, loop4done
loop4cont:
// unroll 4X in batches of 1
// Four copies of the single-word step, at offsets 0, 8, 16 and 24.
MOVD 0(R4), R11
// multiply
MLGR R1, R10
ADDC R2, R11
ADDE R0, R10, R2
MOVD R11, 0(R5)
MOVD 8(R4), R11
// multiply
MLGR R1, R10
ADDC R2, R11
ADDE R0, R10, R2
MOVD R11, 8(R5)
MOVD 16(R4), R11
// multiply
MLGR R1, R10
ADDC R2, R11
ADDE R0, R10, R2
MOVD R11, 16(R5)
MOVD 24(R4), R11
// multiply
MLGR R1, R10
ADDC R2, R11
ADDE R0, R10, R2
MOVD R11, 24(R5)
LAY 32(R4), R4 // ADD $32, R4
LAY 32(R5), R5 // ADD $32, R5
LAY -1(R3), R3 // ADD $-1, R3
CMPBNE R3, $0, loop4cont
loop4done:
// Return the carry out of the last word.
MOVD R2, c+64(FP)
RET
// func addMulVVWW(z, x, y []Word, m, a Word) (c Word)
//
// addMulVVWW computes z = x + y*m + a over len(z) words: for each i it forms
// x[i] + y[i]*m + carry, stores the low word in z[i] and carries the high
// word into the next step. The carry starts as a and its final value is
// returned in c. If len(z) is 0, c is a.
// Only len(z) is read. NOTE(review): x and y are presumably at least len(z)
// words long - confirm against callers.
//
// Register use:
//	R0        constant zero
//	R1        multiplier m
//	R2        running carry (initially a)
//	R3        count of 4-word iterations (len(z) >> 2)
//	R4, R5    read pointers into x and y
//	R6        write pointer into z
//	R7        count of 1-word iterations (len(z) & 3); holds x[i] in the
//	          4-word loop once the 1-word loop has finished
//	R8        x[i] in the 1-word loop
//	R10, R11  product pair: MLGR multiplies R11 by R1 and, as the following
//	          ADDC/ADDE rely on, leaves the low word in R11 and the high
//	          word in R10
TEXT ·addMulVVWW(SB), NOSPLIT, $0
MOVD $0, R0
MOVD m+72(FP), R1
MOVD a+80(FP), R2
MOVD z_len+8(FP), R3
MOVD x_base+24(FP), R4
MOVD y_base+48(FP), R5
MOVD z_base+0(FP), R6
// compute unrolled loop lengths
MOVD R3, R7
AND $3, R7
SRD $2, R3
loop1:
CMPBEQ R7, $0, loop1done
loop1cont:
// unroll 1X in batches of 1
MOVD 0(R4), R8
MOVD 0(R5), R11
// multiply
// R10:R11 = y[i] * m; add the incoming carry to the low word, then fold
// the resulting carry bit into the high word to form the new carry in R2.
MLGR R1, R10
ADDC R2, R11
ADDE R0, R10, R2
// add
// Add x[i] to the low word and fold that carry bit into R2 as well.
ADDC R8, R11
ADDE R0, R2
MOVD R11, 0(R6)
LAY 8(R4), R4 // ADD $8, R4
LAY 8(R5), R5 // ADD $8, R5
LAY 8(R6), R6 // ADD $8, R6
LAY -1(R7), R7 // ADD $-1, R7
CMPBNE R7, $0, loop1cont
loop1done:
loop4:
CMPBEQ R3, $0, loop4done
loop4cont:
// unroll 4X in batches of 1
// Four copies of the single-word step, at offsets 0, 8, 16 and 24, with
// x[i] held in R7 instead of R8.
MOVD 0(R4), R7
MOVD 0(R5), R11
// multiply
MLGR R1, R10
ADDC R2, R11
ADDE R0, R10, R2
// add
ADDC R7, R11
ADDE R0, R2
MOVD R11, 0(R6)
MOVD 8(R4), R7
MOVD 8(R5), R11
// multiply
MLGR R1, R10
ADDC R2, R11
ADDE R0, R10, R2
// add
ADDC R7, R11
ADDE R0, R2
MOVD R11, 8(R6)
MOVD 16(R4), R7
MOVD 16(R5), R11
// multiply
MLGR R1, R10
ADDC R2, R11
ADDE R0, R10, R2
// add
ADDC R7, R11
ADDE R0, R2
MOVD R11, 16(R6)
MOVD 24(R4), R7
MOVD 24(R5), R11
// multiply
MLGR R1, R10
ADDC R2, R11
ADDE R0, R10, R2
// add
ADDC R7, R11
ADDE R0, R2
MOVD R11, 24(R6)
LAY 32(R4), R4 // ADD $32, R4
LAY 32(R5), R5 // ADD $32, R5
LAY 32(R6), R6 // ADD $32, R6
LAY -1(R3), R3 // ADD $-1, R3
CMPBNE R3, $0, loop4cont
loop4done:
// Return the carry out of the last word.
MOVD R2, c+88(FP)
RET