// blob: 1bcd30d7e5c90fb879b899f95b1d4a4159fcc427
// Copyright 2025 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Code generated by 'go generate' (with ./internal/asmgen). DO NOT EDIT.
//go:build !math_big_pure_go && (ppc64 || ppc64le)
#include "textflag.h"
// func addVV(z, x, y []Word) (c Word)
//
// addVV computes z[i] = x[i] + y[i] + carry for each of the z_len words,
// propagating the carry from word to word, and returns the final carry
// (0 or 1) in c.
//
// Only z's length is read; x and y are presumably at least that long
// (TODO confirm against callers).
//
// Register use:
//	R0      zero operand (assumed to hold 0 on entry)
//	R3      count of 4-word iterations (z_len >> 2)
//	R4      cursor into x; reused for the result at the very end
//	R5      cursor into y
//	R6      cursor into z
//	R7      count of 1-word iterations (z_len & 3); scratch in the 4X loop
//	R8-R15  data words (R13 is not used)
TEXT ·addVV(SB), NOSPLIT, $0
MOVD z_len+8(FP), R3
MOVD x_base+24(FP), R4
MOVD y_base+48(FP), R5
MOVD z_base+0(FP), R6
// compute unrolled loop lengths
// R7 = z_len & 3 (words handled one at a time), R3 = z_len >> 2 (groups of four).
ANDCC $3, R3, R7
SRD $2, R3
// Adding zero cannot produce a carry, so this leaves R3 unchanged and
// starts the ADDE chain below with no carry pending.
ADDC R0, R3 // clear carry
loop1:
// Skip the loop entirely when there are no leftover words; otherwise
// load the iteration count into CTR for BDNZ.
CMP R7, $0; BEQ loop1done; MOVD R7, CTR
loop1cont:
// unroll 1X
MOVD 0(R4), R8
MOVD 0(R5), R9
// R8 = x word + y word + carry-in; carry-out feeds the next ADDE.
ADDE R9, R8
MOVD R8, 0(R6)
// Advance all three cursors by one word. These use plain ADD so the
// carry left by ADDE survives into the next iteration.
ADD $8, R4
ADD $8, R5
ADD $8, R6
BDNZ loop1cont
loop1done:
loop4:
// Same guard as above for the 4-word loop. The carry from the 1X loop
// is still pending here and continues into the 4X chain.
CMP R3, $0; BEQ loop4done; MOVD R3, CTR
loop4cont:
// unroll 4X
// Load four words of x into R7-R10 and four words of y into R11-R15.
MOVD 0(R4), R7
MOVD 8(R4), R8
MOVD 16(R4), R9
MOVD 24(R4), R10
MOVD 0(R5), R11
MOVD 8(R5), R12
MOVD 16(R5), R14
MOVD 24(R5), R15
// Four chained add-with-carry steps, lowest word first.
ADDE R11, R7
ADDE R12, R8
ADDE R14, R9
ADDE R15, R10
MOVD R7, 0(R6)
MOVD R8, 8(R6)
MOVD R9, 16(R6)
MOVD R10, 24(R6)
// Advance the cursors by four words.
ADD $32, R4
ADD $32, R5
ADD $32, R6
BDNZ loop4cont
loop4done:
// R4 = 0 + 0 + carry, i.e. the pending carry as a 0/1 value.
ADDE R0, R0, R4 // save & convert add carry
MOVD R4, c+72(FP)
RET
// func subVV(z, x, y []Word) (c Word)
//
// subVV computes z[i] = x[i] - y[i] - borrow for each of the z_len words,
// propagating the borrow from word to word, and returns the final borrow
// (0 or 1) in c.
//
// Only z's length is read; x and y are presumably at least that long
// (TODO confirm against callers).
//
// Register use:
//	R0      zero operand (assumed to hold 0 on entry)
//	R3      count of 4-word iterations (z_len >> 2)
//	R4      cursor into x; reused for the result at the very end
//	R5      cursor into y
//	R6      cursor into z
//	R7      count of 1-word iterations (z_len & 3); scratch in the 4X loop
//	R8-R15  data words (R13 is not used)
TEXT ·subVV(SB), NOSPLIT, $0
MOVD z_len+8(FP), R3
MOVD x_base+24(FP), R4
MOVD y_base+48(FP), R5
MOVD z_base+0(FP), R6
// compute unrolled loop lengths
// R7 = z_len & 3 (words handled one at a time), R3 = z_len >> 2 (groups of four).
ANDCC $3, R3, R7
SRD $2, R3
// Subtracting zero leaves R3 unchanged and cannot borrow, so the SUBE
// chain below starts with no borrow pending.
SUBC R0, R3 // clear carry
loop1:
// Skip the loop entirely when there are no leftover words; otherwise
// load the iteration count into CTR for BDNZ.
CMP R7, $0; BEQ loop1done; MOVD R7, CTR
loop1cont:
// unroll 1X
MOVD 0(R4), R8
MOVD 0(R5), R9
// R8 = x word - y word - borrow-in; borrow-out feeds the next SUBE.
SUBE R9, R8
MOVD R8, 0(R6)
// Advance all three cursors by one word. These use plain ADD so the
// borrow state left by SUBE survives into the next iteration.
ADD $8, R4
ADD $8, R5
ADD $8, R6
BDNZ loop1cont
loop1done:
loop4:
// Same guard as above for the 4-word loop. The borrow state from the
// 1X loop is still pending here and continues into the 4X chain.
CMP R3, $0; BEQ loop4done; MOVD R3, CTR
loop4cont:
// unroll 4X
// Load four words of x into R7-R10 and four words of y into R11-R15.
MOVD 0(R4), R7
MOVD 8(R4), R8
MOVD 16(R4), R9
MOVD 24(R4), R10
MOVD 0(R5), R11
MOVD 8(R5), R12
MOVD 16(R5), R14
MOVD 24(R5), R15
// Four chained subtract-with-borrow steps, lowest word first.
SUBE R11, R7
SUBE R12, R8
SUBE R14, R9
SUBE R15, R10
MOVD R7, 0(R6)
MOVD R8, 8(R6)
MOVD R9, 16(R6)
MOVD R10, 24(R6)
// Advance the cursors by four words.
ADD $32, R4
ADD $32, R5
ADD $32, R6
BDNZ loop4cont
loop4done:
// R4 - R4 - borrow: 0 when no borrow is pending, -1 (all ones) otherwise.
SUBE R4, R4 // save carry
// R4 = R0 - R4, turning the 0 / -1 above into the 0 / 1 value returned in c.
SUB R4, R0, R4 // convert sub carry
MOVD R4, c+72(FP)
RET
// func lshVU(z, x []Word, s uint) (c Word)
//
// lshVU shifts the z_len-word vector x left by s bits, writing the result
// to z, and returns in c the bits shifted out of the top word
// (x[n-1] >> (64-s)). When z_len is 0 nothing is stored and c is 0.
//
// The vectors are walked from the highest word down to the lowest.
//
// NOTE(review): the code relies on both s and 64-s being usable shift
// counts; callers presumably guarantee 0 < s < 64 — confirm.
//
// Register use:
//	R0      zero (assumed to hold 0 on entry; stored as c in ret0)
//	R3      z_len, then count of 4-word iterations
//	R4      s (left-shift count)
//	R5      cursor into x, starting one word past the end
//	R6      cursor into z, starting one word past the end
//	R7      byte offset during setup, then the pending high bits
//	        (previous word << s) waiting to be combined with the next word
//	R8      64 - s (right-shift count)
//	R9      returned carry during setup, then count of 1-word iterations
//	R9-R14  data words in the loops (R13 is not used)
TEXT ·lshVU(SB), NOSPLIT, $0
MOVD z_len+8(FP), R3
// Empty vector: return c = 0 without touching memory.
CMP R3, $0; BEQ ret0
MOVD s+48(FP), R4
MOVD x_base+24(FP), R5
MOVD z_base+0(FP), R6
// run loop backward
// Move both cursors to the end of their vectors (base + 8*z_len).
SLD $3, R3, R7
ADD R7, R5
SLD $3, R3, R7
ADD R7, R6
// shift first word into carry
// R7 = top word of x; R9 = its bits shifted out the top (the return value);
// R7 is then replaced by the top word << s, kept pending for the next store.
MOVD -8(R5), R7
MOVD $64, R8
SUB R4, R8
SRD R8, R7, R9
SLD R4, R7
MOVD R9, c+56(FP)
// shift remaining words
// One word has been consumed, so z_len-1 words remain to be read.
SUB $1, R3
// compute unrolled loop lengths
// R9 (free again now that c is stored) = remaining & 3, R3 = remaining >> 2.
ANDCC $3, R3, R9
SRD $2, R3
loop1:
// Skip the loop when there are no leftover words; otherwise load CTR.
CMP R9, $0; BEQ loop1done; MOVD R9, CTR
loop1cont:
// unroll 1X
// R10 = next lower word of x.
MOVD -16(R5), R10
// Output word = pending high bits | (R10 >> (64-s)).
SRD R8, R10, R11
OR R7, R11
// New pending bits = R10 << s.
SLD R4, R10, R7
MOVD R11, -8(R6)
// Step both cursors down by one word.
ADD $-8, R5
ADD $-8, R6
BDNZ loop1cont
loop1done:
loop4:
// Same guard for the 4-word loop.
CMP R3, $0; BEQ loop4done; MOVD R3, CTR
loop4cont:
// unroll 4X
// Load the next four lower words of x, highest first.
MOVD -16(R5), R9
MOVD -24(R5), R10
MOVD -32(R5), R11
MOVD -40(R5), R12
// Each three-instruction group repeats the 1X step. Once an input
// register's low part has been moved into R7 it is reused to hold the
// next output word, so outputs end up in R14, R9, R10, R11.
SRD R8, R9, R14
OR R7, R14
SLD R4, R9, R7
SRD R8, R10, R9
OR R7, R9
SLD R4, R10, R7
SRD R8, R11, R10
OR R7, R10
SLD R4, R11, R7
SRD R8, R12, R11
OR R7, R11
SLD R4, R12, R7
MOVD R14, -8(R6)
MOVD R9, -16(R6)
MOVD R10, -24(R6)
MOVD R11, -32(R6)
// Step both cursors down by four words.
ADD $-32, R5
ADD $-32, R6
BDNZ loop4cont
loop4done:
// store final shifted bits
// R7 holds the lowest word of x shifted left by s; it becomes the
// lowest word of z.
MOVD R7, -8(R6)
RET
ret0:
// Empty-input path: c = 0.
MOVD R0, c+56(FP)
RET
// func rshVU(z, x []Word, s uint) (c Word)
//
// rshVU shifts the z_len-word vector x right by s bits, writing the result
// to z, and returns in c the bits shifted out of the bottom word
// (x[0] << (64-s)). When z_len is 0 nothing is stored and c is 0.
//
// The vectors are walked from the lowest word up to the highest.
//
// NOTE(review): the code relies on both s and 64-s being usable shift
// counts; callers presumably guarantee 0 < s < 64 — confirm.
//
// Register use:
//	R0      zero (assumed to hold 0 on entry; stored as c in ret0)
//	R3      z_len, then count of 4-word iterations
//	R4      s (right-shift count)
//	R5      cursor into x
//	R6      cursor into z
//	R7      pending low bits (previous word >> s) waiting to be combined
//	        with the next word
//	R8      64 - s (left-shift count)
//	R9      returned carry during setup, then count of 1-word iterations
//	R9-R14  data words in the loops (R13 is not used)
TEXT ·rshVU(SB), NOSPLIT, $0
MOVD z_len+8(FP), R3
// Empty vector: return c = 0 without touching memory.
CMP R3, $0; BEQ ret0
MOVD s+48(FP), R4
MOVD x_base+24(FP), R5
MOVD z_base+0(FP), R6
// shift first word into carry
// R7 = lowest word of x; R9 = its bits shifted out the bottom (the return
// value); R7 is then replaced by the lowest word >> s, kept pending for
// the next store.
MOVD 0(R5), R7
MOVD $64, R8
SUB R4, R8
SLD R8, R7, R9
SRD R4, R7
MOVD R9, c+56(FP)
// shift remaining words
// One word has been consumed, so z_len-1 words remain to be read.
SUB $1, R3
// compute unrolled loop lengths
// R9 (free again now that c is stored) = remaining & 3, R3 = remaining >> 2.
ANDCC $3, R3, R9
SRD $2, R3
loop1:
// Skip the loop when there are no leftover words; otherwise load CTR.
CMP R9, $0; BEQ loop1done; MOVD R9, CTR
loop1cont:
// unroll 1X
// R10 = next higher word of x.
MOVD 8(R5), R10
// Output word = pending low bits | (R10 << (64-s)).
SLD R8, R10, R11
OR R7, R11
// New pending bits = R10 >> s.
SRD R4, R10, R7
MOVD R11, 0(R6)
// Step both cursors up by one word.
ADD $8, R5
ADD $8, R6
BDNZ loop1cont
loop1done:
loop4:
// Same guard for the 4-word loop.
CMP R3, $0; BEQ loop4done; MOVD R3, CTR
loop4cont:
// unroll 4X
// Load the next four higher words of x, lowest first.
MOVD 8(R5), R9
MOVD 16(R5), R10
MOVD 24(R5), R11
MOVD 32(R5), R12
// Each three-instruction group repeats the 1X step. Once an input
// register's high part has been moved into R7 it is reused to hold the
// next output word, so outputs end up in R14, R9, R10, R11.
SLD R8, R9, R14
OR R7, R14
SRD R4, R9, R7
SLD R8, R10, R9
OR R7, R9
SRD R4, R10, R7
SLD R8, R11, R10
OR R7, R10
SRD R4, R11, R7
SLD R8, R12, R11
OR R7, R11
SRD R4, R12, R7
MOVD R14, 0(R6)
MOVD R9, 8(R6)
MOVD R10, 16(R6)
MOVD R11, 24(R6)
// Step both cursors up by four words.
ADD $32, R5
ADD $32, R6
BDNZ loop4cont
loop4done:
// store final shifted bits
// R7 holds the highest word of x shifted right by s; it becomes the
// highest word of z.
MOVD R7, 0(R6)
RET
ret0:
// Empty-input path: c = 0.
MOVD R0, c+56(FP)
RET
// func mulAddVWW(z, x []Word, m, a Word) (c Word)
//
// mulAddVWW computes z = x*m + a over z_len words: each x[i] is multiplied
// by m, the running carry word (initially a) is added to the low half of
// the product and stored to z[i], and the high half plus any carry-out
// becomes the running carry for the next word. The last running carry is
// returned in c; when z_len is 0 that is a itself.
//
// Register use:
//	R0       zero operand (assumed to hold 0 on entry)
//	R3       m
//	R4       running carry word (starts as a, ends as c)
//	R5       z_len, then count of 4-word iterations
//	R6       cursor into x
//	R7       cursor into z
//	R8       count of 1-word iterations; data word in the 4X loop
//	R9-R14   data words and product high halves (R13 is not used)
TEXT ·mulAddVWW(SB), NOSPLIT, $0
MOVD m+48(FP), R3
MOVD a+56(FP), R4
MOVD z_len+8(FP), R5
MOVD x_base+24(FP), R6
MOVD z_base+0(FP), R7
// compute unrolled loop lengths
// R8 = z_len & 3 (words handled one at a time), R5 = z_len >> 2 (groups of four).
ANDCC $3, R5, R8
SRD $2, R5
loop1:
// Skip the loop when there are no leftover words; otherwise load CTR.
CMP R8, $0; BEQ loop1done; MOVD R8, CTR
loop1cont:
// unroll 1X
MOVD 0(R6), R9
// multiply
// R10 = high 64 bits of m*x[i]; R9 = low 64 bits.
MULHDU R3, R9, R10
MULLD R3, R9
// Low half += running carry, recording the carry-out.
ADDC R4, R9
// New running carry = high half + 0 + carry-out.
ADDE R0, R10, R4
MOVD R9, 0(R7)
ADD $8, R6
ADD $8, R7
BDNZ loop1cont
loop1done:
loop4:
// Same guard for the 4-word loop.
CMP R5, $0; BEQ loop4done; MOVD R5, CTR
loop4cont:
// unroll 4X
MOVD 0(R6), R8
MOVD 8(R6), R9
MOVD 16(R6), R10
MOVD 24(R6), R11
// multiply
// One carry chain runs across all four words: each low half is added to
// the previous product's high half plus the carry bit. R12 and R14
// alternate as the holder of the most recent high half.
MULHDU R3, R8, R12
MULLD R3, R8
// Word 0: low half + incoming running carry starts the chain.
ADDC R4, R8
MULHDU R3, R9, R14
MULLD R3, R9
ADDE R12, R9
MULHDU R3, R10, R12
MULLD R3, R10
ADDE R14, R10
MULHDU R3, R11, R14
MULLD R3, R11
ADDE R12, R11
// Outgoing running carry = last high half + 0 + carry bit.
ADDE R0, R14, R4
MOVD R8, 0(R7)
MOVD R9, 8(R7)
MOVD R10, 16(R7)
MOVD R11, 24(R7)
ADD $32, R6
ADD $32, R7
BDNZ loop4cont
loop4done:
MOVD R4, c+64(FP)
RET
// func addMulVVWW(z, x, y []Word, m, a Word) (c Word)
//
// addMulVVWW computes z = x + y*m + a over z_len words. For each word,
// y[i] is multiplied by m, the running carry word (initially a) and x[i]
// are added to the low half of the product and the sum is stored to z[i];
// the high half of the product plus the carry-outs of both additions
// becomes the running carry for the next word. The last running carry is
// returned in c; when z_len is 0 that is a itself.
//
// Only z's length is read; x and y are presumably at least that long
// (TODO confirm against callers).
//
// Register use:
//	R0       zero operand (assumed to hold 0 on entry)
//	R3       m
//	R4       running carry word (starts as a, ends as c)
//	R5       z_len, then count of 4-word iterations
//	R6       cursor into x
//	R7       cursor into y
//	R8       cursor into z
//	R9       count of 1-word iterations; data word in the 4X loop
//	R10-R19  data words and product high halves (R13 is not used)
TEXT ·addMulVVWW(SB), NOSPLIT, $0
MOVD m+72(FP), R3
MOVD a+80(FP), R4
MOVD z_len+8(FP), R5
MOVD x_base+24(FP), R6
MOVD y_base+48(FP), R7
MOVD z_base+0(FP), R8
// compute unrolled loop lengths
// R9 = z_len & 3 (words handled one at a time), R5 = z_len >> 2 (groups of four).
ANDCC $3, R5, R9
SRD $2, R5
loop1:
// Skip the loop when there are no leftover words; otherwise load CTR.
CMP R9, $0; BEQ loop1done; MOVD R9, CTR
loop1cont:
// unroll 1X
// R10 = x[i], R11 = y[i].
MOVD 0(R6), R10
MOVD 0(R7), R11
// multiply
// R12 = high 64 bits of m*y[i]; R11 = low 64 bits.
MULHDU R3, R11, R12
MULLD R3, R11
// Low half += running carry; new running carry = high half + carry-out.
ADDC R4, R11
ADDE R0, R12, R4
// add
// Add x[i] to the low word and fold that carry-out into the running carry.
ADDC R10, R11
ADDE R0, R4
MOVD R11, 0(R8)
ADD $8, R6
ADD $8, R7
ADD $8, R8
BDNZ loop1cont
loop1done:
loop4:
// Same guard for the 4-word loop.
CMP R5, $0; BEQ loop4done; MOVD R5, CTR
loop4cont:
// unroll 4X
// Load four words of x into R9-R12 and four words of y into R14-R17.
MOVD 0(R6), R9
MOVD 8(R6), R10
MOVD 16(R6), R11
MOVD 24(R6), R12
MOVD 0(R7), R14
MOVD 8(R7), R15
MOVD 16(R7), R16
MOVD 24(R7), R17
// multiply
// First carry chain: each product's low half is added to the previous
// product's high half plus the carry bit. R18 and R19 alternate as the
// holder of the most recent high half.
MULHDU R3, R14, R18
MULLD R3, R14
// Word 0: low half + incoming running carry starts the chain.
ADDC R4, R14
MULHDU R3, R15, R19
MULLD R3, R15
ADDE R18, R15
MULHDU R3, R16, R18
MULLD R3, R16
ADDE R19, R16
MULHDU R3, R17, R19
MULLD R3, R17
ADDE R18, R17
// Running carry after the multiply chain = last high half + 0 + carry bit.
ADDE R0, R19, R4
// add
// Second carry chain: add the four x words to the four partial results,
// then fold the final carry-out into the running carry.
ADDC R9, R14
ADDE R10, R15
ADDE R11, R16
ADDE R12, R17
ADDE R0, R4
MOVD R14, 0(R8)
MOVD R15, 8(R8)
MOVD R16, 16(R8)
MOVD R17, 24(R8)
ADD $32, R6
ADD $32, R7
ADD $32, R8
BDNZ loop4cont
loop4done:
MOVD R4, c+88(FP)
RET