// Copyright 2025 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Code generated by 'go generate' (with ./internal/asmgen). DO NOT EDIT.

//go:build !math_big_pure_go && (ppc64 || ppc64le)
| |
| #include "textflag.h" |
| |
// func addVV(z, x, y []Word) (c Word)
//
// Stores x[i] + y[i] + carry into z[i] for every i in [0, len(z)) and
// returns the carry out of the last word in c.
//
// Register use:
//	R3         len(z)/4, trip count of the four-word loop
//	R7         len(z)%4, trip count of the one-word loop (reused as data later)
//	R4, R5, R6 cursors into x, y and z
//	R0         used as a zero operand (assumed to hold 0)
TEXT ·addVV(SB), NOSPLIT, $0
	MOVD z_base+0(FP), R6
	MOVD z_len+8(FP), R3
	MOVD x_base+24(FP), R4
	MOVD y_base+48(FP), R5
	// Split the length into n%4 single words and n/4 groups of four.
	ANDCC $3, R3, R7
	SRD $2, R3
	ADDC R0, R3	// adding zero cannot carry out: the chain starts with carry = 0

	// First the n%4 odd words, one per iteration.
	CMP R7, $0
	BEQ quads
	MOVD R7, CTR
tailLoop:
	MOVD 0(R5), R9
	MOVD 0(R4), R8
	ADDE R9, R8	// x[i] + y[i] + carry
	MOVD R8, 0(R6)
	ADD $8, R6
	ADD $8, R5
	ADD $8, R4
	BDNZ tailLoop

quads:
	// Then the n/4 groups, four words per iteration.
	CMP R3, $0
	BEQ done
	MOVD R3, CTR
quadLoop:
	MOVD 0(R4), R7
	MOVD 0(R5), R11
	MOVD 8(R4), R8
	MOVD 8(R5), R12
	MOVD 16(R4), R9
	MOVD 16(R5), R14
	MOVD 24(R4), R10
	MOVD 24(R5), R15
	// The carry ripples through the four sums in word order.
	ADDE R11, R7
	ADDE R12, R8
	ADDE R14, R9
	ADDE R15, R10
	MOVD R7, 0(R6)
	MOVD R8, 8(R6)
	MOVD R9, 16(R6)
	MOVD R10, 24(R6)
	ADD $32, R6
	ADD $32, R5
	ADD $32, R4
	BDNZ quadLoop

done:
	ADDE R0, R0, R4	// R4 = 0 + 0 + carry, i.e. the carry as a word
	MOVD R4, c+72(FP)
	RET
| |
// func subVV(z, x, y []Word) (c Word)
//
// Stores x[i] - y[i] - borrow into z[i] for every i in [0, len(z)) and
// returns the borrow out of the last word in c.
//
// Register use:
//	R3         len(z)/4, trip count of the four-word loop
//	R7         len(z)%4, trip count of the one-word loop (reused as data later)
//	R4, R5, R6 cursors into x, y and z
//	R0         used as a zero operand (assumed to hold 0)
TEXT ·subVV(SB), NOSPLIT, $0
	MOVD z_base+0(FP), R6
	MOVD z_len+8(FP), R3
	MOVD x_base+24(FP), R4
	MOVD y_base+48(FP), R5
	// Split the length into n%4 single words and n/4 groups of four.
	ANDCC $3, R3, R7
	SRD $2, R3
	SUBC R0, R3	// subtracting zero cannot borrow: the chain starts with no borrow

	// First the n%4 odd words, one per iteration.
	CMP R7, $0
	BEQ quads
	MOVD R7, CTR
tailLoop:
	MOVD 0(R5), R9
	MOVD 0(R4), R8
	SUBE R9, R8	// x[i] - y[i] - borrow
	MOVD R8, 0(R6)
	ADD $8, R6
	ADD $8, R5
	ADD $8, R4
	BDNZ tailLoop

quads:
	// Then the n/4 groups, four words per iteration.
	CMP R3, $0
	BEQ done
	MOVD R3, CTR
quadLoop:
	MOVD 0(R4), R7
	MOVD 0(R5), R11
	MOVD 8(R4), R8
	MOVD 8(R5), R12
	MOVD 16(R4), R9
	MOVD 16(R5), R14
	MOVD 24(R4), R10
	MOVD 24(R5), R15
	// The borrow ripples through the four differences in word order.
	SUBE R11, R7
	SUBE R12, R8
	SUBE R14, R9
	SUBE R15, R10
	MOVD R7, 0(R6)
	MOVD R8, 8(R6)
	MOVD R9, 16(R6)
	MOVD R10, 24(R6)
	ADD $32, R6
	ADD $32, R5
	ADD $32, R4
	BDNZ quadLoop

done:
	SUBE R4, R4	// R4 - R4 - borrow: 0 when there is no borrow, -1 when there is
	SUB R4, R0, R4	// negate so that the result is 0 or 1
	MOVD R4, c+72(FP)
	RET
| |
// func lshVU(z, x []Word, s uint) (c Word)
//
// Shifts the len(z)-word vector x left by s bits into z and returns in c the
// bits pushed out of the highest word, x[len(z)-1] >> (64-s). Words are
// processed from the highest index down to the lowest. When len(z) is zero
// no memory is touched and c is 0.
//
// Register use:
//	R3     words still to process
//	R4     s
//	R8     64 - s
//	R5, R6 cursors into x and z, starting just past the last word
//	R7     low bits of the previously loaded word, already shifted left by s
//	       and waiting to be combined with the high bits of the next word
//	R0     used as a zero operand (assumed to hold 0)
TEXT ·lshVU(SB), NOSPLIT, $0
	MOVD z_len+8(FP), R3
	CMP R3, $0
	BEQ empty
	MOVD s+48(FP), R4
	MOVD x_base+24(FP), R5
	MOVD z_base+0(FP), R6
	// Run backward: advance both cursors to the end of their vectors.
	SLD $3, R3, R7	// byte length 8*n, computed once and applied to both cursors
	ADD R7, R5
	ADD R7, R6
	// Top word: the bits shifted out are the result, the rest stay pending in R7.
	MOVD -8(R5), R7
	MOVD $64, R8
	SUB R4, R8	// R8 = 64 - s
	SRD R8, R7, R9
	SLD R4, R7
	MOVD R9, c+56(FP)
	// n-1 words remain: (n-1)%4 singles followed by (n-1)/4 groups of four.
	SUB $1, R3
	ANDCC $3, R3, R9
	SRD $2, R3

	CMP R9, $0
	BEQ quads
	MOVD R9, CTR
tailLoop:
	// The word is loaded before the store so that an overlapping z cannot
	// overwrite input that has not been read yet.
	MOVD -16(R5), R10
	SRD R8, R10, R11	// high bits of this word move into the word above
	OR R7, R11
	SLD R4, R10, R7	// low bits become the new pending value
	MOVD R11, -8(R6)
	ADD $-8, R5
	ADD $-8, R6
	BDNZ tailLoop

quads:
	CMP R3, $0
	BEQ flush
	MOVD R3, CTR
quadLoop:
	// All four loads precede all four stores.
	MOVD -16(R5), R9
	MOVD -24(R5), R10
	MOVD -32(R5), R11
	MOVD -40(R5), R12
	SRD R8, R9, R14
	OR R7, R14
	SLD R4, R9, R7
	SRD R8, R10, R9
	OR R7, R9
	SLD R4, R10, R7
	SRD R8, R11, R10
	OR R7, R10
	SLD R4, R11, R7
	SRD R8, R12, R11
	OR R7, R11
	SLD R4, R12, R7
	MOVD R14, -8(R6)
	MOVD R9, -16(R6)
	MOVD R10, -24(R6)
	MOVD R11, -32(R6)
	ADD $-32, R5
	ADD $-32, R6
	BDNZ quadLoop

flush:
	// The pending low bits form the lowest output word.
	MOVD R7, -8(R6)
	RET

empty:
	MOVD R0, c+56(FP)
	RET
| |
// func rshVU(z, x []Word, s uint) (c Word)
//
// Shifts the len(z)-word vector x right by s bits into z and returns in c the
// bits pushed out of the lowest word, x[0] << (64-s). Words are processed
// from index 0 upward. When len(z) is zero no memory is touched and c is 0.
//
// Register use:
//	R3     words still to process
//	R4     s
//	R8     64 - s
//	R5, R6 cursors into x and z
//	R7     high bits of the previously loaded word, already shifted right by s
//	       and waiting to be combined with the low bits of the next word
//	R0     used as a zero operand (assumed to hold 0)
TEXT ·rshVU(SB), NOSPLIT, $0
	MOVD z_len+8(FP), R3
	CMP R3, $0
	BEQ empty
	MOVD s+48(FP), R4
	MOVD z_base+0(FP), R6
	MOVD x_base+24(FP), R5
	// Bottom word: the bits shifted out are the result, the rest stay pending in R7.
	MOVD 0(R5), R7
	MOVD $64, R8
	SUB R4, R8	// R8 = 64 - s
	SLD R8, R7, R9
	SRD R4, R7
	MOVD R9, c+56(FP)
	// n-1 words remain: (n-1)%4 singles followed by (n-1)/4 groups of four.
	SUB $1, R3
	ANDCC $3, R3, R9
	SRD $2, R3

	CMP R9, $0
	BEQ quads
	MOVD R9, CTR
tailLoop:
	// The next word is loaded before the store to the current position.
	MOVD 8(R5), R10
	SLD R8, R10, R11	// low bits of this word move into the word below
	OR R7, R11
	SRD R4, R10, R7	// high bits become the new pending value
	MOVD R11, 0(R6)
	ADD $8, R6
	ADD $8, R5
	BDNZ tailLoop

quads:
	CMP R3, $0
	BEQ flush
	MOVD R3, CTR
quadLoop:
	// All four loads precede all four stores.
	MOVD 8(R5), R9
	MOVD 16(R5), R10
	MOVD 24(R5), R11
	MOVD 32(R5), R12
	SLD R8, R9, R14
	OR R7, R14
	SRD R4, R9, R7
	SLD R8, R10, R9
	OR R7, R9
	SRD R4, R10, R7
	SLD R8, R11, R10
	OR R7, R10
	SRD R4, R11, R7
	SLD R8, R12, R11
	OR R7, R11
	SRD R4, R12, R7
	MOVD R14, 0(R6)
	MOVD R9, 8(R6)
	MOVD R10, 16(R6)
	MOVD R11, 24(R6)
	ADD $32, R6
	ADD $32, R5
	BDNZ quadLoop

flush:
	// The pending high bits form the highest output word.
	MOVD R7, 0(R6)
	RET

empty:
	MOVD R0, c+56(FP)
	RET
| |
// func mulAddVWW(z, x []Word, m, a Word) (c Word)
//
// For every i in [0, len(z)) forms the double-word value x[i]*m + carry,
// stores its low word in z[i] and keeps its high word as the next carry.
// The carry starts out as a; the final carry is returned in c.
//
// Register use:
//	R3     m
//	R4     running carry word (initially a)
//	R5     len(z)/4, trip count of the four-word loop
//	R8     len(z)%4, trip count of the one-word loop (reused as data later)
//	R6, R7 cursors into x and z
//	R0     used as a zero operand (assumed to hold 0)
TEXT ·mulAddVWW(SB), NOSPLIT, $0
	MOVD z_base+0(FP), R7
	MOVD z_len+8(FP), R5
	MOVD x_base+24(FP), R6
	MOVD m+48(FP), R3
	MOVD a+56(FP), R4
	// Split the length into n%4 single words and n/4 groups of four.
	ANDCC $3, R5, R8
	SRD $2, R5

	CMP R8, $0
	BEQ quads
	MOVD R8, CTR
tailLoop:
	MOVD 0(R6), R9
	MULHDU R3, R9, R10	// high word of x[i]*m
	MULLD R3, R9	// low word of x[i]*m
	ADDC R4, R9	// low word + incoming carry
	ADDE R0, R10, R4	// next carry = high word + carry bit
	MOVD R9, 0(R7)
	ADD $8, R7
	ADD $8, R6
	BDNZ tailLoop

quads:
	CMP R5, $0
	BEQ done
	MOVD R5, CTR
quadLoop:
	MOVD 0(R6), R8
	MOVD 8(R6), R9
	MOVD 16(R6), R10
	MOVD 24(R6), R11
	// Each low word absorbs the high word of the product below it; R12 and
	// R14 alternate as the holder of that high word.
	MULHDU R3, R8, R12
	MULLD R3, R8
	ADDC R4, R8
	MULHDU R3, R9, R14
	MULLD R3, R9
	ADDE R12, R9
	MULHDU R3, R10, R12
	MULLD R3, R10
	ADDE R14, R10
	MULHDU R3, R11, R14
	MULLD R3, R11
	ADDE R12, R11
	ADDE R0, R14, R4	// next carry = top high word + carry bit
	MOVD R8, 0(R7)
	MOVD R9, 8(R7)
	MOVD R10, 16(R7)
	MOVD R11, 24(R7)
	ADD $32, R7
	ADD $32, R6
	BDNZ quadLoop

done:
	MOVD R4, c+64(FP)
	RET
| |
// func addMulVVWW(z, x, y []Word, m, a Word) (c Word)
//
// For every i in [0, len(z)) forms the double-word value
// x[i] + y[i]*m + carry, stores its low word in z[i] and keeps its high word
// as the next carry. The carry starts out as a; the final carry is returned
// in c.
//
// Register use:
//	R3         m
//	R4         running carry word (initially a)
//	R5         len(z)/4, trip count of the four-word loop
//	R9         len(z)%4, trip count of the one-word loop (reused as data later)
//	R6, R7, R8 cursors into x, y and z
//	R0         used as a zero operand (assumed to hold 0)
TEXT ·addMulVVWW(SB), NOSPLIT, $0
	MOVD z_base+0(FP), R8
	MOVD z_len+8(FP), R5
	MOVD x_base+24(FP), R6
	MOVD y_base+48(FP), R7
	MOVD m+72(FP), R3
	MOVD a+80(FP), R4
	// Split the length into n%4 single words and n/4 groups of four.
	ANDCC $3, R5, R9
	SRD $2, R5

	CMP R9, $0
	BEQ quads
	MOVD R9, CTR
tailLoop:
	MOVD 0(R7), R11
	MOVD 0(R6), R10
	// y[i]*m + carry
	MULHDU R3, R11, R12
	MULLD R3, R11
	ADDC R4, R11
	ADDE R0, R12, R4
	// ... + x[i]
	ADDC R10, R11
	ADDE R0, R4	// fold the carry bit of the addition into the carry word
	MOVD R11, 0(R8)
	ADD $8, R8
	ADD $8, R7
	ADD $8, R6
	BDNZ tailLoop

quads:
	CMP R5, $0
	BEQ done
	MOVD R5, CTR
quadLoop:
	MOVD 0(R7), R14
	MOVD 8(R7), R15
	MOVD 16(R7), R16
	MOVD 24(R7), R17
	MOVD 0(R6), R9
	MOVD 8(R6), R10
	MOVD 16(R6), R11
	MOVD 24(R6), R12
	// First chain: y[i]*m + carry. R18 and R19 alternate as the holder of
	// the high word passed up to the next product.
	MULHDU R3, R14, R18
	MULLD R3, R14
	ADDC R4, R14
	MULHDU R3, R15, R19
	MULLD R3, R15
	ADDE R18, R15
	MULHDU R3, R16, R18
	MULLD R3, R16
	ADDE R19, R16
	MULHDU R3, R17, R19
	MULLD R3, R17
	ADDE R18, R17
	ADDE R0, R19, R4
	// Second chain: add the four x words, then fold the last carry bit
	// into the carry word.
	ADDC R9, R14
	ADDE R10, R15
	ADDE R11, R16
	ADDE R12, R17
	ADDE R0, R4
	MOVD R14, 0(R8)
	MOVD R15, 8(R8)
	MOVD R16, 16(R8)
	MOVD R17, 24(R8)
	ADD $32, R8
	ADD $32, R7
	ADD $32, R6
	BDNZ quadLoop

done:
	MOVD R4, c+88(FP)
	RET