| // Copyright 2025 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| // Code generated by 'go generate' (with ./internal/asmgen). DO NOT EDIT. |
| |
| //go:build !math_big_pure_go |
| |
| #include "textflag.h" |
| |
| // func addVV(z, x, y []Word) (c Word): for each of the len(z) words, stores x[i] + y[i] + carry-in into z[i]; c is the carry out of the last word (0 or 1). Only len(z) is read, so x and y are assumed to hold at least that many words. |
| TEXT ·addVV(SB), NOSPLIT, $0 |
| MOVW z_len+4(FP), R0 |
| MOVW x_base+12(FP), R1 |
| MOVW y_base+24(FP), R2 |
| MOVW z_base+0(FP), R3 |
| // compute unrolled loop lengths: R4 = len(z)&3 passes of the 1X loop, R0 = len(z)>>2 passes of the 4X loop |
| AND $3, R0, R4 |
| MOVW R0>>2, R0 |
| ADD.S $0, R0 // clear carry: adding 0 cannot carry out, so the first ADC.S starts with carry-in 0 |
| loop1: |
| TEQ $0, R4; BEQ loop1done |
| loop1cont: |
| // unroll 1X: one word per pass; the counter SUB has no .S suffix and the TEQ $0 test is relied on to leave the carry flag alone, so the carry chains from pass to pass |
| MOVW.P 4(R1), R5 |
| MOVW.P 4(R2), R6 |
| ADC.S R6, R5 |
| MOVW.P R5, 4(R3) |
| SUB $1, R4 |
| TEQ $0, R4; BNE loop1cont |
| loop1done: |
| loop4: |
| TEQ $0, R0; BEQ loop4done |
| loop4cont: |
| // unroll 4X: load four words of x into R4-R7 and four words of y into R8, R9, R11, R12, add them as one continuous ADC.S carry chain, then store the four sums to z |
| MOVW.P 4(R1), R4 |
| MOVW.P 4(R1), R5 |
| MOVW.P 4(R1), R6 |
| MOVW.P 4(R1), R7 |
| MOVW.P 4(R2), R8 |
| MOVW.P 4(R2), R9 |
| MOVW.P 4(R2), R11 |
| MOVW.P 4(R2), R12 |
| ADC.S R8, R4 |
| ADC.S R9, R5 |
| ADC.S R11, R6 |
| ADC.S R12, R7 |
| MOVW.P R4, 4(R3) |
| MOVW.P R5, 4(R3) |
| MOVW.P R6, 4(R3) |
| MOVW.P R7, 4(R3) |
| SUB $1, R0 |
| TEQ $0, R0; BNE loop4cont |
| loop4done: |
| SBC R1, R1 // save carry: R1 = R1 - R1 - (1 - carry) = carry - 1, i.e. 0 when the carry flag is set and -1 when it is clear (x pointer in R1 is no longer needed) |
| ADD $1, R1 // convert add carry: (carry - 1) + 1 yields 1 if the last addition carried out, else 0 |
| MOVW R1, c+36(FP) |
| RET |
| |
| // func subVV(z, x, y []Word) (c Word): for each of the len(z) words, stores x[i] - y[i] - borrow-in into z[i]; c is the borrow out of the last word (0 or 1). Only len(z) is read, so x and y are assumed to hold at least that many words. |
| TEXT ·subVV(SB), NOSPLIT, $0 |
| MOVW z_len+4(FP), R0 |
| MOVW x_base+12(FP), R1 |
| MOVW y_base+24(FP), R2 |
| MOVW z_base+0(FP), R3 |
| // compute unrolled loop lengths: R4 = len(z)&3 passes of the 1X loop, R0 = len(z)>>2 passes of the 4X loop |
| AND $3, R0, R4 |
| MOVW R0>>2, R0 |
| SUB.S $0, R0 // clear carry (no borrow pending): R0 - 0 never borrows, which on ARM leaves the C flag set, the state SBC.S treats as borrow-in 0 |
| loop1: |
| TEQ $0, R4; BEQ loop1done |
| loop1cont: |
| // unroll 1X: one word per pass; the counter SUB has no .S suffix and the TEQ $0 test is relied on to leave the carry flag alone, so the borrow chains from pass to pass |
| MOVW.P 4(R1), R5 |
| MOVW.P 4(R2), R6 |
| SBC.S R6, R5 |
| MOVW.P R5, 4(R3) |
| SUB $1, R4 |
| TEQ $0, R4; BNE loop1cont |
| loop1done: |
| loop4: |
| TEQ $0, R0; BEQ loop4done |
| loop4cont: |
| // unroll 4X: load four words of x into R4-R7 and four words of y into R8, R9, R11, R12, subtract them as one continuous SBC.S borrow chain, then store the four differences to z |
| MOVW.P 4(R1), R4 |
| MOVW.P 4(R1), R5 |
| MOVW.P 4(R1), R6 |
| MOVW.P 4(R1), R7 |
| MOVW.P 4(R2), R8 |
| MOVW.P 4(R2), R9 |
| MOVW.P 4(R2), R11 |
| MOVW.P 4(R2), R12 |
| SBC.S R8, R4 |
| SBC.S R9, R5 |
| SBC.S R11, R6 |
| SBC.S R12, R7 |
| MOVW.P R4, 4(R3) |
| MOVW.P R5, 4(R3) |
| MOVW.P R6, 4(R3) |
| MOVW.P R7, 4(R3) |
| SUB $1, R0 |
| TEQ $0, R0; BNE loop4cont |
| loop4done: |
| SBC R1, R1 // save carry: R1 = R1 - R1 - (1 - C) = C - 1, i.e. 0 when no borrow is pending and -1 when the last SBC.S borrowed (x pointer in R1 is no longer needed) |
| RSB $0, R1, R1 // convert sub carry: R1 = 0 - R1, giving 1 if the last subtraction borrowed, else 0 |
| MOVW R1, c+36(FP) |
| RET |
| |
| // func lshVU(z, x []Word, s uint) (c Word): shifts the len(z)-word vector x left by s bits into z, walking from the highest word down; c receives the bits shifted out of the top word, x[len(z)-1] >> (32 - s). When len(z) == 0 nothing is loaded or stored and c = 0. |
| TEXT ·lshVU(SB), NOSPLIT, $0 |
| MOVW z_len+4(FP), R0 |
| TEQ $0, R0; BEQ ret0 |
| MOVW s+24(FP), R1 |
| MOVW x_base+12(FP), R2 |
| MOVW z_base+0(FP), R3 |
| // run loop backward: move R2 and R3 to one word past the end of x and z (base + 4*len(z)); every access below pre-decrements by 4 |
| ADD R0<<2, R2, R2 |
| ADD R0<<2, R3, R3 |
| // shift first word into carry: R4 = top word of x, R5 = 32 - s, c = R4 >> (32 - s); R4 then holds R4 << s as the pending high part of the top output word |
| MOVW.W -4(R2), R4 |
| MOVW $32, R5 |
| SUB R1, R5 |
| MOVW R4>>R5, R6 |
| MOVW R4<<R1, R4 |
| MOVW R6, c+28(FP) |
| // shift remaining words: len(z) - 1 input words are still to be read |
| SUB $1, R0 |
| // compute unrolled loop lengths: R6 = remaining&3 passes of the 1X loop, R0 = remaining>>2 passes of the 4X loop |
| AND $3, R0, R6 |
| MOVW R0>>2, R0 |
| loop1: |
| TEQ $0, R6; BEQ loop1done |
| loop1cont: |
| // unroll 1X: load the next lower word of x, OR its top bits (word >> (32 - s)) into the pending value R4, store R4 to z, then keep word << s as the new pending value |
| MOVW.W -4(R2), R7 |
| ORR R7>>R5, R4 |
| MOVW.W R4, -4(R3) |
| MOVW R7<<R1, R4 |
| SUB $1, R6 |
| TEQ $0, R6; BNE loop1cont |
| loop1done: |
| loop4: |
| TEQ $0, R0; BEQ loop4done |
| loop4cont: |
| // unroll 4X: the same combine-store-carry step for four words per pass, with the four input words loaded up front into R6-R9 |
| MOVW.W -4(R2), R6 |
| MOVW.W -4(R2), R7 |
| MOVW.W -4(R2), R8 |
| MOVW.W -4(R2), R9 |
| ORR R6>>R5, R4 |
| MOVW.W R4, -4(R3) |
| MOVW R6<<R1, R4 |
| ORR R7>>R5, R4 |
| MOVW.W R4, -4(R3) |
| MOVW R7<<R1, R4 |
| ORR R8>>R5, R4 |
| MOVW.W R4, -4(R3) |
| MOVW R8<<R1, R4 |
| ORR R9>>R5, R4 |
| MOVW.W R4, -4(R3) |
| MOVW R9<<R1, R4 |
| SUB $1, R0 |
| TEQ $0, R0; BNE loop4cont |
| loop4done: |
| // store final shifted bits: the lowest word of z gets the last pending value, x[0] << s, with zeros shifted in at the bottom |
| MOVW.W R4, -4(R3) |
| RET |
| ret0: |
| MOVW $0, R1 |
| MOVW R1, c+28(FP) |
| RET |
| |
| // func rshVU(z, x []Word, s uint) (c Word): shifts the len(z)-word vector x right by s bits into z, walking from the lowest word up; c receives the bits shifted out of the bottom word, x[0] << (32 - s). When len(z) == 0 nothing is loaded or stored and c = 0. |
| TEXT ·rshVU(SB), NOSPLIT, $0 |
| MOVW z_len+4(FP), R0 |
| TEQ $0, R0; BEQ ret0 |
| MOVW s+24(FP), R1 |
| MOVW x_base+12(FP), R2 |
| MOVW z_base+0(FP), R3 |
| // shift first word into carry: R4 = x[0], R5 = 32 - s, c = R4 << (32 - s); R4 then holds R4 >> s as the pending low part of the first output word |
| MOVW.P 4(R2), R4 |
| MOVW $32, R5 |
| SUB R1, R5 |
| MOVW R4<<R5, R6 |
| MOVW R4>>R1, R4 |
| MOVW R6, c+28(FP) |
| // shift remaining words: len(z) - 1 input words are still to be read |
| SUB $1, R0 |
| // compute unrolled loop lengths: R6 = remaining&3 passes of the 1X loop, R0 = remaining>>2 passes of the 4X loop |
| AND $3, R0, R6 |
| MOVW R0>>2, R0 |
| loop1: |
| TEQ $0, R6; BEQ loop1done |
| loop1cont: |
| // unroll 1X: load the next higher word of x, OR its low bits (word << (32 - s)) into the pending value R4, store R4 to z, then keep word >> s as the new pending value |
| MOVW.P 4(R2), R7 |
| ORR R7<<R5, R4 |
| MOVW.P R4, 4(R3) |
| MOVW R7>>R1, R4 |
| SUB $1, R6 |
| TEQ $0, R6; BNE loop1cont |
| loop1done: |
| loop4: |
| TEQ $0, R0; BEQ loop4done |
| loop4cont: |
| // unroll 4X: the same combine-store-carry step for four words per pass, with the four input words loaded up front into R6-R9 |
| MOVW.P 4(R2), R6 |
| MOVW.P 4(R2), R7 |
| MOVW.P 4(R2), R8 |
| MOVW.P 4(R2), R9 |
| ORR R6<<R5, R4 |
| MOVW.P R4, 4(R3) |
| MOVW R6>>R1, R4 |
| ORR R7<<R5, R4 |
| MOVW.P R4, 4(R3) |
| MOVW R7>>R1, R4 |
| ORR R8<<R5, R4 |
| MOVW.P R4, 4(R3) |
| MOVW R8>>R1, R4 |
| ORR R9<<R5, R4 |
| MOVW.P R4, 4(R3) |
| MOVW R9>>R1, R4 |
| SUB $1, R0 |
| TEQ $0, R0; BNE loop4cont |
| loop4done: |
| // store final shifted bits: the highest word of z gets the last pending value, the top input word >> s, with zeros shifted in at the top |
| MOVW.P R4, 4(R3) |
| RET |
| ret0: |
| MOVW $0, R1 |
| MOVW R1, c+28(FP) |
| RET |
| |
| // func mulAddVWW(z, x []Word, m, a Word) (c Word): for each of the len(z) words, stores the low word of x[i]*m + carry into z[i] and keeps the high word as the next carry; the carry word starts as a and its final value is returned in c. Only len(z) is read, so x is assumed to hold at least that many words. |
| TEXT ·mulAddVWW(SB), NOSPLIT, $0 |
| MOVW m+24(FP), R0 |
| MOVW a+28(FP), R1 |
| MOVW z_len+4(FP), R2 |
| MOVW x_base+12(FP), R3 |
| MOVW z_base+0(FP), R4 |
| // compute unrolled loop lengths: R5 = len(z)&3 passes of the 1X loop, R2 = len(z)>>2 passes of the 4X loop |
| AND $3, R2, R5 |
| MOVW R2>>2, R2 |
| loop1: |
| TEQ $0, R5; BEQ loop1done |
| loop1cont: |
| // unroll 1X: one word of x per pass, loaded into R6 |
| MOVW.P 4(R3), R6 |
| // multiply: (R7, R6) = 64-bit product m * x[i] with R7 the high word; the carry word R1 is added to the low word, and the new carry word R1 = high word + carry flag |
| MULLU R0, R6, (R7, R6) |
| ADD.S R1, R6 |
| ADC $0, R7, R1 |
| MOVW.P R6, 4(R4) |
| SUB $1, R5 |
| TEQ $0, R5; BNE loop1cont |
| loop1done: |
| loop4: |
| TEQ $0, R2; BEQ loop4done |
| loop4cont: |
| // unroll 4X in batches of 2: each pass handles four words as two identical two-word groups, each group loading x[i] into R5 and x[i+1] into R6 |
| MOVW.P 4(R3), R5 |
| MOVW.P 4(R3), R6 |
| // multiply: (R7, R5) = m * x[i] and (R8, R6) = m * x[i+1]; R5 += carry word R1, then R6 += R7 + carry flag, then R1 = R8 + carry flag; the second MULLU has no .S suffix, so the flag set by ADD.S is still intact for ADC.S |
| MULLU R0, R5, (R7, R5) |
| ADD.S R1, R5 |
| MULLU R0, R6, (R8, R6) |
| ADC.S R7, R6 |
| ADC $0, R8, R1 |
| MOVW.P R5, 4(R4) |
| MOVW.P R6, 4(R4) |
| MOVW.P 4(R3), R5 |
| MOVW.P 4(R3), R6 |
| // multiply: second two-word group, same register roles and carry chain as the first |
| MULLU R0, R5, (R7, R5) |
| ADD.S R1, R5 |
| MULLU R0, R6, (R8, R6) |
| ADC.S R7, R6 |
| ADC $0, R8, R1 |
| MOVW.P R5, 4(R4) |
| MOVW.P R6, 4(R4) |
| SUB $1, R2 |
| TEQ $0, R2; BNE loop4cont |
| loop4done: |
| MOVW R1, c+32(FP) |
| RET |
| |
| // func addMulVVWW(z, x, y []Word, m, a Word) (c Word): for each of the len(z) words, stores the low word of x[i] + y[i]*m + carry into z[i] and keeps the overflow as the next carry word; the carry word starts as a and its final value is returned in c. Only len(z) is read, so x and y are assumed to hold at least that many words. |
| TEXT ·addMulVVWW(SB), NOSPLIT, $0 |
| MOVW m+36(FP), R0 |
| MOVW a+40(FP), R1 |
| MOVW z_len+4(FP), R2 |
| MOVW x_base+12(FP), R3 |
| MOVW y_base+24(FP), R4 |
| MOVW z_base+0(FP), R5 |
| // compute unrolled loop lengths: R6 = len(z)&3 passes of the 1X loop, R2 = len(z)>>2 passes of the 4X loop |
| AND $3, R2, R6 |
| MOVW R2>>2, R2 |
| loop1: |
| TEQ $0, R6; BEQ loop1done |
| loop1cont: |
| // unroll 1X: one word per pass, x[i] loaded into R7 and y[i] into R8 |
| MOVW.P 4(R3), R7 |
| MOVW.P 4(R4), R8 |
| // multiply: (R9, R8) = 64-bit product m * y[i] with R9 the high word; the carry word R1 is added to the low word, and the new carry word R1 = high word + carry flag |
| MULLU R0, R8, (R9, R8) |
| ADD.S R1, R8 |
| ADC $0, R9, R1 |
| // add: low word R8 += x[i] (R7); the carry flag from that addition is folded into the carry word R1 |
| ADD.S R7, R8 |
| ADC $0, R1 |
| MOVW.P R8, 4(R5) |
| SUB $1, R6 |
| TEQ $0, R6; BNE loop1cont |
| loop1done: |
| loop4: |
| TEQ $0, R2; BEQ loop4done |
| loop4cont: |
| // unroll 4X in batches of 2: each pass handles four words as two identical two-word groups, each group loading x[i], x[i+1] into R6, R7 and y[i], y[i+1] into R8, R9 |
| MOVW.P 4(R3), R6 |
| MOVW.P 4(R3), R7 |
| MOVW.P 4(R4), R8 |
| MOVW.P 4(R4), R9 |
| // multiply: (R11, R8) = m * y[i] and (R12, R9) = m * y[i+1]; R8 += carry word R1, then R9 += R11 + carry flag, then R1 = R12 + carry flag; the second MULLU has no .S suffix, so the flag set by ADD.S is still intact for ADC.S |
| MULLU R0, R8, (R11, R8) |
| ADD.S R1, R8 |
| MULLU R0, R9, (R12, R9) |
| ADC.S R11, R9 |
| ADC $0, R12, R1 |
| // add: R8 += x[i], R9 += x[i+1] + carry flag, and the final carry flag is folded into the carry word R1 |
| ADD.S R6, R8 |
| ADC.S R7, R9 |
| ADC $0, R1 |
| MOVW.P R8, 4(R5) |
| MOVW.P R9, 4(R5) |
| MOVW.P 4(R3), R6 |
| MOVW.P 4(R3), R7 |
| MOVW.P 4(R4), R8 |
| MOVW.P 4(R4), R9 |
| // multiply: second two-word group, same register roles and carry chain as the first |
| MULLU R0, R8, (R11, R8) |
| ADD.S R1, R8 |
| MULLU R0, R9, (R12, R9) |
| ADC.S R11, R9 |
| ADC $0, R12, R1 |
| // add: second two-word group, x words added to the low product words and the carry flag folded into R1 as before |
| ADD.S R6, R8 |
| ADC.S R7, R9 |
| ADC $0, R1 |
| MOVW.P R8, 4(R5) |
| MOVW.P R9, 4(R5) |
| SUB $1, R2 |
| TEQ $0, R2; BNE loop4cont |
| loop4done: |
| MOVW R1, c+44(FP) |
| RET |