| // Copyright 2025 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| // Code generated by 'go generate' (with ./internal/asmgen). DO NOT EDIT. |
| |
| //go:build !math_big_pure_go && (mips64 || mips64le) |
| |
| #include "textflag.h" |
| |
// addVV computes z = x + y one 64-bit word at a time, starting at offset 0
// of each slice, and returns the carry out of the last word in c.
//
// Only len(z) is loaded; x and y are read for exactly len(z) words and
// their own lengths are not consulted here.
//
// Register roles:
//   R1          number of 4-word iterations (len(z) >> 2)
//   R5          number of single-word iterations (len(z) & 3), run first;
//               reused as a data register inside the 4X loop
//   R2, R3, R4  cursors into x, y and z
//   R24         carry passed from one word to the next
//   R23         scratch holding the carry of the first partial add
//
// The carry lives in a register (R24), so every add-with-carry is spelled
// as five instructions: add the two words, record whether that sum wrapped
// (addend > sum, unsigned), add the incoming carry, record whether that
// wrapped (carry > sum), then merge the two carry bits into R24.
| // func addVV(z, x, y []Word) (c Word) |
| TEXT ·addVV(SB), NOSPLIT, $0 |
| MOVV z_len+8(FP), R1 |
| MOVV x_base+24(FP), R2 |
| MOVV y_base+48(FP), R3 |
| MOVV z_base+0(FP), R4 |
| // compute unrolled loop lengths |
| AND $3, R1, R5 |
| SRLV $2, R1 |
| XOR R24, R24 // clear carry |
// First the len(z)&3 leftover words, one per iteration (skipped if none).
| loop1: |
| BEQ R5, loop1done |
| loop1cont: |
| // unroll 1X |
| MOVV 0(R2), R6 |
| MOVV 0(R3), R7 |
| ADDVU R7, R6 // ADCS R7, R6, R6 (cr=R24) |
| SGTU R7, R6, R23 // ... |
| ADDVU R24, R6 // ... |
| SGTU R24, R6, R24 // ... |
| ADDVU R23, R24 // ... |
| MOVV R6, 0(R4) |
| ADDVU $8, R2 |
| ADDVU $8, R3 |
| ADDVU $8, R4 |
| SUBVU $1, R5 |
| BNE R5, loop1cont |
| loop1done: |
// Then the remaining words, four per iteration (skipped if len(z) < 4).
// All eight operands are loaded before the carry chain starts; the four
// sums are stored only after the chain has finished.
| loop4: |
| BEQ R1, loop4done |
| loop4cont: |
| // unroll 4X |
| MOVV 0(R2), R5 |
| MOVV 8(R2), R6 |
| MOVV 16(R2), R7 |
| MOVV 24(R2), R8 |
| MOVV 0(R3), R9 |
| MOVV 8(R3), R10 |
| MOVV 16(R3), R11 |
| MOVV 24(R3), R12 |
| ADDVU R9, R5 // ADCS R9, R5, R5 (cr=R24) |
| SGTU R9, R5, R23 // ... |
| ADDVU R24, R5 // ... |
| SGTU R24, R5, R24 // ... |
| ADDVU R23, R24 // ... |
| ADDVU R10, R6 // ADCS R10, R6, R6 (cr=R24) |
| SGTU R10, R6, R23 // ... |
| ADDVU R24, R6 // ... |
| SGTU R24, R6, R24 // ... |
| ADDVU R23, R24 // ... |
| ADDVU R11, R7 // ADCS R11, R7, R7 (cr=R24) |
| SGTU R11, R7, R23 // ... |
| ADDVU R24, R7 // ... |
| SGTU R24, R7, R24 // ... |
| ADDVU R23, R24 // ... |
| ADDVU R12, R8 // ADCS R12, R8, R8 (cr=R24) |
| SGTU R12, R8, R23 // ... |
| ADDVU R24, R8 // ... |
| SGTU R24, R8, R24 // ... |
| ADDVU R23, R24 // ... |
| MOVV R5, 0(R4) |
| MOVV R6, 8(R4) |
| MOVV R7, 16(R4) |
| MOVV R8, 24(R4) |
| ADDVU $32, R2 |
| ADDVU $32, R3 |
| ADDVU $32, R4 |
| SUBVU $1, R1 |
| BNE R1, loop4cont |
| loop4done: |
// Return the final carry. With len(z) == 0 both loops are skipped and the
// cleared R24 is returned.
| MOVV R24, c+72(FP) |
| RET |
| |
// subVV computes z = x - y one 64-bit word at a time, starting at offset 0
// of each slice, and returns the borrow out of the last word in c.
//
// Only len(z) is loaded; x and y are read for exactly len(z) words and
// their own lengths are not consulted here.
//
// Register roles:
//   R1          number of 4-word iterations (len(z) >> 2)
//   R5          number of single-word iterations (len(z) & 3), run first;
//               reused as a data register inside the 4X loop
//   R2, R3, R4  cursors into x, y and z
//   R24         borrow passed from one word to the next
//   R23         scratch holding the borrow of the first partial subtract
//
// The borrow lives in a register (R24), so every subtract-with-borrow is
// spelled as five instructions: record whether removing the incoming
// borrow will wrap (borrow > x word), remove it, record whether removing
// the y word will wrap (y word > remainder), remove it, then merge the two
// borrow bits into R24. Each comparison is done before its subtraction
// because it needs the value that is about to be overwritten.
| // func subVV(z, x, y []Word) (c Word) |
| TEXT ·subVV(SB), NOSPLIT, $0 |
| MOVV z_len+8(FP), R1 |
| MOVV x_base+24(FP), R2 |
| MOVV y_base+48(FP), R3 |
| MOVV z_base+0(FP), R4 |
| // compute unrolled loop lengths |
| AND $3, R1, R5 |
| SRLV $2, R1 |
| XOR R24, R24 // clear carry |
// First the len(z)&3 leftover words, one per iteration (skipped if none).
| loop1: |
| BEQ R5, loop1done |
| loop1cont: |
| // unroll 1X |
| MOVV 0(R2), R6 |
| MOVV 0(R3), R7 |
| SGTU R24, R6, R23 // SBCS R7, R6, R6 |
| SUBVU R24, R6 // ... |
| SGTU R7, R6, R24 // ... |
| SUBVU R7, R6 // ... |
| ADDVU R23, R24 // ... |
| MOVV R6, 0(R4) |
| ADDVU $8, R2 |
| ADDVU $8, R3 |
| ADDVU $8, R4 |
| SUBVU $1, R5 |
| BNE R5, loop1cont |
| loop1done: |
// Then the remaining words, four per iteration (skipped if len(z) < 4).
// All eight operands are loaded before the borrow chain starts; the four
// differences are stored only after the chain has finished.
| loop4: |
| BEQ R1, loop4done |
| loop4cont: |
| // unroll 4X |
| MOVV 0(R2), R5 |
| MOVV 8(R2), R6 |
| MOVV 16(R2), R7 |
| MOVV 24(R2), R8 |
| MOVV 0(R3), R9 |
| MOVV 8(R3), R10 |
| MOVV 16(R3), R11 |
| MOVV 24(R3), R12 |
| SGTU R24, R5, R23 // SBCS R9, R5, R5 |
| SUBVU R24, R5 // ... |
| SGTU R9, R5, R24 // ... |
| SUBVU R9, R5 // ... |
| ADDVU R23, R24 // ... |
| SGTU R24, R6, R23 // SBCS R10, R6, R6 |
| SUBVU R24, R6 // ... |
| SGTU R10, R6, R24 // ... |
| SUBVU R10, R6 // ... |
| ADDVU R23, R24 // ... |
| SGTU R24, R7, R23 // SBCS R11, R7, R7 |
| SUBVU R24, R7 // ... |
| SGTU R11, R7, R24 // ... |
| SUBVU R11, R7 // ... |
| ADDVU R23, R24 // ... |
| SGTU R24, R8, R23 // SBCS R12, R8, R8 |
| SUBVU R24, R8 // ... |
| SGTU R12, R8, R24 // ... |
| SUBVU R12, R8 // ... |
| ADDVU R23, R24 // ... |
| MOVV R5, 0(R4) |
| MOVV R6, 8(R4) |
| MOVV R7, 16(R4) |
| MOVV R8, 24(R4) |
| ADDVU $32, R2 |
| ADDVU $32, R3 |
| ADDVU $32, R4 |
| SUBVU $1, R1 |
| BNE R1, loop4cont |
| loop4done: |
// Return the final borrow. With len(z) == 0 both loops are skipped and the
// cleared R24 is returned.
| MOVV R24, c+72(FP) |
| RET |
| |
// lshVU stores x shifted left by s bits into z and returns in c the bits
// shifted out of the last word of x.
//
// The words are processed from the highest index down to index 0. If
// len(z) is zero, nothing is read or written and c is set to zero. Only
// len(z) is loaded; the length of x is not consulted here.
//
// NOTE(review): nothing here special-cases s == 0 (R6 would become 64);
// presumably callers guarantee 0 < s < 64 - confirm against the caller.
//
// Register roles:
//   R1      len(z); after the first word, the number of 4-word iterations
//   R2      s, the left-shift count
//   R6      64 - s, the complementary right-shift count
//   R3, R4  cursors into x and z, started just past the end and moved down
//   R5      pending bits: the most recently loaded word shifted left by s,
//           still waiting to be OR-ed with the top bits of the next word
//   R7      count of single-word iterations ((len(z)-1) & 3), run first;
//           reused as a data register inside the 4X loop
| // func lshVU(z, x []Word, s uint) (c Word) |
| TEXT ·lshVU(SB), NOSPLIT, $0 |
| MOVV z_len+8(FP), R1 |
| BEQ R1, ret0 |
| MOVV s+48(FP), R2 |
| MOVV x_base+24(FP), R3 |
| MOVV z_base+0(FP), R4 |
| // run loop backward |
// Advance both cursors by len(z)*8 bytes; all later accesses use negative
// offsets from them.
| SLLV $3, R1, R5 |
| ADDVU R5, R3 |
| SLLV $3, R1, R5 |
| ADDVU R5, R4 |
| // shift first word into carry |
// R7 = last word of x >> (64-s) becomes the result c;
// R5 = last word of x << s is kept as the pending bits.
| MOVV -8(R3), R5 |
| MOVV $64, R6 |
| SUBVU R2, R6 |
| SRLV R6, R5, R7 |
| SLLV R2, R5 |
| MOVV R7, c+56(FP) |
| // shift remaining words |
| SUBVU $1, R1 |
| // compute unrolled loop lengths |
| AND $3, R1, R7 |
| SRLV $2, R1 |
// Each step loads the next lower word of x, stores
// (word >> (64-s)) | pending one slot above it in z, and then replaces the
// pending bits with word << s.
| loop1: |
| BEQ R7, loop1done |
| loop1cont: |
| // unroll 1X |
| MOVV -16(R3), R8 |
| SRLV R6, R8, R9 |
| OR R5, R9 |
| SLLV R2, R8, R5 |
| MOVV R9, -8(R4) |
| ADDVU $-8, R3 |
| ADDVU $-8, R4 |
| SUBVU $1, R7 |
| BNE R7, loop1cont |
| loop1done: |
// Four words per iteration. The output words are built in R11, R7, R8 and
// R9: each input register is recycled as an output as soon as both of its
// own shifts have been issued.
| loop4: |
| BEQ R1, loop4done |
| loop4cont: |
| // unroll 4X |
| MOVV -16(R3), R7 |
| MOVV -24(R3), R8 |
| MOVV -32(R3), R9 |
| MOVV -40(R3), R10 |
| SRLV R6, R7, R11 |
| OR R5, R11 |
| SLLV R2, R7, R5 |
| SRLV R6, R8, R7 |
| OR R5, R7 |
| SLLV R2, R8, R5 |
| SRLV R6, R9, R8 |
| OR R5, R8 |
| SLLV R2, R9, R5 |
| SRLV R6, R10, R9 |
| OR R5, R9 |
| SLLV R2, R10, R5 |
| MOVV R11, -8(R4) |
| MOVV R7, -16(R4) |
| MOVV R8, -24(R4) |
| MOVV R9, -32(R4) |
| ADDVU $-32, R3 |
| ADDVU $-32, R4 |
| SUBVU $1, R1 |
| BNE R1, loop4cont |
| loop4done: |
| // store final shifted bits |
// The pending bits of the lowest word of x have nothing below them to
// merge with; they go to the lowest word of z unchanged.
| MOVV R5, -8(R4) |
| RET |
// Empty z: report zero shifted-out bits.
| ret0: |
| MOVV R0, c+56(FP) |
| RET |
| |
// rshVU stores x shifted right by s bits into z and returns in c the bits
// shifted out of the first word of x, positioned at the top of the word.
//
// The words are processed from index 0 upward. If len(z) is zero, nothing
// is read or written and c is set to zero. Only len(z) is loaded; the
// length of x is not consulted here.
//
// NOTE(review): nothing here special-cases s == 0 (R6 would become 64);
// presumably callers guarantee 0 < s < 64 - confirm against the caller.
//
// Register roles:
//   R1      len(z); after the first word, the number of 4-word iterations
//   R2      s, the right-shift count
//   R6      64 - s, the complementary left-shift count
//   R3, R4  cursors into x and z, moved upward
//   R5      pending bits: the most recently loaded word shifted right by
//           s, still waiting to be OR-ed with the low bits of the next word
//   R7      count of single-word iterations ((len(z)-1) & 3), run first;
//           reused as a data register inside the 4X loop
| // func rshVU(z, x []Word, s uint) (c Word) |
| TEXT ·rshVU(SB), NOSPLIT, $0 |
| MOVV z_len+8(FP), R1 |
| BEQ R1, ret0 |
| MOVV s+48(FP), R2 |
| MOVV x_base+24(FP), R3 |
| MOVV z_base+0(FP), R4 |
| // shift first word into carry |
// R7 = first word of x << (64-s) becomes the result c;
// R5 = first word of x >> s is kept as the pending bits.
| MOVV 0(R3), R5 |
| MOVV $64, R6 |
| SUBVU R2, R6 |
| SLLV R6, R5, R7 |
| SRLV R2, R5 |
| MOVV R7, c+56(FP) |
| // shift remaining words |
| SUBVU $1, R1 |
| // compute unrolled loop lengths |
| AND $3, R1, R7 |
| SRLV $2, R1 |
// Each step loads the next higher word of x, stores
// (word << (64-s)) | pending one slot below it in z, and then replaces the
// pending bits with word >> s.
| loop1: |
| BEQ R7, loop1done |
| loop1cont: |
| // unroll 1X |
| MOVV 8(R3), R8 |
| SLLV R6, R8, R9 |
| OR R5, R9 |
| SRLV R2, R8, R5 |
| MOVV R9, 0(R4) |
| ADDVU $8, R3 |
| ADDVU $8, R4 |
| SUBVU $1, R7 |
| BNE R7, loop1cont |
| loop1done: |
// Four words per iteration. The output words are built in R11, R7, R8 and
// R9: each input register is recycled as an output as soon as both of its
// own shifts have been issued.
| loop4: |
| BEQ R1, loop4done |
| loop4cont: |
| // unroll 4X |
| MOVV 8(R3), R7 |
| MOVV 16(R3), R8 |
| MOVV 24(R3), R9 |
| MOVV 32(R3), R10 |
| SLLV R6, R7, R11 |
| OR R5, R11 |
| SRLV R2, R7, R5 |
| SLLV R6, R8, R7 |
| OR R5, R7 |
| SRLV R2, R8, R5 |
| SLLV R6, R9, R8 |
| OR R5, R8 |
| SRLV R2, R9, R5 |
| SLLV R6, R10, R9 |
| OR R5, R9 |
| SRLV R2, R10, R5 |
| MOVV R11, 0(R4) |
| MOVV R7, 8(R4) |
| MOVV R8, 16(R4) |
| MOVV R9, 24(R4) |
| ADDVU $32, R3 |
| ADDVU $32, R4 |
| SUBVU $1, R1 |
| BNE R1, loop4cont |
| loop4done: |
| // store final shifted bits |
// The pending bits of the highest word of x have nothing above them to
// merge with; they go to the highest word of z unchanged.
| MOVV R5, 0(R4) |
| RET |
// Empty z: report zero shifted-out bits.
| ret0: |
| MOVV R0, c+56(FP) |
| RET |
| |
// mulAddVWW computes z = x*m + a, where m and a are single words, one
// 64-bit word of x at a time starting at offset 0, and returns the final
// carry word in c.
//
// Only len(z) is loaded; x is read for exactly len(z) words and its own
// length is not consulted here. With len(z) == 0 both loops are skipped
// and c is a.
//
// Register roles:
//   R1      m, the multiplier
//   R2      running carry word: starts as a, and after each word holds the
//           high half of that word's result
//   R3      number of 4-word iterations (len(z) >> 2)
//   R6      number of single-word iterations (len(z) & 3), run first;
//           reused as a data register inside the 4X loop
//   R4, R5  cursors into x and z
//   R24     one-bit carry from adding R2 to the low product half
//
// Per word: MULVU leaves the 128-bit product m * x word in HI:LO; the low
// half plus R2 is the output word; the high half plus the carry of that
// addition becomes the new R2.
| // func mulAddVWW(z, x []Word, m, a Word) (c Word) |
| TEXT ·mulAddVWW(SB), NOSPLIT, $0 |
| MOVV m+48(FP), R1 |
| MOVV a+56(FP), R2 |
| MOVV z_len+8(FP), R3 |
| MOVV x_base+24(FP), R4 |
| MOVV z_base+0(FP), R5 |
| // compute unrolled loop lengths |
| AND $3, R3, R6 |
| SRLV $2, R3 |
// First the len(z)&3 leftover words, one per iteration (skipped if none).
| loop1: |
| BEQ R6, loop1done |
| loop1cont: |
| // unroll 1X |
| MOVV 0(R4), R7 |
| // synthetic carry, one column at a time |
| MULVU R1, R7 |
| MOVV LO, R8 |
| MOVV HI, R9 |
| ADDVU R2, R8, R7 // ADDS R2, R8, R7 (cr=R24) |
| SGTU R2, R7, R24 // ... |
| ADDVU R24, R9, R2 // ADC $0, R9, R2 |
| MOVV R7, 0(R5) |
| ADDVU $8, R4 |
| ADDVU $8, R5 |
| SUBVU $1, R6 |
| BNE R6, loop1cont |
| loop1done: |
// Then the remaining words, four per iteration (skipped if len(z) < 4).
// The four x words are loaded up front into R6-R9 and each is overwritten
// with its output word; R10/R11 hold the current LO/HI product halves.
| loop4: |
| BEQ R3, loop4done |
| loop4cont: |
| // unroll 4X |
| MOVV 0(R4), R6 |
| MOVV 8(R4), R7 |
| MOVV 16(R4), R8 |
| MOVV 24(R4), R9 |
| // synthetic carry, one column at a time |
| MULVU R1, R6 |
| MOVV LO, R10 |
| MOVV HI, R11 |
| ADDVU R2, R10, R6 // ADDS R2, R10, R6 (cr=R24) |
| SGTU R2, R6, R24 // ... |
| ADDVU R24, R11, R2 // ADC $0, R11, R2 |
| MULVU R1, R7 |
| MOVV LO, R10 |
| MOVV HI, R11 |
| ADDVU R2, R10, R7 // ADDS R2, R10, R7 (cr=R24) |
| SGTU R2, R7, R24 // ... |
| ADDVU R24, R11, R2 // ADC $0, R11, R2 |
| MULVU R1, R8 |
| MOVV LO, R10 |
| MOVV HI, R11 |
| ADDVU R2, R10, R8 // ADDS R2, R10, R8 (cr=R24) |
| SGTU R2, R8, R24 // ... |
| ADDVU R24, R11, R2 // ADC $0, R11, R2 |
| MULVU R1, R9 |
| MOVV LO, R10 |
| MOVV HI, R11 |
| ADDVU R2, R10, R9 // ADDS R2, R10, R9 (cr=R24) |
| SGTU R2, R9, R24 // ... |
| ADDVU R24, R11, R2 // ADC $0, R11, R2 |
| MOVV R6, 0(R5) |
| MOVV R7, 8(R5) |
| MOVV R8, 16(R5) |
| MOVV R9, 24(R5) |
| ADDVU $32, R4 |
| ADDVU $32, R5 |
| SUBVU $1, R3 |
| BNE R3, loop4cont |
| loop4done: |
// Return the carry word left over after the last column.
| MOVV R2, c+64(FP) |
| RET |
| |
// addMulVVWW computes z = x + y*m + a, where m and a are single words, one
// 64-bit word at a time starting at offset 0 of each slice, and returns the
// final carry word in c.
//
// Only len(z) is loaded; x and y are read for exactly len(z) words and
// their own lengths are not consulted here. With len(z) == 0 both loops
// are skipped and c is a.
//
// Register roles:
//   R1          m, the multiplier
//   R2          running carry word: starts as a, and after each word holds
//               the high half of that word's result
//   R3          number of 4-word iterations (len(z) >> 2)
//   R7          number of single-word iterations (len(z) & 3), run first;
//               reused as a data register inside the 4X loop
//   R4, R5, R6  cursors into x, y and z
//   R24         one-bit carry out of each 64-bit addition
//
// Per word: MULVU leaves the 128-bit product m * y word in HI:LO. The x
// word is added to the low half and the carry folded into the high half;
// then R2 is added to the low half to give the output word, and the high
// half plus that second carry becomes the new R2.
| // func addMulVVWW(z, x, y []Word, m, a Word) (c Word) |
| TEXT ·addMulVVWW(SB), NOSPLIT, $0 |
| MOVV m+72(FP), R1 |
| MOVV a+80(FP), R2 |
| MOVV z_len+8(FP), R3 |
| MOVV x_base+24(FP), R4 |
| MOVV y_base+48(FP), R5 |
| MOVV z_base+0(FP), R6 |
| // compute unrolled loop lengths |
| AND $3, R3, R7 |
| SRLV $2, R3 |
// First the len(z)&3 leftover words, one per iteration (skipped if none).
| loop1: |
| BEQ R7, loop1done |
| loop1cont: |
| // unroll 1X |
| MOVV 0(R4), R8 |
| MOVV 0(R5), R9 |
| // synthetic carry, one column at a time |
| MULVU R1, R9 |
| MOVV LO, R10 |
| MOVV HI, R11 |
| ADDVU R8, R10 // ADDS R8, R10, R10 (cr=R24) |
| SGTU R8, R10, R24 // ... |
| ADDVU R24, R11 // ADC $0, R11, R11 |
| ADDVU R2, R10, R9 // ADDS R2, R10, R9 (cr=R24) |
| SGTU R2, R9, R24 // ... |
| ADDVU R24, R11, R2 // ADC $0, R11, R2 |
| MOVV R9, 0(R6) |
| ADDVU $8, R4 |
| ADDVU $8, R5 |
| ADDVU $8, R6 |
| SUBVU $1, R7 |
| BNE R7, loop1cont |
| loop1done: |
// Then the remaining words, four per iteration (skipped if len(z) < 4).
// x words are loaded into R7-R10 and y words into R11-R14; each y register
// is overwritten with its output word. R15/R16 hold the current LO/HI
// product halves.
| loop4: |
| BEQ R3, loop4done |
| loop4cont: |
| // unroll 4X |
| MOVV 0(R4), R7 |
| MOVV 8(R4), R8 |
| MOVV 16(R4), R9 |
| MOVV 24(R4), R10 |
| MOVV 0(R5), R11 |
| MOVV 8(R5), R12 |
| MOVV 16(R5), R13 |
| MOVV 24(R5), R14 |
| // synthetic carry, one column at a time |
| MULVU R1, R11 |
| MOVV LO, R15 |
| MOVV HI, R16 |
| ADDVU R7, R15 // ADDS R7, R15, R15 (cr=R24) |
| SGTU R7, R15, R24 // ... |
| ADDVU R24, R16 // ADC $0, R16, R16 |
| ADDVU R2, R15, R11 // ADDS R2, R15, R11 (cr=R24) |
| SGTU R2, R11, R24 // ... |
| ADDVU R24, R16, R2 // ADC $0, R16, R2 |
| MULVU R1, R12 |
| MOVV LO, R15 |
| MOVV HI, R16 |
| ADDVU R8, R15 // ADDS R8, R15, R15 (cr=R24) |
| SGTU R8, R15, R24 // ... |
| ADDVU R24, R16 // ADC $0, R16, R16 |
| ADDVU R2, R15, R12 // ADDS R2, R15, R12 (cr=R24) |
| SGTU R2, R12, R24 // ... |
| ADDVU R24, R16, R2 // ADC $0, R16, R2 |
| MULVU R1, R13 |
| MOVV LO, R15 |
| MOVV HI, R16 |
| ADDVU R9, R15 // ADDS R9, R15, R15 (cr=R24) |
| SGTU R9, R15, R24 // ... |
| ADDVU R24, R16 // ADC $0, R16, R16 |
| ADDVU R2, R15, R13 // ADDS R2, R15, R13 (cr=R24) |
| SGTU R2, R13, R24 // ... |
| ADDVU R24, R16, R2 // ADC $0, R16, R2 |
| MULVU R1, R14 |
| MOVV LO, R15 |
| MOVV HI, R16 |
| ADDVU R10, R15 // ADDS R10, R15, R15 (cr=R24) |
| SGTU R10, R15, R24 // ... |
| ADDVU R24, R16 // ADC $0, R16, R16 |
| ADDVU R2, R15, R14 // ADDS R2, R15, R14 (cr=R24) |
| SGTU R2, R14, R24 // ... |
| ADDVU R24, R16, R2 // ADC $0, R16, R2 |
| MOVV R11, 0(R6) |
| MOVV R12, 8(R6) |
| MOVV R13, 16(R6) |
| MOVV R14, 24(R6) |
| ADDVU $32, R4 |
| ADDVU $32, R5 |
| ADDVU $32, R6 |
| SUBVU $1, R3 |
| BNE R3, loop4cont |
| loop4done: |
// Return the carry word left over after the last column.
| MOVV R2, c+88(FP) |
| RET |