| // Copyright 2025 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| // Code generated by 'go generate' (with ./internal/asmgen). DO NOT EDIT. |
| |
| //go:build !math_big_pure_go |
| |
| #include "textflag.h" |
| |
| // func addVV(z, x, y []Word) (c Word): stores x[i] + y[i] + carry into z[i] for each of the len(z) 64-bit words, lowest word first, and returns the final carry in c. Only z_len is read; x and y are indexed for len(z) words without any check here. |
| TEXT ·addVV(SB), NOSPLIT, $0 |
| MOVV z_len+8(FP), R4 |
| MOVV x_base+24(FP), R5 |
| MOVV y_base+48(FP), R6 |
| MOVV z_base+0(FP), R7 |
| // compute unrolled loop lengths: R8 = len(z) % 4 single-word steps, R4 = len(z) / 4 four-word steps; R5, R6, R7 walk x, y, z |
| AND $3, R4, R8 |
| SRLV $2, R4 |
| XOR R28, R28 // clear carry (R28 holds the carry passed from one word to the next) |
| loop1: |
| BEQ R8, loop1done |
| loop1cont: |
| // unroll 1X: one word per iteration, R9 = x[i], R10 = y[i] |
| MOVV 0(R5), R9 |
| MOVV 0(R6), R10 |
| ADDVU R10, R9 // ADCS R10, R9, R9 (cr=R28): R9 = x[i] + y[i] |
| SGTU R10, R9, R30 // R30 = 1 if that sum wrapped (y[i] > sum, unsigned) |
| ADDVU R28, R9 // R9 += carry in |
| SGTU R28, R9, R28 // R28 = 1 if adding the carry in wrapped (carry in > sum) |
| ADDVU R30, R28 // R28 = carry out, combining both wrap checks |
| MOVV R9, 0(R7) |
| ADDVU $8, R5 |
| ADDVU $8, R6 |
| ADDVU $8, R7 |
| SUBVU $1, R8 |
| BNE R8, loop1cont |
| loop1done: |
| loop4: |
| BEQ R4, loop4done |
| loop4cont: |
| // unroll 4X: load four words of x into R8-R11 and four words of y into R12-R15, then chain the carry through the four columns in order |
| MOVV 0(R5), R8 |
| MOVV 8(R5), R9 |
| MOVV 16(R5), R10 |
| MOVV 24(R5), R11 |
| MOVV 0(R6), R12 |
| MOVV 8(R6), R13 |
| MOVV 16(R6), R14 |
| MOVV 24(R6), R15 |
| ADDVU R12, R8 // ADCS R12, R8, R8 (cr=R28): column 0, R8 = x + y |
| SGTU R12, R8, R30 // R30 = 1 if that sum wrapped |
| ADDVU R28, R8 // R8 += carry in |
| SGTU R28, R8, R28 // R28 = 1 if adding the carry in wrapped |
| ADDVU R30, R28 // R28 = carry out of column 0 |
| ADDVU R13, R9 // ADCS R13, R9, R9 (cr=R28): column 1, R9 = x + y |
| SGTU R13, R9, R30 // R30 = 1 if that sum wrapped |
| ADDVU R28, R9 // R9 += carry in |
| SGTU R28, R9, R28 // R28 = 1 if adding the carry in wrapped |
| ADDVU R30, R28 // R28 = carry out of column 1 |
| ADDVU R14, R10 // ADCS R14, R10, R10 (cr=R28): column 2, R10 = x + y |
| SGTU R14, R10, R30 // R30 = 1 if that sum wrapped |
| ADDVU R28, R10 // R10 += carry in |
| SGTU R28, R10, R28 // R28 = 1 if adding the carry in wrapped |
| ADDVU R30, R28 // R28 = carry out of column 2 |
| ADDVU R15, R11 // ADCS R15, R11, R11 (cr=R28): column 3, R11 = x + y |
| SGTU R15, R11, R30 // R30 = 1 if that sum wrapped |
| ADDVU R28, R11 // R11 += carry in |
| SGTU R28, R11, R28 // R28 = 1 if adding the carry in wrapped |
| ADDVU R30, R28 // R28 = carry out of column 3, carried into the next iteration |
| MOVV R8, 0(R7) |
| MOVV R9, 8(R7) |
| MOVV R10, 16(R7) |
| MOVV R11, 24(R7) |
| ADDVU $32, R5 |
| ADDVU $32, R6 |
| ADDVU $32, R7 |
| SUBVU $1, R4 |
| BNE R4, loop4cont |
| loop4done: |
| MOVV R28, c+72(FP) |
| RET |
| |
| // func subVV(z, x, y []Word) (c Word): stores x[i] - y[i] - borrow into z[i] for each of the len(z) 64-bit words, lowest word first, and returns the final borrow in c. Only z_len is read; x and y are indexed for len(z) words without any check here. |
| TEXT ·subVV(SB), NOSPLIT, $0 |
| MOVV z_len+8(FP), R4 |
| MOVV x_base+24(FP), R5 |
| MOVV y_base+48(FP), R6 |
| MOVV z_base+0(FP), R7 |
| // compute unrolled loop lengths: R8 = len(z) % 4 single-word steps, R4 = len(z) / 4 four-word steps; R5, R6, R7 walk x, y, z |
| AND $3, R4, R8 |
| SRLV $2, R4 |
| XOR R28, R28 // clear carry (R28 holds the borrow passed from one word to the next) |
| loop1: |
| BEQ R8, loop1done |
| loop1cont: |
| // unroll 1X: one word per iteration, R9 = x[i], R10 = y[i] |
| MOVV 0(R5), R9 |
| MOVV 0(R6), R10 |
| SGTU R28, R9, R30 // SBCS R10, R9, R9: R30 = 1 if removing the borrow in will wrap (borrow in > x[i]) |
| SUBVU R28, R9 // R9 = x[i] - borrow in |
| SGTU R10, R9, R28 // R28 = 1 if removing y[i] will wrap (y[i] > R9, unsigned) |
| SUBVU R10, R9 // R9 -= y[i] |
| ADDVU R30, R28 // R28 = borrow out, combining both wrap checks |
| MOVV R9, 0(R7) |
| ADDVU $8, R5 |
| ADDVU $8, R6 |
| ADDVU $8, R7 |
| SUBVU $1, R8 |
| BNE R8, loop1cont |
| loop1done: |
| loop4: |
| BEQ R4, loop4done |
| loop4cont: |
| // unroll 4X: load four words of x into R8-R11 and four words of y into R12-R15, then chain the borrow through the four columns in order |
| MOVV 0(R5), R8 |
| MOVV 8(R5), R9 |
| MOVV 16(R5), R10 |
| MOVV 24(R5), R11 |
| MOVV 0(R6), R12 |
| MOVV 8(R6), R13 |
| MOVV 16(R6), R14 |
| MOVV 24(R6), R15 |
| SGTU R28, R8, R30 // SBCS R12, R8, R8: column 0, R30 = 1 if removing the borrow in will wrap |
| SUBVU R28, R8 // R8 = x - borrow in |
| SGTU R12, R8, R28 // R28 = 1 if removing y will wrap |
| SUBVU R12, R8 // R8 -= y |
| ADDVU R30, R28 // R28 = borrow out of column 0 |
| SGTU R28, R9, R30 // SBCS R13, R9, R9: column 1, R30 = 1 if removing the borrow in will wrap |
| SUBVU R28, R9 // R9 = x - borrow in |
| SGTU R13, R9, R28 // R28 = 1 if removing y will wrap |
| SUBVU R13, R9 // R9 -= y |
| ADDVU R30, R28 // R28 = borrow out of column 1 |
| SGTU R28, R10, R30 // SBCS R14, R10, R10: column 2, R30 = 1 if removing the borrow in will wrap |
| SUBVU R28, R10 // R10 = x - borrow in |
| SGTU R14, R10, R28 // R28 = 1 if removing y will wrap |
| SUBVU R14, R10 // R10 -= y |
| ADDVU R30, R28 // R28 = borrow out of column 2 |
| SGTU R28, R11, R30 // SBCS R15, R11, R11: column 3, R30 = 1 if removing the borrow in will wrap |
| SUBVU R28, R11 // R11 = x - borrow in |
| SGTU R15, R11, R28 // R28 = 1 if removing y will wrap |
| SUBVU R15, R11 // R11 -= y |
| ADDVU R30, R28 // R28 = borrow out of column 3, carried into the next iteration |
| MOVV R8, 0(R7) |
| MOVV R9, 8(R7) |
| MOVV R10, 16(R7) |
| MOVV R11, 24(R7) |
| ADDVU $32, R5 |
| ADDVU $32, R6 |
| ADDVU $32, R7 |
| SUBVU $1, R4 |
| BNE R4, loop4cont |
| loop4done: |
| MOVV R28, c+72(FP) |
| RET |
| |
| // func lshVU(z, x []Word, s uint) (c Word): stores x shifted left by s bits into the len(z) 64-bit words of z, working from the highest word down, and returns in c the bits shifted out of the highest word; when len(z) == 0 it only sets c = 0. NOTE(review): R9 = 64 - s is used as a shift count, so s is presumably required to be in 1..63 - confirm against callers. |
| TEXT ·lshVU(SB), NOSPLIT, $0 |
| MOVV z_len+8(FP), R4 |
| BEQ R4, ret0 |
| MOVV s+48(FP), R5 |
| MOVV x_base+24(FP), R6 |
| MOVV z_base+0(FP), R7 |
| // run loop backward: advance R6 and R7 by 8*len(z) bytes so they point one past the last word of x and z |
| SLLV $3, R4, R8 |
| ADDVU R8, R6 |
| SLLV $3, R4, R8 |
| ADDVU R8, R7 |
| // shift first word into carry: R9 = 64 - s, c = top word of x >> R9, and R8 = top word of x << s is kept as the pending high bits of the top word of z |
| MOVV -8(R6), R8 |
| MOVV $64, R9 |
| SUBVU R5, R9 |
| SRLV R9, R8, R10 |
| SLLV R5, R8 |
| MOVV R10, c+56(FP) |
| // shift remaining words: len(z) - 1 source words are still to be loaded |
| SUBVU $1, R4 |
| // compute unrolled loop lengths: R10 = remaining % 4 single-word steps, R4 = remaining / 4 four-word steps |
| AND $3, R4, R10 |
| SRLV $2, R4 |
| loop1: |
| BEQ R10, loop1done |
| loop1cont: |
| // unroll 1X: load the next lower word of x, store (pending bits OR word >> R9) as the current word of z, then pending bits = word << s |
| MOVV -16(R6), R11 |
| SRLV R9, R11, R12 |
| OR R8, R12 |
| SLLV R5, R11, R8 |
| MOVV R12, -8(R7) |
| ADDVU $-8, R6 |
| ADDVU $-8, R7 |
| SUBVU $1, R10 |
| BNE R10, loop1cont |
| loop1done: |
| loop4: |
| BEQ R4, loop4done |
| loop4cont: |
| // unroll 4X: the same step for four words; the loaded words are in R10-R13 and the results are built in R14, R10, R11, R12 before being stored from high address to low |
| MOVV -16(R6), R10 |
| MOVV -24(R6), R11 |
| MOVV -32(R6), R12 |
| MOVV -40(R6), R13 |
| SRLV R9, R10, R14 |
| OR R8, R14 |
| SLLV R5, R10, R8 |
| SRLV R9, R11, R10 |
| OR R8, R10 |
| SLLV R5, R11, R8 |
| SRLV R9, R12, R11 |
| OR R8, R11 |
| SLLV R5, R12, R8 |
| SRLV R9, R13, R12 |
| OR R8, R12 |
| SLLV R5, R13, R8 |
| MOVV R14, -8(R7) |
| MOVV R10, -16(R7) |
| MOVV R11, -24(R7) |
| MOVV R12, -32(R7) |
| ADDVU $-32, R6 |
| ADDVU $-32, R7 |
| SUBVU $1, R4 |
| BNE R4, loop4cont |
| loop4done: |
| // store final shifted bits: the pending bits in R8 become the lowest word of z |
| MOVV R8, -8(R7) |
| RET |
| ret0: |
| MOVV R0, c+56(FP) |
| RET |
| |
| // func rshVU(z, x []Word, s uint) (c Word): stores x shifted right by s bits into the len(z) 64-bit words of z, working from the lowest word up, and returns in c the bits shifted out of the lowest word (placed at the top of c); when len(z) == 0 it only sets c = 0. NOTE(review): R9 = 64 - s is used as a shift count, so s is presumably required to be in 1..63 - confirm against callers. |
| TEXT ·rshVU(SB), NOSPLIT, $0 |
| MOVV z_len+8(FP), R4 |
| BEQ R4, ret0 |
| MOVV s+48(FP), R5 |
| MOVV x_base+24(FP), R6 |
| MOVV z_base+0(FP), R7 |
| // shift first word into carry: R9 = 64 - s, c = x[0] << R9, and R8 = x[0] >> s is kept as the pending low bits of z[0] |
| MOVV 0(R6), R8 |
| MOVV $64, R9 |
| SUBVU R5, R9 |
| SLLV R9, R8, R10 |
| SRLV R5, R8 |
| MOVV R10, c+56(FP) |
| // shift remaining words: len(z) - 1 source words are still to be loaded |
| SUBVU $1, R4 |
| // compute unrolled loop lengths: R10 = remaining % 4 single-word steps, R4 = remaining / 4 four-word steps |
| AND $3, R4, R10 |
| SRLV $2, R4 |
| loop1: |
| BEQ R10, loop1done |
| loop1cont: |
| // unroll 1X: load the next higher word of x, store (pending bits OR word << R9) as the current word of z, then pending bits = word >> s |
| MOVV 8(R6), R11 |
| SLLV R9, R11, R12 |
| OR R8, R12 |
| SRLV R5, R11, R8 |
| MOVV R12, 0(R7) |
| ADDVU $8, R6 |
| ADDVU $8, R7 |
| SUBVU $1, R10 |
| BNE R10, loop1cont |
| loop1done: |
| loop4: |
| BEQ R4, loop4done |
| loop4cont: |
| // unroll 4X: the same step for four words; the loaded words are in R10-R13 and the results are built in R14, R10, R11, R12 before being stored from low address to high |
| MOVV 8(R6), R10 |
| MOVV 16(R6), R11 |
| MOVV 24(R6), R12 |
| MOVV 32(R6), R13 |
| SLLV R9, R10, R14 |
| OR R8, R14 |
| SRLV R5, R10, R8 |
| SLLV R9, R11, R10 |
| OR R8, R10 |
| SRLV R5, R11, R8 |
| SLLV R9, R12, R11 |
| OR R8, R11 |
| SRLV R5, R12, R8 |
| SLLV R9, R13, R12 |
| OR R8, R12 |
| SRLV R5, R13, R8 |
| MOVV R14, 0(R7) |
| MOVV R10, 8(R7) |
| MOVV R11, 16(R7) |
| MOVV R12, 24(R7) |
| ADDVU $32, R6 |
| ADDVU $32, R7 |
| SUBVU $1, R4 |
| BNE R4, loop4cont |
| loop4done: |
| // store final shifted bits: the pending bits in R8 become the highest word of z |
| MOVV R8, 0(R7) |
| RET |
| ret0: |
| MOVV R0, c+56(FP) |
| RET |
| |
| // func mulAddVWW(z, x []Word, m, a Word) (c Word): for each of the len(z) 64-bit words, lowest first, stores the low word of x[i]*m + carry into z[i] and keeps the high part as the next carry; the carry starts as a and its final value is returned in c. Only z_len is read; x is indexed for len(z) words without any check here. |
| TEXT ·mulAddVWW(SB), NOSPLIT, $0 |
| MOVV m+48(FP), R4 |
| MOVV a+56(FP), R5 |
| MOVV z_len+8(FP), R6 |
| MOVV x_base+24(FP), R7 |
| MOVV z_base+0(FP), R8 |
| // compute unrolled loop lengths: R9 = len(z) % 4 single-word steps, R6 = len(z) / 4 four-word steps; R4 = m, R5 = running carry word, R7 and R8 walk x and z |
| AND $3, R6, R9 |
| SRLV $2, R6 |
| loop1: |
| BEQ R9, loop1done |
| loop1cont: |
| // unroll 1X: one word per iteration, R10 = x[i] |
| MOVV 0(R7), R10 |
| // synthetic carry, one column at a time: R11 = low 64 bits of m*x[i], R12 = high 64 bits, and R28 stands in for the carry flag |
| MULV R4, R10, R11 |
| MULHVU R4, R10, R12 |
| ADDVU R5, R11, R10 // ADDS R5, R11, R10 (cr=R28): R10 = low product + carry in |
| SGTU R5, R10, R28 // R28 = 1 if that add wrapped (carry in > sum, unsigned) |
| ADDVU R28, R12, R5 // ADC $0, R12, R5: next carry = high product + R28 |
| MOVV R10, 0(R8) |
| ADDVU $8, R7 |
| ADDVU $8, R8 |
| SUBVU $1, R9 |
| BNE R9, loop1cont |
| loop1done: |
| loop4: |
| BEQ R6, loop4done |
| loop4cont: |
| // unroll 4X: load four words of x into R9-R12; each column overwrites its own input register with the result word |
| MOVV 0(R7), R9 |
| MOVV 8(R7), R10 |
| MOVV 16(R7), R11 |
| MOVV 24(R7), R12 |
| // synthetic carry, one column at a time: per column R13 = low 64 bits of the product, R14 = high 64 bits, R28 stands in for the carry flag, and R5 links one column to the next |
| MULV R4, R9, R13 |
| MULHVU R4, R9, R14 |
| ADDVU R5, R13, R9 // ADDS R5, R13, R9 (cr=R28): column 0, R9 = low product + carry in |
| SGTU R5, R9, R28 // R28 = 1 if that add wrapped |
| ADDVU R28, R14, R5 // ADC $0, R14, R5: next carry = high product + R28 |
| MULV R4, R10, R13 |
| MULHVU R4, R10, R14 |
| ADDVU R5, R13, R10 // ADDS R5, R13, R10 (cr=R28): column 1, R10 = low product + carry in |
| SGTU R5, R10, R28 // R28 = 1 if that add wrapped |
| ADDVU R28, R14, R5 // ADC $0, R14, R5: next carry = high product + R28 |
| MULV R4, R11, R13 |
| MULHVU R4, R11, R14 |
| ADDVU R5, R13, R11 // ADDS R5, R13, R11 (cr=R28): column 2, R11 = low product + carry in |
| SGTU R5, R11, R28 // R28 = 1 if that add wrapped |
| ADDVU R28, R14, R5 // ADC $0, R14, R5: next carry = high product + R28 |
| MULV R4, R12, R13 |
| MULHVU R4, R12, R14 |
| ADDVU R5, R13, R12 // ADDS R5, R13, R12 (cr=R28): column 3, R12 = low product + carry in |
| SGTU R5, R12, R28 // R28 = 1 if that add wrapped |
| ADDVU R28, R14, R5 // ADC $0, R14, R5: next carry = high product + R28, carried into the next iteration |
| MOVV R9, 0(R8) |
| MOVV R10, 8(R8) |
| MOVV R11, 16(R8) |
| MOVV R12, 24(R8) |
| ADDVU $32, R7 |
| ADDVU $32, R8 |
| SUBVU $1, R6 |
| BNE R6, loop4cont |
| loop4done: |
| MOVV R5, c+64(FP) |
| RET |
| |
| // func addMulVVWW(z, x, y []Word, m, a Word) (c Word): for each of the len(z) 64-bit words, lowest first, stores the low word of x[i] + y[i]*m + carry into z[i] and keeps the high part as the next carry; the carry starts as a and its final value is returned in c. Only z_len is read; x and y are indexed for len(z) words without any check here. |
| TEXT ·addMulVVWW(SB), NOSPLIT, $0 |
| MOVV m+72(FP), R4 |
| MOVV a+80(FP), R5 |
| MOVV z_len+8(FP), R6 |
| MOVV x_base+24(FP), R7 |
| MOVV y_base+48(FP), R8 |
| MOVV z_base+0(FP), R9 |
| // compute unrolled loop lengths: R10 = len(z) % 4 single-word steps, R6 = len(z) / 4 four-word steps; R4 = m, R5 = running carry word, R7, R8, R9 walk x, y, z |
| AND $3, R6, R10 |
| SRLV $2, R6 |
| loop1: |
| BEQ R10, loop1done |
| loop1cont: |
| // unroll 1X: one word per iteration, R11 = x[i], R12 = y[i] |
| MOVV 0(R7), R11 |
| MOVV 0(R8), R12 |
| // synthetic carry, one column at a time: R13 = low 64 bits of m*y[i], R14 = high 64 bits, and R28 stands in for the carry flag |
| MULV R4, R12, R13 |
| MULHVU R4, R12, R14 |
| ADDVU R11, R13 // ADDS R11, R13, R13 (cr=R28): R13 = low product + x[i] |
| SGTU R11, R13, R28 // R28 = 1 if that add wrapped (x[i] > sum, unsigned) |
| ADDVU R28, R14 // ADC $0, R14, R14: fold that carry into the high product |
| ADDVU R5, R13, R12 // ADDS R5, R13, R12 (cr=R28): R12 = R13 + carry in |
| SGTU R5, R12, R28 // R28 = 1 if that add wrapped (carry in > sum, unsigned) |
| ADDVU R28, R14, R5 // ADC $0, R14, R5: next carry = high product + R28 |
| MOVV R12, 0(R9) |
| ADDVU $8, R7 |
| ADDVU $8, R8 |
| ADDVU $8, R9 |
| SUBVU $1, R10 |
| BNE R10, loop1cont |
| loop1done: |
| loop4: |
| BEQ R6, loop4done |
| loop4cont: |
| // unroll 4X: load four words of x into R10-R13 and four words of y into R14-R17; each column overwrites its y register with the result word |
| MOVV 0(R7), R10 |
| MOVV 8(R7), R11 |
| MOVV 16(R7), R12 |
| MOVV 24(R7), R13 |
| MOVV 0(R8), R14 |
| MOVV 8(R8), R15 |
| MOVV 16(R8), R16 |
| MOVV 24(R8), R17 |
| // synthetic carry, one column at a time: per column R18 = low 64 bits of the product, R19 = high 64 bits, R28 stands in for the carry flag, and R5 links one column to the next |
| MULV R4, R14, R18 |
| MULHVU R4, R14, R19 |
| ADDVU R10, R18 // ADDS R10, R18, R18 (cr=R28): column 0, R18 = low product + x |
| SGTU R10, R18, R28 // R28 = 1 if that add wrapped |
| ADDVU R28, R19 // ADC $0, R19, R19: fold that carry into the high product |
| ADDVU R5, R18, R14 // ADDS R5, R18, R14 (cr=R28): R14 = R18 + carry in |
| SGTU R5, R14, R28 // R28 = 1 if that add wrapped |
| ADDVU R28, R19, R5 // ADC $0, R19, R5: next carry = high product + R28 |
| MULV R4, R15, R18 |
| MULHVU R4, R15, R19 |
| ADDVU R11, R18 // ADDS R11, R18, R18 (cr=R28): column 1, R18 = low product + x |
| SGTU R11, R18, R28 // R28 = 1 if that add wrapped |
| ADDVU R28, R19 // ADC $0, R19, R19: fold that carry into the high product |
| ADDVU R5, R18, R15 // ADDS R5, R18, R15 (cr=R28): R15 = R18 + carry in |
| SGTU R5, R15, R28 // R28 = 1 if that add wrapped |
| ADDVU R28, R19, R5 // ADC $0, R19, R5: next carry = high product + R28 |
| MULV R4, R16, R18 |
| MULHVU R4, R16, R19 |
| ADDVU R12, R18 // ADDS R12, R18, R18 (cr=R28): column 2, R18 = low product + x |
| SGTU R12, R18, R28 // R28 = 1 if that add wrapped |
| ADDVU R28, R19 // ADC $0, R19, R19: fold that carry into the high product |
| ADDVU R5, R18, R16 // ADDS R5, R18, R16 (cr=R28): R16 = R18 + carry in |
| SGTU R5, R16, R28 // R28 = 1 if that add wrapped |
| ADDVU R28, R19, R5 // ADC $0, R19, R5: next carry = high product + R28 |
| MULV R4, R17, R18 |
| MULHVU R4, R17, R19 |
| ADDVU R13, R18 // ADDS R13, R18, R18 (cr=R28): column 3, R18 = low product + x |
| SGTU R13, R18, R28 // R28 = 1 if that add wrapped |
| ADDVU R28, R19 // ADC $0, R19, R19: fold that carry into the high product |
| ADDVU R5, R18, R17 // ADDS R5, R18, R17 (cr=R28): R17 = R18 + carry in |
| SGTU R5, R17, R28 // R28 = 1 if that add wrapped |
| ADDVU R28, R19, R5 // ADC $0, R19, R5: next carry = high product + R28, carried into the next iteration |
| MOVV R14, 0(R9) |
| MOVV R15, 8(R9) |
| MOVV R16, 16(R9) |
| MOVV R17, 24(R9) |
| ADDVU $32, R7 |
| ADDVU $32, R8 |
| ADDVU $32, R9 |
| SUBVU $1, R6 |
| BNE R6, loop4cont |
| loop4done: |
| MOVV R5, c+88(FP) |
| RET |