| // Code generated by command: go run nat_amd64_asm.go -out ../nat_amd64.s -stubs ../nat_amd64.go -pkg bigmod. DO NOT EDIT. |
| |
| //go:build amd64 && gc && !purego |
| |
// montgomeryLoop runs i over [0, len(d)) and, for each a[i], replaces d with
// (d + a[i]*b + f*m) shifted down by one limb, where f is the low 63 bits of
// m0inv * (d[0] + a[i]*b[0]). Words are handled as 63-bit limbs: every word
// stored to d has bit 63 cleared, and carries are extracted by shifting right
// by 63. The carry left over after the last outer iteration is returned.
//
// NOTE(review): this looks like one word-by-word Montgomery multiplication
// step per outer iteration — confirm against the Go caller / generic version.
// NOTE(review): only len(d) is loaded; a, b and m are indexed up to
// len(d)-1, and the outer body executes once before its bound is tested, so
// this presumably requires len(a) == len(b) == len(m) == len(d) >= 1 —
// verify at the call site.
//
// Register use:
//   CX  = len(d)        BX = &d[0]       SI = &b[0]       DI = &m[0]
//   R8  = m0inv         R9 = top carry, kept across outer iterations
//   R10 = outer index i R11 = a[i]       R14 = f
//   R12 = high word of the first-limb sum, then inner index j
//   R13 = low word of the first-limb sum, then carry between limbs
//   R15:BP = 128-bit accumulator of the inner loop (BP is used as scratch)
//
// Frame: $8-112 declares an 8-byte local frame and 112 bytes of
// arguments plus result (4 slice headers, m0inv, ret).
| // func montgomeryLoop(d []uint, a []uint, b []uint, m []uint, m0inv uint) uint |
| TEXT ·montgomeryLoop(SB), $8-112 |
// Load the loop-invariant arguments, then zero the top carry (R9) and the
// outer index i (R10).
| MOVQ d_len+8(FP), CX |
| MOVQ d_base+0(FP), BX |
| MOVQ b_base+48(FP), SI |
| MOVQ m_base+72(FP), DI |
| MOVQ m0inv+96(FP), R8 |
| XORQ R9, R9 |
| XORQ R10, R10 |
| |
| outerLoop: |
// R11 = a[i]. The base of a is reloaded from the frame on every iteration
// because R11 is overwritten with the element itself.
| MOVQ a_base+24(FP), R11 |
| MOVQ (R11)(R10*8), R11 |
// First limb (j = 0): R12:R13 = a[i]*b[0] + d[0]. MULQ leaves the 128-bit
// product in DX:AX.
| MOVQ (SI), AX |
| MULQ R11 |
| MOVQ AX, R13 |
| MOVQ DX, R12 |
| ADDQ (BX), R13 |
| ADCQ $0x00, R12 |
// f = (m0inv * low word of the sum) with bit 63 cleared, i.e. reduced to
// 63 bits.
| MOVQ R8, R14 |
| IMULQ R13, R14 |
| BTRQ $0x3f, R14 |
// R12:R13 += f*m[0].
| MOVQ (DI), AX |
| MULQ R14 |
| ADDQ AX, R13 |
| ADCQ DX, R12 |
// Double-register shift: R13 = (R12:R13) >> 63. The low 63 bits of the
// first-limb sum are dropped (presumably zero by the choice of f — confirm);
// what remains is the carry into limb 1.
| SHRQ $0x3f, R12, R13 |
// Start the inner loop at j = 1 (R12 is reused as the index).
| XORQ R12, R12 |
| INCQ R12 |
| JMP innerLoopCondition |
| |
| innerLoop: |
// R15:BP = a[i]*b[j].
| MOVQ (SI)(R12*8), AX |
| MULQ R11 |
| MOVQ AX, BP |
| MOVQ DX, R15 |
// R15:BP += f*m[j].
| MOVQ (DI)(R12*8), AX |
| MULQ R14 |
| ADDQ AX, BP |
| ADCQ DX, R15 |
// R15:BP += d[j], then += carry from the previous limb; each ADCQ $0
// propagates the carry flag into the high word.
| ADDQ (BX)(R12*8), BP |
| ADCQ $0x00, R15 |
| ADDQ R13, BP |
| ADCQ $0x00, R15 |
// d[j-1] = low 63 bits of the sum (the -8 displacement stores one word
// below index j, which is the shift down by one limb).
| MOVQ BP, AX |
| BTRQ $0x3f, AX |
| MOVQ AX, -8(BX)(R12*8) |
// Carry for the next limb: R13 = (R15:BP) >> 63.
| SHRQ $0x3f, R15, BP |
| MOVQ BP, R13 |
| INCQ R12 |
| |
| innerLoopCondition: |
// Continue the inner loop while len(d) > j (signed compare).
| CMPQ CX, R12 |
| JGT innerLoop |
// Fold the last limb carry into the top carry, store its low 63 bits as
// d[len(d)-1], and keep only the bit above them in R9 for the next pass.
| ADDQ R13, R9 |
| MOVQ R9, AX |
| BTRQ $0x3f, AX |
| MOVQ AX, -8(BX)(CX*8) |
| SHRQ $0x3f, R9 |
// i++; repeat the outer loop while len(d) > i. The test is at the bottom,
// so the body above has already run once before it is first evaluated.
| INCQ R10 |
| CMPQ CX, R10 |
| JGT outerLoop |
// Return the remaining top carry.
| MOVQ R9, ret+104(FP) |
| RET |