| // Copyright 2013 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| // +build !math_big_pure_go |
| |
| #include "textflag.h" |
| |
| // This file provides fast assembly versions for the elementary |
| // arithmetic operations on vectors implemented in arith.go. |
| |
| // TODO: Consider re-implementing using Advanced SIMD |
| // once the assembler supports those instructions. |
| |
| // func mulWW(x, y Word) (z1, z0 Word) -- full unsigned 64x64 -> 128-bit product of x and y: MUL produces the low 64 bits (R2, stored to z0) and UMULH the high 64 bits (R3, stored to z1). |
| TEXT ·mulWW(SB),NOSPLIT,$0 |
| MOVD x+0(FP), R0 |
| MOVD y+8(FP), R1 |
| MUL R0, R1, R2 |
| UMULH R0, R1, R3 |
| MOVD R3, z1+16(FP) |
| MOVD R2, z0+24(FP) |
| RET |
| |
| |
| // func divWW(x1, x0, y Word) (q, r Word) -- no assembly implementation here: control is handed straight to ·divWW_g (presumably the portable Go version in arith.go -- confirm), which sees the same arguments. |
| TEXT ·divWW(SB),NOSPLIT,$0 |
| B ·divWW_g(SB) // ARM64 has no multiword division instruction, so branch to ·divWW_g instead |
| |
| |
| // func addVV(z, x, y []Word) (c Word) -- for each i in [0, len(z)): z[i] = x[i] + y[i] + carry, with the carry kept in the CPU carry flag between iterations; c is the final carry (0 or 1). Only len(z) is read, so x and y are assumed to hold at least len(z) words. R0 = words remaining, R1/R2 = x/y read pointers, R3 = z write pointer; each MOVD.P advances its pointer by 8 bytes. |
| TEXT ·addVV(SB),NOSPLIT,$0 |
| MOVD z+0(FP), R3 |
| MOVD z_len+8(FP), R0 |
| MOVD x+24(FP), R1 |
| MOVD y+48(FP), R2 |
| ADDS $0, R0 // clear carry flag: adding 0 cannot carry out, and R0 (the word count) keeps its value |
| loop: |
| CBZ R0, done // careful not to touch the carry flag: CBZ tests R0 without setting flags, and the SUB below is the non-flag-setting form, so only ADCS updates the carry |
| MOVD.P 8(R1), R4 |
| MOVD.P 8(R2), R5 |
| ADCS R4, R5 |
| MOVD.P R5, 8(R3) |
| SUB $1, R0 |
| B loop |
| done: |
| CSET HS, R0 // extract carry flag: R0 = 1 if the carry flag is set (HS), else 0 |
| MOVD R0, c+72(FP) |
| RET |
| |
| |
| // func subVV(z, x, y []Word) (c Word) -- for each i in [0, len(z)): z[i] = x[i] - y[i] - borrow, with the borrow kept in the CPU carry flag between iterations; c is the final borrow (0 or 1). ARM64 represents "no borrow" as carry set, hence the inverted flag handling. Only len(z) is read, so x and y are assumed to hold at least len(z) words. R0 = words remaining, R1/R2 = x/y read pointers, R3 = z write pointer. |
| TEXT ·subVV(SB),NOSPLIT,$0 |
| MOVD z+0(FP), R3 |
| MOVD z_len+8(FP), R0 |
| MOVD x+24(FP), R1 |
| MOVD y+48(FP), R2 |
| CMP R0, R0 // set carry flag: comparing a register with itself never borrows, so C=1, i.e. "no borrow pending" |
| loop: |
| CBZ R0, done // careful not to touch the carry flag: CBZ tests R0 without setting flags, and the SUB below is the non-flag-setting form, so only SBCS updates the carry |
| MOVD.P 8(R1), R4 |
| MOVD.P 8(R2), R5 |
| SBCS R5, R4 |
| MOVD.P R4, 8(R3) |
| SUB $1, R0 |
| B loop |
| done: |
| CSET LO, R0 // extract carry flag, inverted: R0 = 1 if the carry flag is clear (LO), which means a borrow occurred, else 0 |
| MOVD R0, c+72(FP) |
| RET |
| |
| |
| // func addVW(z, x []Word, y Word) (c Word) -- z = x + y over len(z) words: y is added to x[0] with ADDS, then the carry is rippled through the remaining words with ADCS $0; c is the final carry (0 or 1). When len(z) == 0 nothing is read or written and c = y. Only len(z) is read, so x is assumed to hold at least len(z) words. R0 = words remaining, R1 = x read pointer, R2 = y, R3 = z write pointer. |
| TEXT ·addVW(SB),NOSPLIT,$0 |
| MOVD z+0(FP), R3 |
| MOVD z_len+8(FP), R0 |
| MOVD x+24(FP), R1 |
| MOVD y+48(FP), R2 |
| CBZ R0, return_y |
| MOVD.P 8(R1), R4 |
| ADDS R2, R4 |
| MOVD.P R4, 8(R3) |
| SUB $1, R0 |
| loop: |
| CBZ R0, done // careful not to touch the carry flag: CBZ tests R0 without setting flags, and the SUB below is the non-flag-setting form, so only ADCS updates the carry |
| MOVD.P 8(R1), R4 |
| ADCS $0, R4 |
| MOVD.P R4, 8(R3) |
| SUB $1, R0 |
| B loop |
| done: |
| CSET HS, R0 // extract carry flag: R0 = 1 if the carry flag is set (HS), else 0 |
| MOVD R0, c+56(FP) |
| RET |
| return_y: // z is empty; copy y to c |
| MOVD R2, c+56(FP) |
| RET |
| |
| |
| // func subVW(z, x []Word, y Word) (c Word) -- z = x - y over len(z) words: y is subtracted from x[0] with SUBS, then the borrow is rippled through the remaining words with SBCS $0; c is the final borrow (0 or 1). When len(z) == 0 nothing is read or written and c = y. Only len(z) is read, so x is assumed to hold at least len(z) words. R0 = words remaining, R1 = x read pointer, R2 = y, R3 = z write pointer. |
| TEXT ·subVW(SB),NOSPLIT,$0 |
| MOVD z+0(FP), R3 |
| MOVD z_len+8(FP), R0 |
| MOVD x+24(FP), R1 |
| MOVD y+48(FP), R2 |
| CBZ R0, rety |
| MOVD.P 8(R1), R4 |
| SUBS R2, R4 |
| MOVD.P R4, 8(R3) |
| SUB $1, R0 |
| loop: |
| CBZ R0, done // careful not to touch the carry flag: CBZ tests R0 without setting flags, and the SUB below is the non-flag-setting form, so only SBCS updates the carry |
| MOVD.P 8(R1), R4 |
| SBCS $0, R4 |
| MOVD.P R4, 8(R3) |
| SUB $1, R0 |
| B loop |
| done: |
| CSET LO, R0 // extract carry flag, inverted: R0 = 1 if the carry flag is clear (LO), which means a borrow occurred, else 0 |
| MOVD R0, c+56(FP) |
| RET |
| rety: // z is empty; copy y to c |
| MOVD R2, c+56(FP) |
| RET |
| |
| |
| // func shlVU(z, x []Word, s uint) (c Word) -- no assembly implementation here: control is handed straight to ·shlVU_g (presumably the portable Go version in arith.go -- confirm), which sees the same arguments. |
| TEXT ·shlVU(SB),NOSPLIT,$0 |
| B ·shlVU_g(SB) |
| |
| |
| // func shrVU(z, x []Word, s uint) (c Word) -- no assembly implementation here: control is handed straight to ·shrVU_g (presumably the portable Go version in arith.go -- confirm), which sees the same arguments. |
| TEXT ·shrVU(SB),NOSPLIT,$0 |
| B ·shrVU_g(SB) |
| |
| |
| // func mulAddVWW(z, x []Word, y, r Word) (c Word) -- z = x*y + r over len(z) words. The running carry word (R4) starts as r. For each i the 128-bit product x[i]*y is formed (MUL = low word in R6, UMULH = high word in R7), the carry word is added to the low word (ADDS) and the resulting carry bit is folded into the high word (ADC $0); the low word is stored to z[i] and the high word becomes the next carry word. c is the carry word left after the last element, so c = r when len(z) == 0. Only len(z) is read, so x is assumed to hold at least len(z) words. R0 = words remaining, R1 = z write pointer, R2 = x read pointer, R3 = y. |
| TEXT ·mulAddVWW(SB),NOSPLIT,$0 |
| MOVD z+0(FP), R1 |
| MOVD z_len+8(FP), R0 |
| MOVD x+24(FP), R2 |
| MOVD y+48(FP), R3 |
| MOVD r+56(FP), R4 |
| loop: |
| CBZ R0, done |
| MOVD.P 8(R2), R5 |
| UMULH R5, R3, R7 |
| MUL R5, R3, R6 |
| ADDS R4, R6 |
| ADC $0, R7 |
| MOVD.P R6, 8(R1) |
| MOVD R7, R4 |
| SUB $1, R0 |
| B loop |
| done: |
| MOVD R4, c+64(FP) |
| RET |
| |
| |
| // func addMulVVW(z, x []Word, y Word) (c Word) -- no assembly implementation here: control is handed straight to ·addMulVVW_g (presumably the portable Go version in arith.go -- confirm), which sees the same arguments. |
| TEXT ·addMulVVW(SB),NOSPLIT,$0 |
| B ·addMulVVW_g(SB) |
| |
| |
| // func divWVW(z []Word, xn Word, x []Word, y Word) (r Word) -- no assembly implementation here: control is handed straight to ·divWVW_g (presumably the portable Go version in arith.go -- confirm), which sees the same arguments. |
| TEXT ·divWVW(SB),NOSPLIT,$0 |
| B ·divWVW_g(SB) |
| |
| |
| // func bitLen(x Word) (n int) -- n = 64 - CLZ(x), where CLZ counts the leading zero bits of the 64-bit value x; this is the position of the highest set bit plus one, and 0 when x == 0 (CLZ of zero is 64). |
| TEXT ·bitLen(SB),NOSPLIT,$0 |
| MOVD x+0(FP), R0 |
| CLZ R0, R0 |
| MOVD $64, R1 |
| SUB R0, R1, R0 |
| MOVD R0, n+8(FP) |
| RET |