| // Copyright 2013 The Go Authors. All rights reserved. | 
 | // Use of this source code is governed by a BSD-style | 
 | // license that can be found in the LICENSE file. | 
 |  | 
 | // +build !math_big_pure_go | 
 |  | 
 | #include "textflag.h" | 
 |  | 
 | // This file provides fast assembly versions for the elementary | 
 | // arithmetic operations on vectors implemented in arith.go. | 
 |  | 
 | // TODO: Consider re-implementing using Advanced SIMD | 
 | // once the assembler supports those instructions. | 
 |  | 
 | // func mulWW(x, y Word) (z1, z0 Word) | 
 | // mulWW computes the full 128-bit product x*y and returns its | 
 | // upper 64 bits in z1 and its lower 64 bits in z0. | 
 | TEXT ·mulWW(SB),NOSPLIT,$0 | 
 | 	MOVD	y+8(FP), R5 | 
 | 	MOVD	x+0(FP), R4 | 
 | 	UMULH	R4, R5, R6	// R6 = high word of x*y | 
 | 	MOVD	R6, z1+16(FP) | 
 | 	MUL	R4, R5, R7	// R7 = low word of x*y | 
 | 	MOVD	R7, z0+24(FP) | 
 | 	RET | 
 |  | 
 |  | 
 | // func divWW(x1, x0, y Word) (q, r Word) | 
 | // divWW has no assembly body of its own: it branches (not calls) to | 
 | // divWW_g, so that routine runs on the caller's argument frame and | 
 | // returns directly to the caller. divWW_g is defined outside this file | 
 | // (presumably the generic Go version in arith.go; confirm). | 
 | TEXT ·divWW(SB),NOSPLIT,$0 | 
 | 	B	·divWW_g(SB) // ARM64 has no multiword division | 
 |  | 
 |  | 
 | // func addVV(z, x, y []Word) (c Word) | 
 | // addVV computes z[i] = x[i] + y[i] + carry for i in [0, len(z)), | 
 | // chaining the carry from word to word through the processor carry flag, | 
 | // and returns the final carry (0 or 1) in c. | 
 | // Only len(z) is consulted; the lengths of x and y are not read here, so | 
 | // the caller must ensure they hold at least len(z) words. | 
 | // | 
 | // Register use: R0 = words remaining, R1 = x pointer, R2 = y pointer, | 
 | // R3 = z pointer, R4/R5 = scratch words. | 
 | TEXT ·addVV(SB),NOSPLIT,$0 | 
 | 	MOVD	z+0(FP), R3 | 
 | 	MOVD	z_len+8(FP), R0 | 
 | 	MOVD	x+24(FP), R1 | 
 | 	MOVD	y+48(FP), R2 | 
 | 	ADDS	$0, R0 // clear carry flag | 
 | loop: | 
 | 	CBZ	R0, done // careful not to touch the carry flag | 
 | 	// Load the next word of x and of y, post-incrementing each pointer by 8. | 
 | 	MOVD.P	8(R1), R4 | 
 | 	MOVD.P	8(R2), R5 | 
 | 	// R5 = R5 + R4 + carry-in; the carry-out is left in the flags. | 
 | 	ADCS	R4, R5 | 
 | 	MOVD.P	R5, 8(R3) | 
 | 	// Plain SUB (not SUBS) so the carry survives into the next iteration. | 
 | 	SUB	$1, R0 | 
 | 	B	loop | 
 | done: | 
 | 	CSET	HS, R0 // extract carry flag | 
 | 	MOVD	R0, c+72(FP) | 
 | 	RET | 
 |  | 
 |  | 
 | // func subVV(z, x, y []Word) (c Word) | 
 | // subVV computes z[i] = x[i] - y[i] - borrow for i in [0, len(z)), | 
 | // chaining the borrow from word to word through the processor carry flag, | 
 | // and returns the final borrow (0 or 1) in c. | 
 | // Only len(z) is consulted; the lengths of x and y are not read here, so | 
 | // the caller must ensure they hold at least len(z) words. | 
 | // | 
 | // On ARM64 the carry flag is the inverse of a borrow: carry set means | 
 | // "no borrow". That is why the flag starts out set and the result is | 
 | // extracted with the LO (carry clear) condition. | 
 | // | 
 | // Register use: R0 = words remaining, R1 = x pointer, R2 = y pointer, | 
 | // R3 = z pointer, R4/R5 = scratch words. | 
 | TEXT ·subVV(SB),NOSPLIT,$0 | 
 | 	MOVD	z+0(FP), R3 | 
 | 	MOVD	z_len+8(FP), R0 | 
 | 	MOVD	x+24(FP), R1 | 
 | 	MOVD	y+48(FP), R2 | 
 | 	CMP	R0, R0 // set carry flag | 
 | loop: | 
 | 	CBZ	R0, done // careful not to touch the carry flag | 
 | 	// Load the next word of x and of y, post-incrementing each pointer by 8. | 
 | 	MOVD.P	8(R1), R4 | 
 | 	MOVD.P	8(R2), R5 | 
 | 	// R4 = R4 - R5 - borrow-in; the new borrow state is left in the flags. | 
 | 	SBCS	R5, R4 | 
 | 	MOVD.P	R4, 8(R3) | 
 | 	// Plain SUB (not SUBS) so the borrow survives into the next iteration. | 
 | 	SUB	$1, R0 | 
 | 	B	loop | 
 | done: | 
 | 	CSET	LO, R0 // extract carry flag | 
 | 	MOVD	R0, c+72(FP) | 
 | 	RET | 
 |  | 
 |  | 
 | // func addVW(z, x []Word, y Word) (c Word) | 
 | // addVW adds the single word y to the multi-word value x, stores the | 
 | // len(z) result words in z, and returns the final carry (0 or 1) in c. | 
 | // When len(z) is 0 nothing is stored and c is set to y itself. | 
 | // Only len(z) is consulted; the length of x is not read here, so the | 
 | // caller must ensure it holds at least len(z) words. | 
 | // | 
 | // Register use: R0 = words remaining, R1 = x pointer, R2 = y, | 
 | // R3 = z pointer, R4 = scratch word. | 
 | TEXT ·addVW(SB),NOSPLIT,$0 | 
 | 	MOVD	z+0(FP), R3 | 
 | 	MOVD	z_len+8(FP), R0 | 
 | 	MOVD	x+24(FP), R1 | 
 | 	MOVD	y+48(FP), R2 | 
 | 	CBZ	R0, return_y | 
 | 	// First word: z[0] = x[0] + y. ADDS seeds the carry flag for the loop. | 
 | 	MOVD.P	8(R1), R4 | 
 | 	ADDS	R2, R4 | 
 | 	MOVD.P	R4, 8(R3) | 
 | 	SUB	$1, R0 | 
 | loop: | 
 | 	CBZ	R0, done // careful not to touch the carry flag | 
 | 	// Remaining words: z[i] = x[i] + carry-in, propagating the carry. | 
 | 	MOVD.P	8(R1), R4 | 
 | 	ADCS	$0, R4 | 
 | 	MOVD.P	R4, 8(R3) | 
 | 	// Plain SUB (not SUBS) so the carry survives into the next iteration. | 
 | 	SUB	$1, R0 | 
 | 	B	loop | 
 | done: | 
 | 	CSET	HS, R0 // extract carry flag | 
 | 	MOVD	R0, c+56(FP) | 
 | 	RET | 
 | return_y: // z is empty; copy y to c | 
 | 	MOVD	R2, c+56(FP) | 
 | 	RET | 
 |  | 
 |  | 
 | // func subVW(z, x []Word, y Word) (c Word) | 
 | // subVW subtracts the single word y from the multi-word value x, stores | 
 | // the len(z) result words in z, and returns the final borrow (0 or 1) in c. | 
 | // When len(z) is 0 nothing is stored and c is set to y itself. | 
 | // Only len(z) is consulted; the length of x is not read here, so the | 
 | // caller must ensure it holds at least len(z) words. | 
 | // | 
 | // On ARM64 the carry flag is the inverse of a borrow (carry set means | 
 | // "no borrow"), hence the LO (carry clear) condition when extracting c. | 
 | // | 
 | // Register use: R0 = words remaining, R1 = x pointer, R2 = y, | 
 | // R3 = z pointer, R4 = scratch word. | 
 | TEXT ·subVW(SB),NOSPLIT,$0 | 
 | 	MOVD	z+0(FP), R3 | 
 | 	MOVD	z_len+8(FP), R0 | 
 | 	MOVD	x+24(FP), R1 | 
 | 	MOVD	y+48(FP), R2 | 
 | 	CBZ	R0, rety | 
 | 	// First word: z[0] = x[0] - y. SUBS seeds the borrow state for the loop. | 
 | 	MOVD.P	8(R1), R4 | 
 | 	SUBS	R2, R4 | 
 | 	MOVD.P	R4, 8(R3) | 
 | 	SUB	$1, R0 | 
 | loop: | 
 | 	CBZ	R0, done // careful not to touch the carry flag | 
 | 	// Remaining words: z[i] = x[i] - borrow-in, propagating the borrow. | 
 | 	MOVD.P	8(R1), R4 | 
 | 	SBCS	$0, R4 | 
 | 	MOVD.P	R4, 8(R3) | 
 | 	// Plain SUB (not SUBS) so the borrow survives into the next iteration. | 
 | 	SUB	$1, R0 | 
 | 	B	loop | 
 | done: | 
 | 	CSET	LO, R0 // extract carry flag | 
 | 	MOVD	R0, c+56(FP) | 
 | 	RET | 
 | rety: // z is empty; copy y to c | 
 | 	MOVD	R2, c+56(FP) | 
 | 	RET | 
 |  | 
 |  | 
 | // func shlVU(z, x []Word, s uint) (c Word) | 
 | // shlVU has no assembly body here: it branches (not calls) to shlVU_g, | 
 | // which therefore runs on the caller's argument frame and returns | 
 | // directly to the caller. shlVU_g is defined outside this file | 
 | // (presumably the generic Go version in arith.go; confirm). | 
 | TEXT ·shlVU(SB),NOSPLIT,$0 | 
 | 	B ·shlVU_g(SB) | 
 |  | 
 |  | 
 | // func shrVU(z, x []Word, s uint) (c Word) | 
 | // shrVU has no assembly body here: it branches (not calls) to shrVU_g, | 
 | // which therefore runs on the caller's argument frame and returns | 
 | // directly to the caller. shrVU_g is defined outside this file | 
 | // (presumably the generic Go version in arith.go; confirm). | 
 | TEXT ·shrVU(SB),NOSPLIT,$0 | 
 | 	B ·shrVU_g(SB) | 
 |  | 
 |  | 
 | // func mulAddVWW(z, x []Word, y, r Word) (c Word) | 
 | // mulAddVWW walks the len(z) words of x, forming the 128-bit value | 
 | // x[i]*y + carry for each one (carry starts as r). The low word is | 
 | // stored in z[i]; the high word becomes the carry into the next step. | 
 | // The carry left after the last word is returned in c; for an empty z | 
 | // that is simply r. | 
 | // | 
 | // Register use: R0 = z pointer, R1 = x pointer, R2 = words remaining, | 
 | // R3 = y, R4 = running carry, R5 = x[i], R6/R7 = low/high product words. | 
 | TEXT ·mulAddVWW(SB),NOSPLIT,$0 | 
 | 	MOVD	z_len+8(FP), R2 | 
 | 	MOVD	z+0(FP), R0 | 
 | 	MOVD	x+24(FP), R1 | 
 | 	MOVD	y+48(FP), R3 | 
 | 	MOVD	r+56(FP), R4 | 
 | 	CBZ	R2, finish	// nothing to do for an empty vector | 
 | next: | 
 | 	MOVD.P	8(R1), R5	// R5 = *x++ | 
 | 	MUL	R5, R3, R6	// R6 = low word of x[i]*y | 
 | 	UMULH	R5, R3, R7	// R7 = high word of x[i]*y | 
 | 	ADDS	R4, R6	// low += carry-in, sets the carry flag | 
 | 	ADC	$0, R7	// fold that carry into the high word | 
 | 	MOVD	R7, R4	// high word is the next carry-in | 
 | 	MOVD.P	R6, 8(R0)	// *z++ = low | 
 | 	SUB	$1, R2 | 
 | 	CBNZ	R2, next | 
 | finish: | 
 | 	MOVD	R4, c+64(FP) | 
 | 	RET | 
 |  | 
 |  | 
 | // func addMulVVW(z, x []Word, y Word) (c Word) | 
 | // addMulVVW has no assembly body here: it branches (not calls) to | 
 | // addMulVVW_g, which therefore runs on the caller's argument frame and | 
 | // returns directly to the caller. addMulVVW_g is defined outside this | 
 | // file (presumably the generic Go version in arith.go; confirm). | 
 | TEXT ·addMulVVW(SB),NOSPLIT,$0 | 
 | 	B ·addMulVVW_g(SB) | 
 |  | 
 |  | 
 | // func divWVW(z []Word, xn Word, x []Word, y Word) (r Word) | 
 | // divWVW has no assembly body here: it branches (not calls) to divWVW_g, | 
 | // which therefore runs on the caller's argument frame and returns | 
 | // directly to the caller. divWVW_g is defined outside this file | 
 | // (presumably the generic Go version in arith.go; confirm). | 
 | TEXT ·divWVW(SB),NOSPLIT,$0 | 
 | 	B ·divWVW_g(SB) | 
 |  | 
 |  | 
 | // func bitLen(x Word) (n int) | 
 | // bitLen returns the number of bits needed to represent x, computed as | 
 | // 64 minus the count of leading zero bits in x. bitLen(0) is 0. | 
 | TEXT ·bitLen(SB),NOSPLIT,$0 | 
 | 	MOVD	$64, R2 | 
 | 	MOVD	x+0(FP), R1 | 
 | 	CLZ	R1, R1	// R1 = number of leading zero bits in x | 
 | 	SUB	R1, R2, R3	// R3 = 64 - R1 | 
 | 	MOVD	R3, n+8(FP) | 
 | 	RET | 