// blob: 9adeb9981dd1f90698bb9d09cf33f79b83b5857f [file] [log] [blame] [edit]
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build !purego
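// Register usage in addMulVVWx (z13 convention):
// R2 = rp (result pointer, z)
// R3 = ap (source pointer, x)
// R4 = an (limb count) on entry, then idx (byte offset applied to rp and ap)
// R5 = b0 (multiplier limb, y)
// R6 = cy (carry limb)
// R10 = loop counter (an / 4)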
#include "textflag.h"
// addMulVVW1024 is the entry point for 16 limbs of 8 bytes (1024 bits):
//
//	func addMulVVW1024(z, x *uint, y uint) (c uint)
//
// It only records the limb count in R4 and then branches (no call) into
// addMulVVWx, which reads z, x and y from this function's arguments and
// stores the result c.
TEXT ·addMulVVW1024(SB), $0-32
	MOVD	$16, R4
	BR	addMulVVWx(SB)
// addMulVVW1536 is the entry point for 24 limbs of 8 bytes (1536 bits):
//
//	func addMulVVW1536(z, x *uint, y uint) (c uint)
//
// It only records the limb count in R4 and then branches (no call) into
// addMulVVWx, which reads z, x and y from this function's arguments and
// stores the result c.
TEXT ·addMulVVW1536(SB), $0-32
	MOVD	$24, R4
	BR	addMulVVWx(SB)
// addMulVVW2048 is the entry point for 32 limbs of 8 bytes (2048 bits):
//
//	func addMulVVW2048(z, x *uint, y uint) (c uint)
//
// It only records the limb count in R4 and then branches (no call) into
// addMulVVWx, which reads z, x and y from this function's arguments and
// stores the result c.
TEXT ·addMulVVW2048(SB), $0-32
	MOVD	$32, R4
	BR	addMulVVWx(SB)
// addMulVVWx is the shared body behind the ·addMulVVW1024, ·addMulVVW1536 and
// ·addMulVVW2048 entry points. Each of them loads a limb count into R4 and
// branches here with JMP (not a call), so the z+0(FP), x+8(FP), y+16(FP) and
// c+24(FP) references below address the entry point's arguments.
//
// Effect: for R4 limbs of 8 bytes each, z[i] += x[i] * y with carry
// propagation; the final carry limb is stored to c.
//
// Structure:
//   L_ent          dispatch on the low two bits of the limb count
//   L_b01, L_b11   odd count: fold in limb 0 with scalar arithmetic first
//   L_b00, L_b10   even count: only set up the starting offset
//   L_cj0, L_cj1   prime the pipeline: multiply two limbs, load two z limbs
//   L_top, L_mid   main loop: two limbs per half, four limbs per iteration
//   L_end          add and store the last pending pair of limbs
//   L_1            combine the carries and return
// The entry points in this file pass 16, 24 and 32, whose low two bits are
// zero, so they run L_bx0 -> L_b00 -> L_cj0 -> L_mid.
//
// Registers:
//   R2 = z, R3 = x, R5 = y
//   R4 = limb count on entry, afterwards a byte offset added to R2 and R3
//   R10 = R4 >> 2, counted down by BRCTG
//   R0:R1, R6:R7, R8:R9 = high:low halves of the products written by MLGR
//   V1 = two z limbs, V6/V7 = product addends, V5/V3 = sums,
//   V0/V2 = carries of the two 128-bit addition chains
//
// NOTE(review): the Go assembler documentation lists R10 as a register the
// s390x assembler may use as a temporary. This code keeps its loop counter
// there and presumably relies on none of its instructions being expanded
// through R10 - confirm before adding instructions with large offsets.
TEXT addMulVVWx(SB), NOFRAME|NOSPLIT, $0
MOVD z+0(FP), R2
MOVD x+8(FP), R3
MOVD y+16(FP), R5
// cy = 0: there is no carry coming in.
MOVD $0, R6
L_ent:
// Clear both carry vectors and derive the loop counter R10 = count >> 2.
VZERO V0
VZERO V2
SRD $2, R4, R10
// Bit 0 of the count clear -> even number of limbs.
TMLL R4, $1
BRC $8, L_bx0
L_bx1:
// Odd count: z[0] goes into the low doubleword of the zeroed V2, and V4 is
// cleared so it can receive the low product limb.
VLEG $1, 0(R2), V2
VZERO V4
// Bit 1 of the count set -> count % 4 == 3.
TMLL R4, $2
BRC $7, L_b11
L_b01:
// count % 4 == 1. R4 = -24 makes the 32(..)(R4) operands at L_cj0 start at
// limb 1.
MOVD $-24, R4
// MLGR below overwrites the R6:R7 pair, so keep cy in R0.
MOVD R6, R0
MOVD 0(R3), R7
// R6:R7 = x[0] * y (R6 high half, R7 low half).
MLGR R5, R6
// R6:R7 += cy; R0 is zeroed so ADDE only adds the carry into the high half.
ADDC R0, R7
MOVD $0, R0
ADDE R0, R6
// V2 = z[0] + R7 as a 128-bit sum: the low doubleword is the new z[0], the
// high doubleword is the carry of that addition.
VLVGG $1, R7, V4
VAQ V2, V4, V2
VSTEG $1, V2, 0(R2)
// Copy the high doubleword (the carry) into both halves of V2, where it
// acts as the carry of the second addition chain.
VMRHG V2, V2, V2
// R10 == 0 here means the count was exactly 1: nothing is left but the
// carry fold.
CMPBEQ R10, $0, L_1
BR L_cj0
L_b11:
// count % 4 == 3. R4 = -8 makes the 16(R4)(..) operands at L_cj1 start at
// limb 1.
MOVD $-8, R4
MOVD 0(R3), R9
// R8:R9 = x[0] * y; this pair leaves cy in R6 untouched.
MLGR R5, R8
// R8:R9 += cy; R6 is then cleared and reused as the zero operand of ADDE.
ADDC R6, R9
MOVD $0, R6
ADDE R6, R8
// Same z[0] update as in L_b01, with the low product limb in R9.
VLVGG $1, R9, V4
VAQ V2, V4, V2
VSTEG $1, V2, 0(R2)
VMRHG V2, V2, V2
BR L_cj1
L_bx0:
// Even count: bit 1 set -> count % 4 == 2.
TMLL R4, $2
BRC $7, L_b10
L_b00:
// count % 4 == 0. R4 = -32 makes the 32(..)(R4) operands at L_cj0 start at
// limb 0.
MOVD $-32, R4
L_cj0:
// Pipeline entry that continues at L_mid. R0:R1 and R8:R9 receive the
// products of the limbs at 32(R3)(R4) and 40(R3)(R4) with y.
MOVD 32(R3)(R4), R1
MOVD 40(R3)(R4), R9
MLGR R5, R0
MLGR R5, R8
// Load the matching two z limbs and swap the doublewords, so the limb at the
// lower address becomes the low half of the 128-bit value.
VL 32(R4)(R2), V1
VPDI $4, V1, V1, V1
// V6 = high:low of the first product.
// V7 = low half of the second product : R6 (cy, or the high half left by
// L_b01).
VLVGP R0, R1, V6
VLVGP R9, R6, V7
BR L_mid
L_b10:
// count % 4 == 2. R4 = -16 makes the 16(R4)(..) operands at L_cj1 start at
// limb 0.
MOVD $-16, R4
// L_cj1 takes the incoming high limb from R8, so move cy there.
MOVD R6, R8
L_cj1:
// Pipeline entry that continues at L_top, or at L_end when R10 == 0.
// R0:R1 and R6:R7 receive the products of the limbs at 16(R4)(R3) and
// 24(R4)(R3) with y.
MOVD 16(R4)(R3), R1
MOVD 24(R4)(R3), R7
MLGR R5, R0
MLGR R5, R6
VL 16(R4)(R2), V1
VPDI $4, V1, V1, V1
// V6 = high:low of the first product.
// V7 = low half of the second product : R8 (cy, or the high half left by
// L_b11).
VLVGP R0, R1, V6
VLVGP R7, R8, V7
// R10 == 0 here means the count was 2 or 3: only the final add and store
// remain.
CMPBEQ R10, $0, L_end
L_top:
// First half of an iteration: start the next two multiplications while the
// previously prepared V6, V7 and V1 are summed.
MOVD 32(R4)(R3), R1
MOVD 40(R4)(R3), R9
MLGR R5, R0
MLGR R5, R8
// V5 = V6 + V1 + carry(V0); V0 = carry out of that addition.
VACQ V6, V1, V0, V5
VACCCQ V6, V1, V0, V0
// V3 = V5 + V7 + carry(V2); V2 = carry out of that addition.
VACQ V5, V7, V2, V3
VACCCQ V5, V7, V2, V2
// Swap the doublewords back into memory limb order.
VPDI $4, V3, V3, V3
// Fetch the next pair of z limbs, then store the finished pair 16 bytes
// below it.
VL 32(R4)(R2), V1
VPDI $4, V1, V1, V1
VST V3, 16(R4)(R2)
// Addends for the next addition. R6 is the high half of the second product
// computed by the preceding L_mid or L_cj1.
VLVGP R0, R1, V6
VLVGP R9, R6, V7
L_mid:
// Second half of an iteration: same pattern, 16 bytes further on, with the
// R6:R7 pair taking the second product.
MOVD 48(R4)(R3), R1
MOVD 56(R4)(R3), R7
MLGR R5, R0
MLGR R5, R6
VACQ V6, V1, V0, V5
VACCCQ V6, V1, V0, V0
VACQ V5, V7, V2, V3
VACCCQ V5, V7, V2, V2
VPDI $4, V3, V3, V3
VL 48(R4)(R2), V1
VPDI $4, V1, V1, V1
VST V3, 32(R4)(R2)
// R8 is the high half of the second product computed by the preceding L_top
// or L_cj0.
VLVGP R0, R1, V6
VLVGP R7, R8, V7
// Advance the offset by 32 bytes (four limbs) and loop while the decremented
// R10 is non-zero.
MOVD $32(R4), R4
BRCTG R10, L_top
L_end:
// Drain the pipeline: add the last prepared products into the last pair of
// z limbs and store it.
VACQ V6, V1, V0, V5
VACCCQ V6, V1, V0, V0
VACQ V5, V7, V2, V3
VACCCQ V5, V7, V2, V2
VPDI $4, V3, V3, V3
VST V3, 16(R2)(R4)
// Merge the carry-outs of both addition chains (doubleword-wise add).
VAG V0, V2, V2
L_1:
// c = low doubleword of V2 + R6, the high product limb that has not been
// added into z.
VLGVG $1, V2, R2
ADDC R6, R2
MOVD R2, c+24(FP)
RET