blob: bf64565d5c9fc84a7a09fb12fe30f6a47e9bb7ba [file] [log] [blame]
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package main
import (
"strconv"
. "github.com/mmcloughlin/avo/build"
. "github.com/mmcloughlin/avo/operand"
. "github.com/mmcloughlin/avo/reg"
)
//go:generate go run . -out ../nat_amd64.s -pkg bigmod
// main declares the target package and build constraint, emits one
// addMulVVW routine per supported operand size, and then writes out the
// generated assembly.
func main() {
	Package("crypto/internal/bigmod")
	ConstraintExpr("!purego")

	// Operand sizes, in bits, for which a dedicated routine is generated.
	for _, size := range []int{1024, 1536, 2048} {
		addMulVVW(size)
	}

	Generate()
}
// addMulVVW emits the body of the assembly function addMulVVW<bits>.
//
// The generated routine is fully unrolled over bits/64 64-bit limbs. For each
// limb it multiplies x[i] by y, adds the product and the running carry into
// z[i], and finally stores the remaining carry as the first return value.
//
// Two code paths are emitted:
//
//   - a generic path built on MULQ/ADDQ/ADCQ, and
//   - an "adx" path built on MULXQ/ADCXQ/ADOXQ, taken at run time when the
//     byte at ·supportADX compares equal to 1.
//
// bits must be a positive multiple of 64; otherwise addMulVVW panics. Any
// limb count (odd or even) is supported on both paths.
func addMulVVW(bits int) {
	if bits <= 0 || bits%64 != 0 {
		panic("bit size unsupported")
	}
	limbs := bits / 64

	Implement("addMulVVW" + strconv.Itoa(bits))

	// Dispatch to the ADX implementation when the feature flag is set.
	CMPB(Mem{Symbol: Symbol{Name: "·supportADX"}, Base: StaticBase}, Imm(1))
	JEQ(LabelRef("adx"))

	// Generic path. z and x are used as base addresses of the limb arrays.
	z := Mem{Base: Load(Param("z"), GP64())}
	x := Mem{Base: Load(Param("x"), GP64())}
	y := Load(Param("y"), GP64())

	carry := GP64()
	XORQ(carry, carry) // zero out carry

	for i := 0; i < limbs; i++ {
		Comment("Iteration " + strconv.Itoa(i))
		hi, lo := RDX, RAX // implicit MULQ inputs and outputs
		MOVQ(x.Offset(i*8), lo)
		MULQ(y)
		// Fold z[i] and the incoming carry into the 128-bit product,
		// propagating each overflow of the low half into the high half.
		ADDQ(z.Offset(i*8), lo)
		ADCQ(Imm(0), hi)
		ADDQ(carry, lo)
		ADCQ(Imm(0), hi)
		MOVQ(hi, carry)
		MOVQ(lo, z.Offset(i*8))
	}

	Store(carry, ReturnIndex(0))
	RET()

	Label("adx")

	// The ADX strategy implements the following function, where c1 and c2 are
	// the overflow and the carry flag respectively.
	//
	//    func addMulVVW(z, x []uint, y uint) (carry uint) {
	//        var c1, c2 uint
	//        for i := range z {
	//            hi, lo := bits.Mul(x[i], y)
	//            lo, c1 = bits.Add(lo, z[i], c1)
	//            z[i], c2 = bits.Add(lo, carry, c2)
	//            carry = hi
	//        }
	//        return carry + c1 + c2
	//    }
	//
	// The loop is fully unrolled and the hi / carry registers are alternated
	// instead of introducing a MOV.

	z = Mem{Base: Load(Param("z"), GP64())}
	x = Mem{Base: Load(Param("x"), GP64())}
	Load(Param("y"), RDX) // implicit source of MULXQ

	carry = GP64()
	XORQ(carry, carry) // zero out carry

	z0 := GP64()
	XORQ(z0, z0) // unset flags and zero out z0

	// Each pass emits up to two limbs: the first writes its high word to hi,
	// the second writes it back to carry, so no register move is needed
	// between them.
	for i := 0; i < limbs; i += 2 {
		hi, lo := GP64(), GP64()

		Comment("Iteration " + strconv.Itoa(i))
		MULXQ(x.Offset(i*8), lo, hi)
		ADCXQ(carry, lo)
		ADOXQ(z.Offset(i*8), lo)
		MOVQ(lo, z.Offset(i*8))

		if i+1 == limbs {
			// Odd limb count: there is no partner limb for this pass.
			// Emitting one anyway would touch memory past the end of x
			// and z, so stop here and let hi serve as the final carry.
			carry = hi
			break
		}

		Comment("Iteration " + strconv.Itoa(i+1))
		MULXQ(x.Offset((i+1)*8), lo, carry)
		ADCXQ(hi, lo)
		ADOXQ(z.Offset((i+1)*8), lo)
		MOVQ(lo, z.Offset((i+1)*8))
	}

	Comment("Add back carry flags and return")
	ADCXQ(z0, carry)
	ADOXQ(z0, carry)

	Store(carry, ReturnIndex(0))
	RET()
}