blob: 46180f12f6dfa083d1e8f8a6dee20a8cafd51f3c [file]
// Copyright 2026 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.simd && wasm
package archsimd
// Constant tables loaded into vectors by the emulation routines in this file.
var (
	// nn has only the sign bit set in each 64-bit element.
	nn = [2]int64{-1 << 63, -1 << 63}

	// f0s is all-ones in even-indexed bytes and zero in odd-indexed bytes.
	f0s = [16]int8{
		-1, 0, -1, 0, -1, 0, -1, 0,
		-1, 0, -1, 0, -1, 0, -1, 0,
	}

	// ff00s is all-ones in even-indexed 16-bit elements and zero in odd-indexed ones.
	ff00s = [8]int16{-1, 0, -1, 0, -1, 0, -1, 0}

	// ffff0000s is all-ones in even-indexed 32-bit elements and zero in odd-indexed ones.
	ffff0000s = [4]int32{-1, 0, -1, 0}
)
// For unsigned comparison, the trick for converting it into
// signed comparison is to notice that the unsigned range is
// the same as the signed range plus 1 << (bitwidth-1).
// And adding or subtracting the sign bit is the same as XORing
// it. Thus, XOR the sign bit into both operands and then use the
// signed comparison operations.
// Less returns a mask vector of x[i] < y[i], comparing elements as unsigned.
func (x Uint64x2) Less(y Uint64x2) Mask64x2 {
	// XORing the sign bit into both operands lets the signed
	// comparison produce the unsigned ordering.
	bias := LoadInt64x2Array(&nn)
	return x.BitsToInt64().Xor(bias).Less(y.BitsToInt64().Xor(bias))
}
// LessEqual returns a mask vector of x[i] <= y[i], comparing elements as unsigned.
func (x Uint64x2) LessEqual(y Uint64x2) Mask64x2 {
	// XORing the sign bit into both operands lets the signed
	// comparison produce the unsigned ordering.
	bias := LoadInt64x2Array(&nn)
	return x.BitsToInt64().Xor(bias).LessEqual(y.BitsToInt64().Xor(bias))
}
// Greater returns a mask vector of x[i] > y[i], comparing elements as unsigned.
func (x Uint64x2) Greater(y Uint64x2) Mask64x2 {
	// XORing the sign bit into both operands lets the signed
	// comparison produce the unsigned ordering.
	bias := LoadInt64x2Array(&nn)
	return x.BitsToInt64().Xor(bias).Greater(y.BitsToInt64().Xor(bias))
}
// GreaterEqual returns a mask vector of x[i] >= y[i], comparing elements as unsigned.
func (x Uint64x2) GreaterEqual(y Uint64x2) Mask64x2 {
	// XORing the sign bit into both operands lets the signed
	// comparison produce the unsigned ordering.
	bias := LoadInt64x2Array(&nn)
	return x.BitsToInt64().Xor(bias).GreaterEqual(y.BitsToInt64().Xor(bias))
}
// Max returns the elementwise maximum of x and y.
func (x Int64x2) Max(y Int64x2) Int64x2 {
	// Blend: take x where x > y, otherwise take y.
	pick := x.Greater(y).ToInt64x2()
	fromX := x.And(pick)
	fromY := y.AndNot(pick)
	return fromX.Or(fromY)
}
// Min returns the elementwise minimum of x and y.
func (x Int64x2) Min(y Int64x2) Int64x2 {
	// Blend: take x where x < y, otherwise take y.
	pick := x.Less(y).ToInt64x2()
	fromX := x.And(pick)
	fromY := y.AndNot(pick)
	return fromX.Or(fromY)
}
// Max returns the elementwise maximum of x and y.
func (x Uint64x2) Max(y Uint64x2) Uint64x2 {
	// Blend: take x where x > y, otherwise take y.
	pick := x.Greater(y).ToInt64x2().ToBits()
	fromX := x.And(pick)
	fromY := y.AndNot(pick)
	return fromX.Or(fromY)
}
// Min returns the elementwise minimum of x and y.
func (x Uint64x2) Min(y Uint64x2) Uint64x2 {
	// Blend: take x where x < y, otherwise take y.
	pick := x.Less(y).ToInt64x2().ToBits()
	fromX := x.And(pick)
	fromY := y.AndNot(pick)
	return fromX.Or(fromY)
}
// Mul returns the elementwise product of x and y.
func (x Int8x16) Mul(y Int8x16) Int8x16 {
	// The 8-bit multiply is built from 16-bit multiplies: the even and
	// odd bytes are handled separately, with the odd bytes first shifted
	// into the even position. Each 16-bit product is truncated to its
	// low 8 bits, the odd results are shifted back, and the two halves
	// are merged.
	evenBytes := LoadInt8x16Array(&f0s)
	low8 := evenBytes.ToBits().ReshapeToUint16s()

	// Odd-indexed bytes, moved down into the even position.
	xOdd := x.AndNot(evenBytes).ToBits().ReshapeToUint16s().ShiftAllRight(8)
	yOdd := y.AndNot(evenBytes).ToBits().ReshapeToUint16s().ShiftAllRight(8)
	oddProd := xOdd.Mul(yOdd).And(low8).ShiftAllLeft(8)

	// Even-indexed bytes, already in position.
	xEven := x.And(evenBytes).ToBits().ReshapeToUint16s()
	yEven := y.And(evenBytes).ToBits().ReshapeToUint16s()
	evenProd := xEven.Mul(yEven).And(low8)

	return evenProd.Or(oddProd).ReshapeToUint8s().BitsToInt8()
}
// Mul returns the elementwise product of x and y.
func (x Uint8x16) Mul(y Uint8x16) Uint8x16 {
	// The 8-bit multiply is built from 16-bit multiplies: the even and
	// odd bytes are handled separately, with the odd bytes first shifted
	// into the even position. Each 16-bit product is truncated to its
	// low 8 bits, the odd results are shifted back, and the two halves
	// are merged.
	evenBytes := LoadInt8x16Array(&f0s).ToBits()
	low8 := evenBytes.ReshapeToUint16s()

	// Odd-indexed bytes, moved down into the even position.
	xOdd := x.AndNot(evenBytes).ReshapeToUint16s().ShiftAllRight(8)
	yOdd := y.AndNot(evenBytes).ReshapeToUint16s().ShiftAllRight(8)
	oddProd := xOdd.Mul(yOdd).And(low8).ShiftAllLeft(8)

	// Even-indexed bytes, already in position.
	xEven := x.And(evenBytes).ReshapeToUint16s()
	yEven := y.And(evenBytes).ReshapeToUint16s()
	evenProd := xEven.Mul(yEven).And(low8)

	return evenProd.Or(oddProd).ReshapeToUint8s()
}
// OnesCount returns the number of set bits in each element of x.
func (x Int16x8) OnesCount() Int16x8 {
	// Count bits per byte, then add the two byte counts that make up
	// each 16-bit element.
	byteCounts := x.ToBits().ReshapeToUint8s().BitsToInt8().OnesCount()
	evenBytes := LoadInt8x16Array(&f0s)

	// Counts from the even-indexed bytes, viewed as 16-bit elements.
	even := byteCounts.And(evenBytes).ToBits().ReshapeToUint16s().BitsToInt16()
	// Counts from the odd-indexed bytes, shifted down to line up with the even ones.
	odd := byteCounts.AndNot(evenBytes).ToBits().ReshapeToUint16s().BitsToInt16().ShiftAllRight(8)

	return even.Add(odd)
}
// OnesCount returns the number of set bits in each element of x.
func (x Int32x4) OnesCount() Int32x4 {
	// Count bits per byte, then sum pairwise twice: bytes into 16-bit
	// counts, then 16-bit counts into 32-bit counts.
	byteCounts := x.ToBits().ReshapeToUint8s().BitsToInt8().OnesCount()

	// Stage 1: per-byte counts -> per-16-bit counts.
	evenBytes := LoadInt8x16Array(&f0s)
	evenB := byteCounts.And(evenBytes).ToBits().ReshapeToUint16s().BitsToInt16()
	oddB := byteCounts.AndNot(evenBytes).ToBits().ReshapeToUint16s().BitsToInt16().ShiftAllRight(8)
	counts16 := evenB.Add(oddB)

	// Stage 2: per-16-bit counts -> per-32-bit counts.
	evenHalves := LoadInt16x8Array(&ff00s)
	evenH := counts16.And(evenHalves).ToBits().ReshapeToUint32s().BitsToInt32()
	oddH := counts16.AndNot(evenHalves).ToBits().ReshapeToUint32s().BitsToInt32().ShiftAllRight(16)

	return evenH.Add(oddH)
}
// OnesCount returns the number of set bits in each element of x.
func (x Int64x2) OnesCount() Int64x2 {
	// Count bits per byte, then sum pairwise three times: bytes into
	// 16-bit counts, those into 32-bit counts, and those into 64-bit
	// counts.
	byteCounts := x.ToBits().ReshapeToUint8s().BitsToInt8().OnesCount()

	// Stage 1: per-byte counts -> per-16-bit counts.
	evenBytes := LoadInt8x16Array(&f0s)
	evenB := byteCounts.And(evenBytes).ToBits().ReshapeToUint16s().BitsToInt16()
	oddB := byteCounts.AndNot(evenBytes).ToBits().ReshapeToUint16s().BitsToInt16().ShiftAllRight(8)
	counts16 := evenB.Add(oddB)

	// Stage 2: per-16-bit counts -> per-32-bit counts.
	evenHalves := LoadInt16x8Array(&ff00s)
	evenH := counts16.And(evenHalves).ToBits().ReshapeToUint32s().BitsToInt32()
	oddH := counts16.AndNot(evenHalves).ToBits().ReshapeToUint32s().BitsToInt32().ShiftAllRight(16)
	counts32 := evenH.Add(oddH)

	// Stage 3: per-32-bit counts -> per-64-bit counts.
	evenWords := LoadInt32x4Array(&ffff0000s)
	evenW := counts32.And(evenWords).ToBits().ReshapeToUint64s().BitsToInt64()
	oddW := counts32.AndNot(evenWords).ToBits().ReshapeToUint64s().BitsToInt64().ShiftAllRight(32)

	return evenW.Add(oddW)
}
// OnesCount returns the number of set bits in each element of x.
func (x Uint8x16) OnesCount() Uint8x16 {
	// Reuse the signed implementation on the same bits.
	counts := x.BitsToInt8().OnesCount()
	return counts.ToBits()
}
// OnesCount returns the number of set bits in each element of x.
func (x Uint16x8) OnesCount() Uint16x8 {
	// Reuse the signed implementation on the same bits.
	counts := x.BitsToInt16().OnesCount()
	return counts.ToBits()
}
// OnesCount returns the number of set bits in each element of x.
func (x Uint32x4) OnesCount() Uint32x4 {
	// Reuse the signed implementation on the same bits.
	counts := x.BitsToInt32().OnesCount()
	return counts.ToBits()
}
// OnesCount returns the number of set bits in each element of x.
func (x Uint64x2) OnesCount() Uint64x2 {
	// Reuse the signed implementation on the same bits.
	counts := x.BitsToInt64().OnesCount()
	return counts.ToBits()
}
// CarrylessMultiplyEven computes the carryless
// multiplications of selected even halves of the elements of x and y.
//
// Carryless multiplication combines partial products with bitwise
// XOR rather than add-with-carry. For example, in base two:
//
//	11 * 11 = 11 * (10 ^ 1) = (11 * 10) ^ (11 * 1) = 110 ^ 11 = 101
//
// Equivalently, it multiplies polynomials whose coefficients are in
// GF(2): 11 * 11 stands for (x+1)*(x+1) = x**2 + (1^1)x + 1 =
// x**2 + 0x + 1 = x**2 + 1, which is written 101. (Here "+" joins
// polynomial terms, while coefficients are "added" with XOR.)
//
// Emulated
func (x Uint64x2) CarrylessMultiplyEven(y Uint64x2) Uint64x2 {
	// Both operands are handed to the shared helper unchanged.
	product := x.carrylessMultiply(y)
	return product
}
// CarrylessMultiplyOdd computes the carryless
// multiplications of selected odd halves of the elements of x and y.
//
// A carryless multiplication uses bitwise XOR instead of
// add-with-carry, for example (in base two):
//
//	11 * 11 = 11 * (10 ^ 1) = (11 * 10) ^ (11 * 1) = 110 ^ 11 = 101
//
// This also models multiplication of polynomials with coefficients
// from GF(2) -- 11 * 11 models (x+1)*(x+1) = x**2 + (1^1)x + 1 =
// x**2 + 0x + 1 = x**2 + 1 modeled by 101. (Note that "+" adds
// polynomial terms, but coefficients "add" with XOR.)
//
// Emulated
func (x Uint64x2) CarrylessMultiplyOdd(y Uint64x2) Uint64x2 {
	// Copy element 1 of each operand into element 0 and reuse the helper
	// that CarrylessMultiplyEven calls on the unmodified operands.
	// Each operand must supply its own element 1: taking y's replacement
	// from x would multiply x[1] by itself and ignore y.
	xOdd := x.SetElem(0, x.GetElem(1))
	yOdd := y.SetElem(0, y.GetElem(1))
	return xOdd.carrylessMultiply(yOdd)
}