blob: a9625582a9bee82c54056572817e948e830dcece [file]
// Copyright 2026 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.simd && !(amd64 || wasm || arm64)
package simd
import (
"fmt"
"math"
"math/bits"
)
// VectorBitSize returns the bit length of the emulated vector (fixed to 128).
func VectorBitSize() int {
return 128
}
// Emulated reports whether simd is emulated.
// This implementation always returns true.
func Emulated() bool {
return true
}
// HasHardwareCarrylessMultiply reports whether this platform
// has a hardware-implemented version of carryless multiply.
// With default GODEBUG=simd settings, if this is false,
// it is emulated and merely slow, but with non-default settings
// this can indicate the possibility of a missing instruction
// that will fail ("SIGILL") if it is executed.
//
// This implementation always returns false.
func HasHardwareCarrylessMultiply() bool {
return false
}
// _simd is a zero-size marker placed as a blank field in every vector type
// in this file.
// Its only field is a zero-length array of funcs, which occupies no storage;
// because func values are not comparable, a struct containing _simd cannot
// be compared with == (presumably the reason for the marker — confirm).
type _simd struct {
_ [0]func(*_simd) *_simd
}
// Int8s represents a 128-bit vector of 16 int8 elements.
// Lanes 0-7 are packed into a and lanes 8-15 into b, eight bits per lane,
// with the lowest-numbered lane of each word in its least significant bits.
type Int8s struct {
_ _simd
a, b uint64
}
// LoadInt8s packs the first 16 elements of s into an Int8s vector,
// placing s[0] in lane 0. It panics if s has fewer than 16 elements.
func LoadInt8s(s []int8) Int8s {
	var words [2]uint64
	for i := 0; i < 16; i++ {
		// Lanes 0-7 go to words[0], lanes 8-15 to words[1].
		words[i/8] |= uint64(uint8(s[i])) << (8 * (i % 8))
	}
	return Int8s{a: words[0], b: words[1]}
}
// LoadInt8sPart packs up to 16 leading elements of s into an Int8s vector,
// leaving any remaining lanes zero. It returns the vector and the number
// of elements that were loaded.
func LoadInt8sPart(s []int8) (Int8s, int) {
	n := 16
	if len(s) < n {
		n = len(s)
	}
	var words [2]uint64
	for i, e := range s[:n] {
		words[i/8] |= uint64(uint8(e)) << (8 * (i % 8))
	}
	return Int8s{a: words[0], b: words[1]}, n
}
// get returns the value held in lane i of x.
func (x Int8s) get(i int) int8 {
	word, lane := x.a, i
	if i >= 8 {
		word, lane = x.b, i-8
	}
	return int8(word >> (8 * lane))
}
// set overwrites lane i of x with v, leaving all other lanes untouched.
func (x *Int8s) set(i int, v int8) {
	word, lane := &x.a, i
	if i >= 8 {
		word, lane = &x.b, i-8
	}
	shift := 8 * lane
	mask := uint64(0xff) << shift
	// Clear the lane, then drop in the new bit pattern.
	*word = (*word &^ mask) | (uint64(uint8(v)) << shift)
}
// Abs returns the element-wise absolute value of x.
// Negation wraps, so the most negative int8 maps to itself.
func (x Int8s) Abs() Int8s {
	var out Int8s
	for lane := 0; lane < 16; lane++ {
		v := x.get(lane)
		if v < 0 {
			v = -v
		}
		out.set(lane, v)
	}
	return out
}
// Add returns the element-wise sum of x and y, wrapping on overflow.
func (x Int8s) Add(y Int8s) Int8s {
	var out Int8s
	for lane := 0; lane < 16; lane++ {
		l, r := x.get(lane), y.get(lane)
		out.set(lane, l+r)
	}
	return out
}
// AddSaturated returns the element-wise sum of x and y, clamping each
// lane to the int8 range instead of wrapping.
func (x Int8s) AddSaturated(y Int8s) Int8s {
	var out Int8s
	for lane := 0; lane < 16; lane++ {
		// Widen to int so the true sum is available for clamping.
		total := int(x.get(lane)) + int(y.get(lane))
		switch {
		case total > math.MaxInt8:
			total = math.MaxInt8
		case total < math.MinInt8:
			total = math.MinInt8
		}
		out.set(lane, int8(total))
	}
	return out
}
// And returns the bitwise AND of x and y.
func (x Int8s) And(y Int8s) Int8s {
	// x is a copy (value receiver), so it can be updated in place.
	x.a &= y.a
	x.b &= y.b
	return x
}
// AndNot returns x with every bit that is set in y cleared (x &^ y).
func (x Int8s) AndNot(y Int8s) Int8s {
	x.a &^= y.a
	x.b &^= y.b
	return x
}
// Equal returns a mask whose lanes are true where x and y hold equal values.
func (x Int8s) Equal(y Int8s) Mask8s {
	var m Mask8s
	for lane := 0; lane < 16; lane++ {
		if l, r := x.get(lane), y.get(lane); l == r {
			m.set(lane, true)
		}
	}
	return m
}
// Greater returns a mask whose lanes are true where x is greater than y
// (signed comparison).
func (x Int8s) Greater(y Int8s) Mask8s {
	var m Mask8s
	for lane := 0; lane < 16; lane++ {
		if l, r := x.get(lane), y.get(lane); l > r {
			m.set(lane, true)
		}
	}
	return m
}
// GreaterEqual returns a mask whose lanes are true where x is greater than
// or equal to y (signed comparison).
func (x Int8s) GreaterEqual(y Int8s) Mask8s {
	var m Mask8s
	for lane := 0; lane < 16; lane++ {
		if l, r := x.get(lane), y.get(lane); l >= r {
			m.set(lane, true)
		}
	}
	return m
}
// Less returns a mask whose lanes are true where x is less than y
// (signed comparison).
func (x Int8s) Less(y Int8s) Mask8s {
	var m Mask8s
	for lane := 0; lane < 16; lane++ {
		if l, r := x.get(lane), y.get(lane); l < r {
			m.set(lane, true)
		}
	}
	return m
}
// LessEqual returns a mask whose lanes are true where x is less than or
// equal to y (signed comparison).
func (x Int8s) LessEqual(y Int8s) Mask8s {
	var m Mask8s
	for lane := 0; lane < 16; lane++ {
		if l, r := x.get(lane), y.get(lane); l <= r {
			m.set(lane, true)
		}
	}
	return m
}
// NotEqual returns a mask whose lanes are true where x and y differ.
func (x Int8s) NotEqual(y Int8s) Mask8s {
	var m Mask8s
	for lane := 0; lane < 16; lane++ {
		if l, r := x.get(lane), y.get(lane); l != r {
			m.set(lane, true)
		}
	}
	return m
}
// Len returns the number of elements in the vector, which is always 16.
func (x Int8s) Len() int {
return 16
}
// Masked returns x ANDed bit-for-bit with the words of mask, so elements
// survive where mask is true and become zero elsewhere.
func (x Int8s) Masked(mask Mask8s) Int8s {
	x.a &= mask.a
	x.b &= mask.b
	return x
}
// Max returns the element-wise (signed) maximum of x and y.
func (x Int8s) Max(y Int8s) Int8s {
	var out Int8s
	for lane := 0; lane < 16; lane++ {
		best := y.get(lane)
		if v := x.get(lane); v > best {
			best = v
		}
		out.set(lane, best)
	}
	return out
}
// Mul returns the element-wise product of x and y, keeping the low 8 bits
// of each product.
func (x Int8s) Mul(y Int8s) Int8s {
	var out Int8s
	for lane := 0; lane < 16; lane++ {
		l, r := x.get(lane), y.get(lane)
		out.set(lane, l*r)
	}
	return out
}
// IfElse blends x and y bit by bit: bits set in mask are taken from x,
// all other bits from y.
func (x Int8s) IfElse(mask Mask8s, y Int8s) Int8s {
	var out Int8s
	out.a = x.a&mask.a | y.a&^mask.a
	out.b = x.b&mask.b | y.b&^mask.b
	return out
}
// Min returns the element-wise (signed) minimum of x and y.
func (x Int8s) Min(y Int8s) Int8s {
	var out Int8s
	for lane := 0; lane < 16; lane++ {
		least := y.get(lane)
		if v := x.get(lane); v < least {
			least = v
		}
		out.set(lane, least)
	}
	return out
}
// Neg returns the element-wise negation of x (wrapping).
func (x Int8s) Neg() Int8s {
	var out Int8s
	for lane := 0; lane < 16; lane++ {
		v := x.get(lane)
		out.set(lane, -v)
	}
	return out
}
// Not returns x with every bit inverted.
func (x Int8s) Not() Int8s {
	x.a = ^x.a
	x.b = ^x.b
	return x
}
// Or returns the bitwise OR of x and y.
func (x Int8s) Or(y Int8s) Int8s {
	x.a |= y.a
	x.b |= y.b
	return x
}
// Store writes the lanes of x into s starting at s[0]. When s holds fewer
// than 16 elements only the first len(s) lanes are written.
func (x Int8s) Store(s []int8) {
	n := len(s)
	if n > 16 {
		n = 16
	}
	for i := range s[:n] {
		s[i] = x.get(i)
	}
}
// StorePart writes as many leading lanes of x as fit into s and returns
// the number of elements written.
func (x Int8s) StorePart(s []int8) int {
	x.Store(s)
	if lanes := x.Len(); len(s) > lanes {
		return lanes
	}
	return len(s)
}
// String formats the 16 lanes of x as a Go array, e.g. "[1 2 ...]".
func (x Int8s) String() string {
	var lanes [16]int8
	for i := range lanes {
		lanes[i] = x.get(i)
	}
	return fmt.Sprint(lanes)
}
// Sub returns the element-wise difference x - y, wrapping on overflow.
func (x Int8s) Sub(y Int8s) Int8s {
	var out Int8s
	for lane := 0; lane < 16; lane++ {
		l, r := x.get(lane), y.get(lane)
		out.set(lane, l-r)
	}
	return out
}
// SubSaturated returns the element-wise difference x - y, clamping each
// lane to the int8 range instead of wrapping.
func (x Int8s) SubSaturated(y Int8s) Int8s {
	var out Int8s
	for lane := 0; lane < 16; lane++ {
		// Widen to int so the true difference is available for clamping.
		delta := int(x.get(lane)) - int(y.get(lane))
		switch {
		case delta > math.MaxInt8:
			delta = math.MaxInt8
		case delta < math.MinInt8:
			delta = math.MinInt8
		}
		out.set(lane, int8(delta))
	}
	return out
}
// ToMask returns a mask whose lanes are true where the corresponding
// element of x is non-zero.
func (x Int8s) ToMask() Mask8s {
	var m Mask8s
	for lane := 0; lane < 16; lane++ {
		if v := x.get(lane); v != 0 {
			m.set(lane, true)
		}
	}
	return m
}
// Xor returns the bitwise XOR of x and y.
func (x Int8s) Xor(y Int8s) Int8s {
	x.a ^= y.a
	x.b ^= y.b
	return x
}
// ConvertToUint8 converts the vector elements to uint8.
// The bit pattern of every lane is carried over unchanged.
func (x Int8s) ConvertToUint8() Uint8s {
	var out Uint8s
	out.a, out.b = x.a, x.b
	return out
}
// ToBits reinterprets the bits of x as a Uint8s vector.
func (x Int8s) ToBits() Uint8s {
	var out Uint8s
	out.a, out.b = x.a, x.b
	return out
}
// Int16s represents a 128-bit vector of 8 int16 elements.
// Lanes 0-3 are packed into a and lanes 4-7 into b, sixteen bits per lane,
// with the lowest-numbered lane of each word in its least significant bits.
type Int16s struct {
_ _simd
a, b uint64
}
// LoadInt16s packs the first 8 elements of s into an Int16s vector,
// placing s[0] in lane 0. It panics if s has fewer than 8 elements.
func LoadInt16s(s []int16) Int16s {
	var words [2]uint64
	for i := 0; i < 8; i++ {
		// Lanes 0-3 go to words[0], lanes 4-7 to words[1].
		words[i/4] |= uint64(uint16(s[i])) << (16 * (i % 4))
	}
	return Int16s{a: words[0], b: words[1]}
}
// LoadInt16sPart packs up to 8 leading elements of s into an Int16s vector,
// leaving any remaining lanes zero. It returns the vector and the number
// of elements that were loaded.
func LoadInt16sPart(s []int16) (Int16s, int) {
	n := 8
	if len(s) < n {
		n = len(s)
	}
	var words [2]uint64
	for i, e := range s[:n] {
		words[i/4] |= uint64(uint16(e)) << (16 * (i % 4))
	}
	return Int16s{a: words[0], b: words[1]}, n
}
// get returns the value held in lane i of x.
func (x Int16s) get(i int) int16 {
	word, lane := x.a, i
	if i >= 4 {
		word, lane = x.b, i-4
	}
	return int16(word >> (16 * lane))
}
// set overwrites lane i of x with v, leaving all other lanes untouched.
func (x *Int16s) set(i int, v int16) {
	word, lane := &x.a, i
	if i >= 4 {
		word, lane = &x.b, i-4
	}
	shift := 16 * lane
	mask := uint64(0xffff) << shift
	// Clear the lane, then drop in the new bit pattern.
	*word = (*word &^ mask) | (uint64(uint16(v)) << shift)
}
// Abs returns the element-wise absolute value of x.
// Negation wraps, so the most negative int16 maps to itself.
func (x Int16s) Abs() Int16s {
	var out Int16s
	for lane := 0; lane < 8; lane++ {
		v := x.get(lane)
		if v < 0 {
			v = -v
		}
		out.set(lane, v)
	}
	return out
}
// Add returns the element-wise sum of x and y, wrapping on overflow.
func (x Int16s) Add(y Int16s) Int16s {
	var out Int16s
	for lane := 0; lane < 8; lane++ {
		l, r := x.get(lane), y.get(lane)
		out.set(lane, l+r)
	}
	return out
}
// AddSaturated returns the element-wise sum of x and y, clamping each
// lane to the int16 range instead of wrapping.
func (x Int16s) AddSaturated(y Int16s) Int16s {
	var out Int16s
	for lane := 0; lane < 8; lane++ {
		// Widen to int so the true sum is available for clamping.
		total := int(x.get(lane)) + int(y.get(lane))
		switch {
		case total > math.MaxInt16:
			total = math.MaxInt16
		case total < math.MinInt16:
			total = math.MinInt16
		}
		out.set(lane, int16(total))
	}
	return out
}
// And returns the bitwise AND of x and y.
func (x Int16s) And(y Int16s) Int16s {
	// x is a copy (value receiver), so it can be updated in place.
	x.a &= y.a
	x.b &= y.b
	return x
}
// AndNot returns x with every bit that is set in y cleared (x &^ y).
func (x Int16s) AndNot(y Int16s) Int16s {
	x.a &^= y.a
	x.b &^= y.b
	return x
}
// Equal returns a mask whose lanes are true where x and y hold equal values.
func (x Int16s) Equal(y Int16s) Mask16s {
	var m Mask16s
	for lane := 0; lane < 8; lane++ {
		if l, r := x.get(lane), y.get(lane); l == r {
			m.set(lane, true)
		}
	}
	return m
}
// Greater returns a mask whose lanes are true where x is greater than y
// (signed comparison).
func (x Int16s) Greater(y Int16s) Mask16s {
	var m Mask16s
	for lane := 0; lane < 8; lane++ {
		if l, r := x.get(lane), y.get(lane); l > r {
			m.set(lane, true)
		}
	}
	return m
}
// GreaterEqual returns a mask whose lanes are true where x is greater than
// or equal to y (signed comparison).
func (x Int16s) GreaterEqual(y Int16s) Mask16s {
	var m Mask16s
	for lane := 0; lane < 8; lane++ {
		if l, r := x.get(lane), y.get(lane); l >= r {
			m.set(lane, true)
		}
	}
	return m
}
// Less returns a mask whose lanes are true where x is less than y
// (signed comparison).
func (x Int16s) Less(y Int16s) Mask16s {
	var m Mask16s
	for lane := 0; lane < 8; lane++ {
		if l, r := x.get(lane), y.get(lane); l < r {
			m.set(lane, true)
		}
	}
	return m
}
// LessEqual returns a mask whose lanes are true where x is less than or
// equal to y (signed comparison).
func (x Int16s) LessEqual(y Int16s) Mask16s {
	var m Mask16s
	for lane := 0; lane < 8; lane++ {
		if l, r := x.get(lane), y.get(lane); l <= r {
			m.set(lane, true)
		}
	}
	return m
}
// NotEqual returns a mask whose lanes are true where x and y differ.
func (x Int16s) NotEqual(y Int16s) Mask16s {
	var m Mask16s
	for lane := 0; lane < 8; lane++ {
		if l, r := x.get(lane), y.get(lane); l != r {
			m.set(lane, true)
		}
	}
	return m
}
// Len returns the number of elements in the vector, which is always 8.
func (x Int16s) Len() int {
return 8
}
// Masked returns x ANDed bit-for-bit with the words of mask, so elements
// survive where mask is true and become zero elsewhere.
func (x Int16s) Masked(mask Mask16s) Int16s {
	x.a &= mask.a
	x.b &= mask.b
	return x
}
// Max returns the element-wise (signed) maximum of x and y.
func (x Int16s) Max(y Int16s) Int16s {
	var out Int16s
	for lane := 0; lane < 8; lane++ {
		best := y.get(lane)
		if v := x.get(lane); v > best {
			best = v
		}
		out.set(lane, best)
	}
	return out
}
// IfElse blends x and y bit by bit: bits set in mask are taken from x,
// all other bits from y.
func (x Int16s) IfElse(mask Mask16s, y Int16s) Int16s {
	var out Int16s
	out.a = x.a&mask.a | y.a&^mask.a
	out.b = x.b&mask.b | y.b&^mask.b
	return out
}
// Min returns the element-wise (signed) minimum of x and y.
func (x Int16s) Min(y Int16s) Int16s {
	var out Int16s
	for lane := 0; lane < 8; lane++ {
		least := y.get(lane)
		if v := x.get(lane); v < least {
			least = v
		}
		out.set(lane, least)
	}
	return out
}
// Mul returns the element-wise product of x and y, keeping the low 16 bits
// of each product.
func (x Int16s) Mul(y Int16s) Int16s {
	var out Int16s
	for lane := 0; lane < 8; lane++ {
		l, r := x.get(lane), y.get(lane)
		out.set(lane, l*r)
	}
	return out
}
// Neg returns the element-wise negation of x (wrapping).
func (x Int16s) Neg() Int16s {
	var out Int16s
	for lane := 0; lane < 8; lane++ {
		v := x.get(lane)
		out.set(lane, -v)
	}
	return out
}
// Not returns x with every bit inverted.
func (x Int16s) Not() Int16s {
	x.a = ^x.a
	x.b = ^x.b
	return x
}
// Or returns the bitwise OR of x and y.
func (x Int16s) Or(y Int16s) Int16s {
	x.a |= y.a
	x.b |= y.b
	return x
}
// ShiftAllLeft shifts every element of x left by y bits.
// Shift counts of 16 or more produce zero in every lane.
func (x Int16s) ShiftAllLeft(y uint8) Int16s {
	var out Int16s
	for lane := 0; lane < 8; lane++ {
		v := x.get(lane)
		out.set(lane, v<<y)
	}
	return out
}
// ShiftAllRight shifts every element of x right by y bits.
// The shift is arithmetic: the sign bit is replicated into vacated bits.
func (x Int16s) ShiftAllRight(y uint8) Int16s {
	var out Int16s
	for lane := 0; lane < 8; lane++ {
		v := x.get(lane)
		out.set(lane, v>>y)
	}
	return out
}
// RotateAllLeft rotates every element of x left by dist bits.
// Only the low four bits of dist are used (rotation is modulo 16).
func (x Int16s) RotateAllLeft(dist uint64) Int16s {
	k := int(dist & 15)
	var out Int16s
	for lane := 0; lane < 8; lane++ {
		rotated := bits.RotateLeft16(uint16(x.get(lane)), k)
		out.set(lane, int16(rotated))
	}
	return out
}
// RotateAllRight rotates every element of x right by dist bits.
// Only the low four bits of dist are used (rotation is modulo 16).
func (x Int16s) RotateAllRight(dist uint64) Int16s {
	k := int(dist & 15)
	var out Int16s
	for lane := 0; lane < 8; lane++ {
		// A negative count makes RotateLeft16 rotate to the right.
		rotated := bits.RotateLeft16(uint16(x.get(lane)), -k)
		out.set(lane, int16(rotated))
	}
	return out
}
// Store writes the lanes of x into s starting at s[0]. When s holds fewer
// than 8 elements only the first len(s) lanes are written.
func (x Int16s) Store(s []int16) {
	n := len(s)
	if n > 8 {
		n = 8
	}
	for i := range s[:n] {
		s[i] = x.get(i)
	}
}
// StorePart writes as many leading lanes of x as fit into s and returns
// the number of elements written.
func (x Int16s) StorePart(s []int16) int {
	x.Store(s)
	if lanes := x.Len(); len(s) > lanes {
		return lanes
	}
	return len(s)
}
// String formats the 8 lanes of x as a Go array, e.g. "[1 2 ...]".
func (x Int16s) String() string {
	var lanes [8]int16
	for i := range lanes {
		lanes[i] = x.get(i)
	}
	return fmt.Sprint(lanes)
}
// Sub returns the element-wise difference x - y, wrapping on overflow.
func (x Int16s) Sub(y Int16s) Int16s {
	var out Int16s
	for lane := 0; lane < 8; lane++ {
		l, r := x.get(lane), y.get(lane)
		out.set(lane, l-r)
	}
	return out
}
// SubSaturated returns the element-wise difference x - y, clamping each
// lane to the int16 range instead of wrapping.
func (x Int16s) SubSaturated(y Int16s) Int16s {
	var out Int16s
	for lane := 0; lane < 8; lane++ {
		// Widen to int so the true difference is available for clamping.
		delta := int(x.get(lane)) - int(y.get(lane))
		switch {
		case delta > math.MaxInt16:
			delta = math.MaxInt16
		case delta < math.MinInt16:
			delta = math.MinInt16
		}
		out.set(lane, int16(delta))
	}
	return out
}
// ToMask returns a mask whose lanes are true where the corresponding
// element of x is non-zero.
func (x Int16s) ToMask() Mask16s {
	var m Mask16s
	for lane := 0; lane < 8; lane++ {
		if v := x.get(lane); v != 0 {
			m.set(lane, true)
		}
	}
	return m
}
// Xor returns the bitwise XOR of x and y.
func (x Int16s) Xor(y Int16s) Int16s {
	x.a ^= y.a
	x.b ^= y.b
	return x
}
// ConvertToUint16 converts the vector elements to uint16.
// The bit pattern of every lane is carried over unchanged.
func (x Int16s) ConvertToUint16() Uint16s {
	var out Uint16s
	out.a, out.b = x.a, x.b
	return out
}
// ToBits reinterprets the bits of x as a Uint16s vector.
func (x Int16s) ToBits() Uint16s {
	var out Uint16s
	out.a, out.b = x.a, x.b
	return out
}
// Int32s represents a 128-bit vector of 4 int32 elements.
// Lanes 0-1 are packed into a and lanes 2-3 into b, thirty-two bits per
// lane, with the lowest-numbered lane of each word in its least
// significant bits.
type Int32s struct {
_ _simd
a, b uint64
}
// LoadInt32s packs the first 4 elements of s into an Int32s vector,
// placing s[0] in lane 0. It panics if s has fewer than 4 elements.
func LoadInt32s(s []int32) Int32s {
	var words [2]uint64
	for i := 0; i < 4; i++ {
		// Lanes 0-1 go to words[0], lanes 2-3 to words[1].
		words[i/2] |= uint64(uint32(s[i])) << (32 * (i % 2))
	}
	return Int32s{a: words[0], b: words[1]}
}
// LoadInt32sPart packs up to 4 leading elements of s into an Int32s vector,
// leaving any remaining lanes zero. It returns the vector and the number
// of elements that were loaded.
func LoadInt32sPart(s []int32) (Int32s, int) {
	n := 4
	if len(s) < n {
		n = len(s)
	}
	var words [2]uint64
	for i, e := range s[:n] {
		words[i/2] |= uint64(uint32(e)) << (32 * (i % 2))
	}
	return Int32s{a: words[0], b: words[1]}, n
}
// get returns the value held in lane i of x.
func (x Int32s) get(i int) int32 {
	word, lane := x.a, i
	if i >= 2 {
		word, lane = x.b, i-2
	}
	return int32(word >> (32 * lane))
}
// set overwrites lane i of x with v, leaving all other lanes untouched.
func (x *Int32s) set(i int, v int32) {
	word, lane := &x.a, i
	if i >= 2 {
		word, lane = &x.b, i-2
	}
	shift := 32 * lane
	mask := uint64(0xffffffff) << shift
	// Clear the lane, then drop in the new bit pattern.
	*word = (*word &^ mask) | (uint64(uint32(v)) << shift)
}
// Abs returns the element-wise absolute value of x.
// Negation wraps, so the most negative int32 maps to itself.
func (x Int32s) Abs() Int32s {
	var out Int32s
	for lane := 0; lane < 4; lane++ {
		v := x.get(lane)
		if v < 0 {
			v = -v
		}
		out.set(lane, v)
	}
	return out
}
// Add returns the element-wise sum of x and y, wrapping on overflow.
func (x Int32s) Add(y Int32s) Int32s {
	var out Int32s
	for lane := 0; lane < 4; lane++ {
		l, r := x.get(lane), y.get(lane)
		out.set(lane, l+r)
	}
	return out
}
// And returns the bitwise AND of x and y.
func (x Int32s) And(y Int32s) Int32s {
	// x is a copy (value receiver), so it can be updated in place.
	x.a &= y.a
	x.b &= y.b
	return x
}
// AndNot returns x with every bit that is set in y cleared (x &^ y).
func (x Int32s) AndNot(y Int32s) Int32s {
	x.a &^= y.a
	x.b &^= y.b
	return x
}
// ConvertToFloat32 converts each int32 element of x to float32 using Go's
// numeric conversion and returns the results as a Float32s vector.
func (x Int32s) ConvertToFloat32() Float32s {
	var out Float32s
	for lane := 0; lane < 4; lane++ {
		v := x.get(lane)
		out.set(lane, float32(v))
	}
	return out
}
// Equal returns a mask whose lanes are true where x and y hold equal values.
func (x Int32s) Equal(y Int32s) Mask32s {
	var m Mask32s
	for lane := 0; lane < 4; lane++ {
		if l, r := x.get(lane), y.get(lane); l == r {
			m.set(lane, true)
		}
	}
	return m
}
// Greater returns a mask whose lanes are true where x is greater than y
// (signed comparison).
func (x Int32s) Greater(y Int32s) Mask32s {
	var m Mask32s
	for lane := 0; lane < 4; lane++ {
		if l, r := x.get(lane), y.get(lane); l > r {
			m.set(lane, true)
		}
	}
	return m
}
// GreaterEqual returns a mask whose lanes are true where x is greater than
// or equal to y (signed comparison).
func (x Int32s) GreaterEqual(y Int32s) Mask32s {
	var m Mask32s
	for lane := 0; lane < 4; lane++ {
		if l, r := x.get(lane), y.get(lane); l >= r {
			m.set(lane, true)
		}
	}
	return m
}
// Less returns a mask whose lanes are true where x is less than y
// (signed comparison).
func (x Int32s) Less(y Int32s) Mask32s {
	var m Mask32s
	for lane := 0; lane < 4; lane++ {
		if l, r := x.get(lane), y.get(lane); l < r {
			m.set(lane, true)
		}
	}
	return m
}
// LessEqual returns a mask whose lanes are true where x is less than or
// equal to y (signed comparison).
func (x Int32s) LessEqual(y Int32s) Mask32s {
	var m Mask32s
	for lane := 0; lane < 4; lane++ {
		if l, r := x.get(lane), y.get(lane); l <= r {
			m.set(lane, true)
		}
	}
	return m
}
// NotEqual returns a mask whose lanes are true where x and y differ.
func (x Int32s) NotEqual(y Int32s) Mask32s {
	var m Mask32s
	for lane := 0; lane < 4; lane++ {
		if l, r := x.get(lane), y.get(lane); l != r {
			m.set(lane, true)
		}
	}
	return m
}
// Len returns the number of elements in the vector, which is always 4.
func (x Int32s) Len() int {
return 4
}
// Masked returns x ANDed bit-for-bit with the words of mask, so elements
// survive where mask is true and become zero elsewhere.
func (x Int32s) Masked(mask Mask32s) Int32s {
	x.a &= mask.a
	x.b &= mask.b
	return x
}
// Max returns the element-wise (signed) maximum of x and y.
func (x Int32s) Max(y Int32s) Int32s {
	var out Int32s
	for lane := 0; lane < 4; lane++ {
		best := y.get(lane)
		if v := x.get(lane); v > best {
			best = v
		}
		out.set(lane, best)
	}
	return out
}
// IfElse blends x and y bit by bit: bits set in mask are taken from x,
// all other bits from y.
func (x Int32s) IfElse(mask Mask32s, y Int32s) Int32s {
	var out Int32s
	out.a = x.a&mask.a | y.a&^mask.a
	out.b = x.b&mask.b | y.b&^mask.b
	return out
}
// Min returns the element-wise (signed) minimum of x and y.
func (x Int32s) Min(y Int32s) Int32s {
	var out Int32s
	for lane := 0; lane < 4; lane++ {
		least := y.get(lane)
		if v := x.get(lane); v < least {
			least = v
		}
		out.set(lane, least)
	}
	return out
}
// Mul returns the element-wise product of x and y, keeping the low 32 bits
// of each product.
func (x Int32s) Mul(y Int32s) Int32s {
	var out Int32s
	for lane := 0; lane < 4; lane++ {
		l, r := x.get(lane), y.get(lane)
		out.set(lane, l*r)
	}
	return out
}
// Neg returns the element-wise negation of x (wrapping).
func (x Int32s) Neg() Int32s {
	var out Int32s
	for lane := 0; lane < 4; lane++ {
		v := x.get(lane)
		out.set(lane, -v)
	}
	return out
}
// Not returns x with every bit inverted.
func (x Int32s) Not() Int32s {
	x.a = ^x.a
	x.b = ^x.b
	return x
}
// Or returns the bitwise OR of x and y.
func (x Int32s) Or(y Int32s) Int32s {
	x.a |= y.a
	x.b |= y.b
	return x
}
// ShiftAllLeft shifts every element of x left by y bits.
// Shift counts of 32 or more produce zero in every lane.
func (x Int32s) ShiftAllLeft(y uint8) Int32s {
	var out Int32s
	for lane := 0; lane < 4; lane++ {
		v := x.get(lane)
		out.set(lane, v<<y)
	}
	return out
}
// ShiftAllRight shifts every element of x right by y bits.
// The shift is arithmetic: the sign bit is replicated into vacated bits.
func (x Int32s) ShiftAllRight(y uint8) Int32s {
	var out Int32s
	for lane := 0; lane < 4; lane++ {
		v := x.get(lane)
		out.set(lane, v>>y)
	}
	return out
}
// RotateAllLeft rotates every element of x left by dist bits.
// Only the low five bits of dist are used (rotation is modulo 32).
func (x Int32s) RotateAllLeft(dist uint64) Int32s {
	k := int(dist & 31)
	var out Int32s
	for lane := 0; lane < 4; lane++ {
		rotated := bits.RotateLeft32(uint32(x.get(lane)), k)
		out.set(lane, int32(rotated))
	}
	return out
}
// RotateAllRight rotates every element of x right by dist bits.
// Only the low five bits of dist are used (rotation is modulo 32).
func (x Int32s) RotateAllRight(dist uint64) Int32s {
	k := int(dist & 31)
	var out Int32s
	for lane := 0; lane < 4; lane++ {
		// A negative count makes RotateLeft32 rotate to the right.
		rotated := bits.RotateLeft32(uint32(x.get(lane)), -k)
		out.set(lane, int32(rotated))
	}
	return out
}
// Store writes the lanes of x into s starting at s[0]. When s holds fewer
// than 4 elements only the first len(s) lanes are written.
func (x Int32s) Store(s []int32) {
	n := len(s)
	if n > 4 {
		n = 4
	}
	for i := range s[:n] {
		s[i] = x.get(i)
	}
}
// StorePart writes as many leading lanes of x as fit into s and returns
// the number of elements written.
func (x Int32s) StorePart(s []int32) int {
	x.Store(s)
	if lanes := x.Len(); len(s) > lanes {
		return lanes
	}
	return len(s)
}
// String formats the 4 lanes of x as a Go array, e.g. "[1 2 3 4]".
func (x Int32s) String() string {
	var lanes [4]int32
	for i := range lanes {
		lanes[i] = x.get(i)
	}
	return fmt.Sprint(lanes)
}
// Sub returns the element-wise difference x - y, wrapping on overflow.
func (x Int32s) Sub(y Int32s) Int32s {
	var out Int32s
	for lane := 0; lane < 4; lane++ {
		l, r := x.get(lane), y.get(lane)
		out.set(lane, l-r)
	}
	return out
}
// ToMask returns a mask whose lanes are true where the corresponding
// element of x is non-zero.
func (x Int32s) ToMask() Mask32s {
	var m Mask32s
	for lane := 0; lane < 4; lane++ {
		if v := x.get(lane); v != 0 {
			m.set(lane, true)
		}
	}
	return m
}
// Xor returns the bitwise XOR of x and y.
func (x Int32s) Xor(y Int32s) Int32s {
	x.a ^= y.a
	x.b ^= y.b
	return x
}
// ConvertToUint32 converts the vector elements to uint32.
// The bit pattern of every lane is carried over unchanged.
func (x Int32s) ConvertToUint32() Uint32s {
	var out Uint32s
	out.a, out.b = x.a, x.b
	return out
}
// ToBits reinterprets the bits of x as a Uint32s vector.
func (x Int32s) ToBits() Uint32s {
	var out Uint32s
	out.a, out.b = x.a, x.b
	return out
}
// Int64s represents a 128-bit vector of 2 int64 elements.
// Lane 0 is stored in a and lane 1 in b, each as its uint64 bit pattern.
type Int64s struct {
_ _simd
a, b uint64
}
// LoadInt64s packs the first 2 elements of s into an Int64s vector,
// placing s[0] in lane 0. It panics if s has fewer than 2 elements.
func LoadInt64s(s []int64) Int64s {
	return Int64s{a: uint64(s[0]), b: uint64(s[1])}
}
// LoadInt64sPart packs up to 2 leading elements of s into an Int64s vector,
// leaving any remaining lanes zero. It returns the vector and the number
// of elements that were loaded, which never exceeds 2.
func LoadInt64sPart(s []int64) (Int64s, int) {
	// Cap the count at the lane count, as the other Load*Part functions do;
	// returning len(s) unchanged would over-report for longer slices.
	n := len(s)
	if n > 2 {
		n = 2
	}
	var a, b uint64
	if n > 0 {
		a = uint64(s[0])
	}
	if n > 1 {
		b = uint64(s[1])
	}
	return Int64s{a: a, b: b}, n
}
// get returns lane 0 when i is 0 and lane 1 for any other i.
func (x Int64s) get(i int) int64 {
	if i != 0 {
		return int64(x.b)
	}
	return int64(x.a)
}
// set stores v in lane 0 when i is 0 and in lane 1 for any other i.
func (x *Int64s) set(i int, v int64) {
	word := &x.b
	if i == 0 {
		word = &x.a
	}
	*word = uint64(v)
}
// Add returns the element-wise sum of x and y, wrapping on overflow.
func (x Int64s) Add(y Int64s) Int64s {
	// x is a copy (value receiver), so it can be updated in place.
	x.a += y.a
	x.b += y.b
	return x
}
// And returns the bitwise AND of x and y.
func (x Int64s) And(y Int64s) Int64s {
	x.a &= y.a
	x.b &= y.b
	return x
}
// AndNot returns x with every bit that is set in y cleared (x &^ y).
func (x Int64s) AndNot(y Int64s) Int64s {
	x.a &^= y.a
	x.b &^= y.b
	return x
}
// Equal returns a mask whose lanes are all ones where x and y hold equal
// values and zero elsewhere.
func (x Int64s) Equal(y Int64s) Mask64s {
	const allOnes = ^uint64(0)
	var m Mask64s
	if x.a == y.a {
		m.a = allOnes
	}
	if x.b == y.b {
		m.b = allOnes
	}
	return m
}
// Greater returns a mask whose lanes are all ones where x is greater than
// y (signed comparison) and zero elsewhere.
func (x Int64s) Greater(y Int64s) Mask64s {
	const allOnes = ^uint64(0)
	var m Mask64s
	if l, r := int64(x.a), int64(y.a); l > r {
		m.a = allOnes
	}
	if l, r := int64(x.b), int64(y.b); l > r {
		m.b = allOnes
	}
	return m
}
// GreaterEqual returns a mask whose lanes are all ones where x is greater
// than or equal to y (signed comparison) and zero elsewhere.
func (x Int64s) GreaterEqual(y Int64s) Mask64s {
	const allOnes = ^uint64(0)
	var m Mask64s
	if l, r := int64(x.a), int64(y.a); l >= r {
		m.a = allOnes
	}
	if l, r := int64(x.b), int64(y.b); l >= r {
		m.b = allOnes
	}
	return m
}
// Less returns a mask whose lanes are all ones where x is less than y
// (signed comparison) and zero elsewhere.
func (x Int64s) Less(y Int64s) Mask64s {
	const allOnes = ^uint64(0)
	var m Mask64s
	if l, r := int64(x.a), int64(y.a); l < r {
		m.a = allOnes
	}
	if l, r := int64(x.b), int64(y.b); l < r {
		m.b = allOnes
	}
	return m
}
// LessEqual returns a mask whose lanes are all ones where x is less than
// or equal to y (signed comparison) and zero elsewhere.
func (x Int64s) LessEqual(y Int64s) Mask64s {
	const allOnes = ^uint64(0)
	var m Mask64s
	if l, r := int64(x.a), int64(y.a); l <= r {
		m.a = allOnes
	}
	if l, r := int64(x.b), int64(y.b); l <= r {
		m.b = allOnes
	}
	return m
}
// NotEqual returns a mask whose lanes are all ones where x and y differ
// and zero elsewhere.
func (x Int64s) NotEqual(y Int64s) Mask64s {
	const allOnes = ^uint64(0)
	var m Mask64s
	if x.a != y.a {
		m.a = allOnes
	}
	if x.b != y.b {
		m.b = allOnes
	}
	return m
}
// Len returns the number of elements in the vector, which is always 2.
func (x Int64s) Len() int {
return 2
}
// Masked returns x ANDed bit-for-bit with the words of mask, so elements
// survive where mask is true and become zero elsewhere.
func (x Int64s) Masked(mask Mask64s) Int64s {
	x.a &= mask.a
	x.b &= mask.b
	return x
}
// IfElse blends x and y bit by bit: bits set in mask are taken from x,
// all other bits from y.
func (x Int64s) IfElse(mask Mask64s, y Int64s) Int64s {
	var out Int64s
	out.a = x.a&mask.a | y.a&^mask.a
	out.b = x.b&mask.b | y.b&^mask.b
	return out
}
// Neg returns the element-wise negation of x (wrapping).
func (x Int64s) Neg() Int64s {
	// Two's-complement negation gives the same bits for signed and
	// unsigned operands, so the words can be negated directly.
	x.a = -x.a
	x.b = -x.b
	return x
}
// Not returns x with every bit inverted.
func (x Int64s) Not() Int64s {
	x.a = ^x.a
	x.b = ^x.b
	return x
}
// Or returns the bitwise OR of x and y.
func (x Int64s) Or(y Int64s) Int64s {
	x.a |= y.a
	x.b |= y.b
	return x
}
// ShiftAllLeft shifts every element of x left by y bits.
// Shift counts of 64 or more produce zero in every lane.
func (x Int64s) ShiftAllLeft(y uint8) Int64s {
	x.a <<= y
	x.b <<= y
	return x
}
// RotateAllLeft rotates every element of x left by dist bits.
// Only the low six bits of dist are used (rotation is modulo 64).
func (x Int64s) RotateAllLeft(dist uint64) Int64s {
	k := int(dist & 63)
	x.a = bits.RotateLeft64(x.a, k)
	x.b = bits.RotateLeft64(x.b, k)
	return x
}
// RotateAllRight rotates every element of x right by dist bits.
// Only the low six bits of dist are used (rotation is modulo 64).
func (x Int64s) RotateAllRight(dist uint64) Int64s {
	k := int(dist & 63)
	// A negative count makes RotateLeft64 rotate to the right.
	x.a = bits.RotateLeft64(x.a, -k)
	x.b = bits.RotateLeft64(x.b, -k)
	return x
}
// Store writes the lanes of x into s starting at s[0]. When s holds fewer
// than 2 elements only the first len(s) lanes are written.
func (x Int64s) Store(s []int64) {
	switch {
	case len(s) > 1:
		s[0], s[1] = int64(x.a), int64(x.b)
	case len(s) == 1:
		s[0] = int64(x.a)
	}
}
// StorePart writes as many leading lanes of x as fit into s and returns
// the number of elements written.
func (x Int64s) StorePart(s []int64) int {
	x.Store(s)
	if lanes := x.Len(); len(s) > lanes {
		return lanes
	}
	return len(s)
}
// String formats the 2 lanes of x as a Go array, e.g. "[1 2]".
func (x Int64s) String() string {
	lanes := [2]int64{int64(x.a), int64(x.b)}
	return fmt.Sprint(lanes)
}
// Sub returns the element-wise difference x - y, wrapping on overflow.
func (x Int64s) Sub(y Int64s) Int64s {
	x.a -= y.a
	x.b -= y.b
	return x
}
// ToMask returns a mask whose lanes are all ones where the corresponding
// element of x is non-zero and zero elsewhere.
func (x Int64s) ToMask() Mask64s {
	const allOnes = ^uint64(0)
	var m Mask64s
	if x.a != 0 {
		m.a = allOnes
	}
	if x.b != 0 {
		m.b = allOnes
	}
	return m
}
// Xor returns the bitwise XOR of x and y.
func (x Int64s) Xor(y Int64s) Int64s {
	x.a ^= y.a
	x.b ^= y.b
	return x
}
// ConvertToUint64 converts the vector elements to uint64.
// The bit pattern of every lane is carried over unchanged.
func (x Int64s) ConvertToUint64() Uint64s {
	var out Uint64s
	out.a, out.b = x.a, x.b
	return out
}
// ToBits reinterprets the bits of x as a Uint64s vector.
func (x Int64s) ToBits() Uint64s {
	var out Uint64s
	out.a, out.b = x.a, x.b
	return out
}
// Uint8s represents a 128-bit vector of 16 uint8 elements.
// Lanes 0-7 are packed into a and lanes 8-15 into b, eight bits per lane,
// with the lowest-numbered lane of each word in its least significant bits.
type Uint8s struct {
_ _simd
a, b uint64
}
// LoadUint8s packs the first 16 elements of s into a Uint8s vector,
// placing s[0] in lane 0. It panics if s has fewer than 16 elements.
func LoadUint8s(s []uint8) Uint8s {
	var words [2]uint64
	for i := 0; i < 16; i++ {
		// Lanes 0-7 go to words[0], lanes 8-15 to words[1].
		words[i/8] |= uint64(s[i]) << (8 * (i % 8))
	}
	return Uint8s{a: words[0], b: words[1]}
}
// LoadUint8sPart packs up to 16 leading elements of s into a Uint8s vector,
// leaving any remaining lanes zero. It returns the vector and the number
// of elements that were loaded.
func LoadUint8sPart(s []uint8) (Uint8s, int) {
	n := 16
	if len(s) < n {
		n = len(s)
	}
	var words [2]uint64
	for i, e := range s[:n] {
		words[i/8] |= uint64(e) << (8 * (i % 8))
	}
	return Uint8s{a: words[0], b: words[1]}, n
}
// get returns the value held in lane i of x.
func (x Uint8s) get(i int) uint8 {
	word, lane := x.a, i
	if i >= 8 {
		word, lane = x.b, i-8
	}
	return uint8(word >> (8 * lane))
}
// set overwrites lane i of x with v, leaving all other lanes untouched.
func (x *Uint8s) set(i int, v uint8) {
	word, lane := &x.a, i
	if i >= 8 {
		word, lane = &x.b, i-8
	}
	shift := 8 * lane
	mask := uint64(0xff) << shift
	// Clear the lane, then drop in the new value.
	*word = (*word &^ mask) | (uint64(v) << shift)
}
// Add returns the element-wise sum of x and y, wrapping modulo 256.
func (x Uint8s) Add(y Uint8s) Uint8s {
	var out Uint8s
	for lane := 0; lane < 16; lane++ {
		l, r := x.get(lane), y.get(lane)
		out.set(lane, l+r)
	}
	return out
}
// AddSaturated returns the element-wise sum of x and y, clamping each
// lane at the maximum uint8 value instead of wrapping.
func (x Uint8s) AddSaturated(y Uint8s) Uint8s {
	var out Uint8s
	for lane := 0; lane < 16; lane++ {
		// Widen so the true sum is available for clamping.
		total := uint16(x.get(lane)) + uint16(y.get(lane))
		if total > math.MaxUint8 {
			total = math.MaxUint8
		}
		out.set(lane, uint8(total))
	}
	return out
}
// And returns the bitwise AND of x and y.
func (x Uint8s) And(y Uint8s) Uint8s {
	// x is a copy (value receiver), so it can be updated in place.
	x.a &= y.a
	x.b &= y.b
	return x
}
// AndNot returns x with every bit that is set in y cleared (x &^ y).
func (x Uint8s) AndNot(y Uint8s) Uint8s {
	x.a &^= y.a
	x.b &^= y.b
	return x
}
// Average returns the element-wise average of x and y, rounding halves
// up: each lane is (x + y + 1) / 2 computed without overflow.
func (x Uint8s) Average(y Uint8s) Uint8s {
	var out Uint8s
	for lane := 0; lane < 16; lane++ {
		total := uint(x.get(lane)) + uint(y.get(lane)) + 1
		out.set(lane, uint8(total/2))
	}
	return out
}
// Equal returns a mask whose lanes are true where x and y hold equal values.
func (x Uint8s) Equal(y Uint8s) Mask8s {
	var m Mask8s
	for lane := 0; lane < 16; lane++ {
		if l, r := x.get(lane), y.get(lane); l == r {
			m.set(lane, true)
		}
	}
	return m
}
// NotEqual returns a mask whose lanes are true where x and y differ.
func (x Uint8s) NotEqual(y Uint8s) Mask8s {
	var m Mask8s
	for lane := 0; lane < 16; lane++ {
		if l, r := x.get(lane), y.get(lane); l != r {
			m.set(lane, true)
		}
	}
	return m
}
// Len returns the number of elements in the vector, which is always 16.
func (x Uint8s) Len() int {
return 16
}
// Masked returns x ANDed bit-for-bit with the words of mask, so elements
// survive where mask is true and become zero elsewhere.
func (x Uint8s) Masked(mask Mask8s) Uint8s {
	x.a &= mask.a
	x.b &= mask.b
	return x
}
// Max returns the element-wise (unsigned) maximum of x and y.
func (x Uint8s) Max(y Uint8s) Uint8s {
	var out Uint8s
	for lane := 0; lane < 16; lane++ {
		best := y.get(lane)
		if v := x.get(lane); v > best {
			best = v
		}
		out.set(lane, best)
	}
	return out
}
// IfElse blends x and y bit by bit: bits set in mask are taken from x,
// all other bits from y.
func (x Uint8s) IfElse(mask Mask8s, y Uint8s) Uint8s {
	var out Uint8s
	out.a = x.a&mask.a | y.a&^mask.a
	out.b = x.b&mask.b | y.b&^mask.b
	return out
}
// Min returns the element-wise (unsigned) minimum of x and y.
func (x Uint8s) Min(y Uint8s) Uint8s {
	var out Uint8s
	for lane := 0; lane < 16; lane++ {
		least := y.get(lane)
		if v := x.get(lane); v < least {
			least = v
		}
		out.set(lane, least)
	}
	return out
}
// Mul returns the element-wise product of x and y, keeping the low 8 bits
// of each product.
func (x Uint8s) Mul(y Uint8s) Uint8s {
	var out Uint8s
	for lane := 0; lane < 16; lane++ {
		l, r := x.get(lane), y.get(lane)
		out.set(lane, l*r)
	}
	return out
}
// Not returns x with every bit inverted.
func (x Uint8s) Not() Uint8s {
	x.a = ^x.a
	x.b = ^x.b
	return x
}
// Or returns the bitwise OR of x and y.
func (x Uint8s) Or(y Uint8s) Uint8s {
	x.a |= y.a
	x.b |= y.b
	return x
}
// Store writes the lanes of x into s starting at s[0]. When s holds fewer
// than 16 elements only the first len(s) lanes are written.
func (x Uint8s) Store(s []uint8) {
	n := len(s)
	if n > 16 {
		n = 16
	}
	for i := range s[:n] {
		s[i] = x.get(i)
	}
}
// StorePart writes as many leading lanes of x as fit into s and returns
// the number of elements written.
func (x Uint8s) StorePart(s []uint8) int {
	x.Store(s)
	if lanes := x.Len(); len(s) > lanes {
		return lanes
	}
	return len(s)
}
// String formats the 16 lanes of x as a Go array, e.g. "[1 2 ...]".
func (x Uint8s) String() string {
	var lanes [16]uint8
	for i := range lanes {
		lanes[i] = x.get(i)
	}
	return fmt.Sprint(lanes)
}
// Sub returns the element-wise difference x - y, wrapping modulo 256.
func (x Uint8s) Sub(y Uint8s) Uint8s {
	var out Uint8s
	for lane := 0; lane < 16; lane++ {
		l, r := x.get(lane), y.get(lane)
		out.set(lane, l-r)
	}
	return out
}
// SubSaturated returns the element-wise difference x - y, clamping each
// lane at zero instead of wrapping.
func (x Uint8s) SubSaturated(y Uint8s) Uint8s {
	var out Uint8s
	for lane := 0; lane < 16; lane++ {
		var delta uint8 // stays zero unless x exceeds y in this lane
		if l, r := x.get(lane), y.get(lane); l > r {
			delta = l - r
		}
		out.set(lane, delta)
	}
	return out
}
// Xor returns the bitwise XOR of x and y.
func (x Uint8s) Xor(y Uint8s) Uint8s {
	x.a ^= y.a
	x.b ^= y.b
	return x
}
// BitsToInt8 reinterprets the bits of x as an Int8s vector.
func (x Uint8s) BitsToInt8() Int8s {
	var out Int8s
	out.a, out.b = x.a, x.b
	return out
}
// ConvertToInt8 converts the vector elements to int8.
// The bit pattern of every lane is carried over unchanged.
func (x Uint8s) ConvertToInt8() Int8s {
	var out Int8s
	out.a, out.b = x.a, x.b
	return out
}
// ReshapeToUint16s reinterprets the 128 bits of x as a Uint16s vector.
func (x Uint8s) ReshapeToUint16s() Uint16s {
	var out Uint16s
	out.a, out.b = x.a, x.b
	return out
}
// ReshapeToUint32s reinterprets the 128 bits of x as a Uint32s vector.
func (x Uint8s) ReshapeToUint32s() Uint32s {
	var out Uint32s
	out.a, out.b = x.a, x.b
	return out
}
// ReshapeToUint64s reinterprets the 128 bits of x as a Uint64s vector.
func (x Uint8s) ReshapeToUint64s() Uint64s {
	var out Uint64s
	out.a, out.b = x.a, x.b
	return out
}
// Uint16s is an emulated 128-bit vector of 8 uint16 lanes.
// Lanes 0-3 are packed into a and lanes 4-7 into b, with the lowest lane
// in the least significant 16 bits of each word.
type Uint16s struct {
	_ _simd
	a uint64
	b uint64
}
// LoadUint16s builds a Uint16s from the first 8 elements of s.
// It panics with an index-out-of-range error if s has fewer than 8 elements.
func LoadUint16s(s []uint16) Uint16s {
	// words[0] collects lanes 0-3, words[1] lanes 4-7.
	var words [2]uint64
	for i := 0; i < 8; i++ {
		words[i/4] |= uint64(s[i]) << (16 * (i % 4))
	}
	return Uint16s{a: words[0], b: words[1]}
}
// LoadUint16sPart builds a Uint16s from at most the first 8 elements of s
// and returns it together with the number of elements loaded. Lanes with
// no corresponding element are zero.
func LoadUint16sPart(s []uint16) (Uint16s, int) {
	count := len(s)
	if count > 8 {
		count = 8
	}
	// words[0] collects lanes 0-3, words[1] lanes 4-7.
	var words [2]uint64
	for i, v := range s[:count] {
		words[i/4] |= uint64(v) << (16 * (i % 4))
	}
	return Uint16s{a: words[0], b: words[1]}, count
}
// get returns lane i of x. Lanes 0-3 are read from a, higher indices from b.
func (x Uint16s) get(i int) uint16 {
	word, lane := x.a, i
	if i >= 4 {
		word, lane = x.b, i-4
	}
	return uint16(word >> (16 * lane))
}
// set overwrites lane i of x with v, leaving the other lanes untouched.
func (x *Uint16s) set(i int, v uint16) {
	word, lane := &x.a, i
	if i >= 4 {
		word, lane = &x.b, i-4
	}
	shift := 16 * lane
	// Clear the lane's 16-bit field, then drop the new value in.
	cleared := *word &^ (uint64(0xffff) << shift)
	*word = cleared | uint64(v)<<shift
}
// Add adds x and y lane by lane. Sums are computed in uint16 arithmetic
// and therefore wrap on overflow.
func (x Uint16s) Add(y Uint16s) Uint16s {
	var out Uint16s
	for lane := 0; lane != 8; lane++ {
		sum := x.get(lane) + y.get(lane)
		out.set(lane, sum)
	}
	return out
}
// AddSaturated adds x and y lane by lane, clamping each sum at the largest
// uint16 value (65535) instead of wrapping.
func (x Uint16s) AddSaturated(y Uint16s) Uint16s {
	var out Uint16s
	for lane := 0; lane != 8; lane++ {
		first := x.get(lane)
		sum := first + y.get(lane)
		if sum < first {
			// The uint16 addition wrapped, so the true sum exceeds 65535.
			sum = ^uint16(0)
		}
		out.set(lane, sum)
	}
	return out
}
// And returns the bit-by-bit AND of the two vectors.
func (x Uint16s) And(y Uint16s) Uint16s {
	lo := x.a & y.a
	hi := x.b & y.b
	return Uint16s{a: lo, b: hi}
}
// AndNot returns x with every bit that is set in y cleared (x AND NOT y).
func (x Uint16s) AndNot(y Uint16s) Uint16s {
	lo := x.a & ^y.a
	hi := x.b & ^y.b
	return Uint16s{a: lo, b: hi}
}
// Average returns the lane-wise mean of x and y, rounded up:
// (x + y + 1) / 2, evaluated in a wider type so the sum cannot overflow.
func (x Uint16s) Average(y Uint16s) Uint16s {
	var out Uint16s
	for lane := 0; lane != 8; lane++ {
		sum := uint32(x.get(lane)) + uint32(y.get(lane)) + 1
		out.set(lane, uint16(sum/2))
	}
	return out
}
// Equal compares x and y lane by lane and returns a mask whose lanes are
// set where the two values are equal.
func (x Uint16s) Equal(y Uint16s) Mask16s {
	var m Mask16s
	for lane := 0; lane != 8; lane++ {
		m.set(lane, x.get(lane) == y.get(lane))
	}
	return m
}
// Greater compares x and y lane by lane (unsigned) and returns a mask
// whose lanes are set where x > y.
func (x Uint16s) Greater(y Uint16s) Mask16s {
	var m Mask16s
	for lane := 0; lane != 8; lane++ {
		m.set(lane, x.get(lane) > y.get(lane))
	}
	return m
}
// GreaterEqual compares x and y lane by lane (unsigned) and returns a mask
// whose lanes are set where x >= y.
func (x Uint16s) GreaterEqual(y Uint16s) Mask16s {
	var m Mask16s
	for lane := 0; lane != 8; lane++ {
		m.set(lane, x.get(lane) >= y.get(lane))
	}
	return m
}
// Less compares x and y lane by lane (unsigned) and returns a mask whose
// lanes are set where x < y.
func (x Uint16s) Less(y Uint16s) Mask16s {
	var m Mask16s
	for lane := 0; lane != 8; lane++ {
		m.set(lane, x.get(lane) < y.get(lane))
	}
	return m
}
// LessEqual compares x and y lane by lane (unsigned) and returns a mask
// whose lanes are set where x <= y.
func (x Uint16s) LessEqual(y Uint16s) Mask16s {
	var m Mask16s
	for lane := 0; lane != 8; lane++ {
		m.set(lane, x.get(lane) <= y.get(lane))
	}
	return m
}
// NotEqual compares x and y lane by lane and returns a mask whose lanes
// are set where the two values differ.
func (x Uint16s) NotEqual(y Uint16s) Mask16s {
	var m Mask16s
	for lane := 0; lane != 8; lane++ {
		m.set(lane, x.get(lane) != y.get(lane))
	}
	return m
}
// Len reports the lane count of a Uint16s, which is always 8.
func (x Uint16s) Len() int {
	// 128 vector bits divided into 16-bit lanes.
	const lanes = 128 / 16
	return lanes
}
// Masked keeps the bits of x that are set in mask and zeroes the rest, so
// lanes whose mask field is all ones pass through and cleared lanes become 0.
func (x Uint16s) Masked(mask Mask16s) Uint16s {
	var out Uint16s
	out.a, out.b = x.a&mask.a, x.b&mask.b
	return out
}
// Max returns, for every lane, the larger of the two unsigned values.
func (x Uint16s) Max(y Uint16s) Uint16s {
	var out Uint16s
	for lane := 0; lane != 8; lane++ {
		// Start from y and switch to x only when x is strictly larger.
		pick := y.get(lane)
		if cand := x.get(lane); cand > pick {
			pick = cand
		}
		out.set(lane, pick)
	}
	return out
}
// IfElse blends two vectors bit by bit: bits set in mask select from x,
// cleared bits select from y.
func (x Uint16s) IfElse(mask Mask16s, y Uint16s) Uint16s {
	// y ^ ((x ^ y) & m) yields x where m is 1 and y where m is 0.
	lo := y.a ^ ((x.a ^ y.a) & mask.a)
	hi := y.b ^ ((x.b ^ y.b) & mask.b)
	return Uint16s{a: lo, b: hi}
}
// Min returns, for every lane, the smaller of the two unsigned values.
func (x Uint16s) Min(y Uint16s) Uint16s {
	var out Uint16s
	for lane := 0; lane != 8; lane++ {
		// Start from y and switch to x only when x is strictly smaller.
		pick := y.get(lane)
		if cand := x.get(lane); cand < pick {
			pick = cand
		}
		out.set(lane, pick)
	}
	return out
}
// Mul multiplies x and y lane by lane, keeping the low 16 bits of each
// product.
func (x Uint16s) Mul(y Uint16s) Uint16s {
	var out Uint16s
	for lane := 0; lane != 8; lane++ {
		product := x.get(lane) * y.get(lane)
		out.set(lane, product)
	}
	return out
}
// Not returns x with every one of its 128 bits inverted.
func (x Uint16s) Not() Uint16s {
	const allOnes = ^uint64(0)
	lo := x.a ^ allOnes
	hi := x.b ^ allOnes
	return Uint16s{a: lo, b: hi}
}
// Or returns the bit-by-bit inclusive OR of the two vectors.
func (x Uint16s) Or(y Uint16s) Uint16s {
	lo := x.a | y.a
	hi := x.b | y.b
	return Uint16s{a: lo, b: hi}
}
// ShiftAllLeft shifts every lane left by y bits, filling with zeros.
// A count of 16 or more clears every lane.
func (x Uint16s) ShiftAllLeft(y uint8) Uint16s {
	var out Uint16s
	for lane := 0; lane != 8; lane++ {
		shifted := x.get(lane) << y
		out.set(lane, shifted)
	}
	return out
}
// ShiftAllRight shifts every lane right by y bits (logical shift, zero
// fill). A count of 16 or more clears every lane.
func (x Uint16s) ShiftAllRight(y uint8) Uint16s {
	var out Uint16s
	for lane := 0; lane != 8; lane++ {
		shifted := x.get(lane) >> y
		out.set(lane, shifted)
	}
	return out
}
// RotateAllLeft rotates every lane left by dist bits. Only the low four
// bits of dist matter, i.e. the rotation count is dist modulo 16.
func (x Uint16s) RotateAllLeft(dist uint64) Uint16s {
	k := int(dist & 15)
	var out Uint16s
	for lane := 0; lane != 8; lane++ {
		out.set(lane, bits.RotateLeft16(x.get(lane), k))
	}
	return out
}
// RotateAllRight rotates every lane right by dist bits. Only the low four
// bits of dist matter, i.e. the rotation count is dist modulo 16.
func (x Uint16s) RotateAllRight(dist uint64) Uint16s {
	// A negative count makes RotateLeft16 rotate to the right.
	k := -int(dist & 15)
	var out Uint16s
	for lane := 0; lane != 8; lane++ {
		out.set(lane, bits.RotateLeft16(x.get(lane), k))
	}
	return out
}
// Store copies the lanes of x into s, lowest lane first. When s holds
// fewer than 8 elements only the lanes that fit are written; elements of
// s past index 7 are never touched.
func (x Uint16s) Store(s []uint16) {
	n := len(s)
	if n > 8 {
		n = 8
	}
	for i := range s[:n] {
		s[i] = x.get(i)
	}
}
// StorePart writes the leading lanes of x into s via Store and returns the
// smaller of len(s) and x.Len().
func (x Uint16s) StorePart(s []uint16) int {
	count := min(len(s), x.Len())
	x.Store(s)
	return count
}
// String formats the 8 lanes, lowest first, the way fmt.Sprint prints a
// [8]uint16 array.
func (x Uint16s) String() string {
	elems := [8]uint16{}
	for i := range elems {
		elems[i] = x.get(i)
	}
	return fmt.Sprint(elems)
}
// Sub subtracts y from x lane by lane. The subtraction is done in uint16
// arithmetic, so a lane where y exceeds x wraps around.
func (x Uint16s) Sub(y Uint16s) Uint16s {
	var out Uint16s
	for lane := 0; lane != 8; lane++ {
		diff := x.get(lane) - y.get(lane)
		out.set(lane, diff)
	}
	return out
}
// SubSaturated subtracts y from x lane by lane, clamping at zero: a lane
// where y is larger than x yields 0 instead of wrapping.
func (x Uint16s) SubSaturated(y Uint16s) Uint16s {
	var out Uint16s
	for lane := 0; lane != 8; lane++ {
		minuend, subtrahend := x.get(lane), y.get(lane)
		diff := minuend - subtrahend
		if minuend < subtrahend {
			// The plain difference wrapped; saturate instead.
			diff = 0
		}
		out.set(lane, diff)
	}
	return out
}
// Xor returns the bit-by-bit exclusive OR of the two vectors.
func (x Uint16s) Xor(y Uint16s) Uint16s {
	lo := x.a ^ y.a
	hi := x.b ^ y.b
	return Uint16s{a: lo, b: hi}
}
// BitsToInt16 returns an Int16s carrying exactly the same 128 bits as x.
func (x Uint16s) BitsToInt16() Int16s {
	var out Int16s
	out.a, out.b = x.a, x.b
	return out
}
// ConvertToInt16 converts each lane to int16. The bit pattern of every
// lane is carried over unchanged.
func (x Uint16s) ConvertToInt16() Int16s {
	var out Int16s
	out.a, out.b = x.a, x.b
	return out
}
// ReshapeToUint32s returns a Uint32s backed by the same 128 bits as x.
func (x Uint16s) ReshapeToUint32s() Uint32s {
	var out Uint32s
	out.a, out.b = x.a, x.b
	return out
}
// ReshapeToUint64s returns a Uint64s backed by the same 128 bits as x.
func (x Uint16s) ReshapeToUint64s() Uint64s {
	var out Uint64s
	out.a, out.b = x.a, x.b
	return out
}
// ReshapeToUint8s returns a Uint8s backed by the same 128 bits as x.
func (x Uint16s) ReshapeToUint8s() Uint8s {
	var out Uint8s
	out.a, out.b = x.a, x.b
	return out
}
// Uint32s is an emulated 128-bit vector of 4 uint32 lanes.
// Lanes 0-1 are packed into a and lanes 2-3 into b, with the lower lane
// in the least significant 32 bits of each word.
type Uint32s struct {
	_ _simd
	a uint64
	b uint64
}
// LoadUint32s builds a Uint32s from the first 4 elements of s.
// It panics with an index-out-of-range error if s has fewer than 4 elements.
func LoadUint32s(s []uint32) Uint32s {
	// words[0] collects lanes 0-1, words[1] lanes 2-3.
	var words [2]uint64
	for i := 0; i < 4; i++ {
		words[i/2] |= uint64(s[i]) << (32 * (i % 2))
	}
	return Uint32s{a: words[0], b: words[1]}
}
// LoadUint32sPart builds a Uint32s from at most the first 4 elements of s
// and returns it together with the number of elements loaded. Lanes with
// no corresponding element are zero.
func LoadUint32sPart(s []uint32) (Uint32s, int) {
	count := len(s)
	if count > 4 {
		count = 4
	}
	// words[0] collects lanes 0-1, words[1] lanes 2-3.
	var words [2]uint64
	for i, v := range s[:count] {
		words[i/2] |= uint64(v) << (32 * (i % 2))
	}
	return Uint32s{a: words[0], b: words[1]}, count
}
// get returns lane i of x. Lanes 0-1 are read from a, higher indices from b.
func (x Uint32s) get(i int) uint32 {
	word, lane := x.a, i
	if i >= 2 {
		word, lane = x.b, i-2
	}
	return uint32(word >> (32 * lane))
}
// set overwrites lane i of x with v, leaving the other lanes untouched.
func (x *Uint32s) set(i int, v uint32) {
	word, lane := &x.a, i
	if i >= 2 {
		word, lane = &x.b, i-2
	}
	shift := 32 * lane
	// Clear the lane's 32-bit field, then drop the new value in.
	cleared := *word &^ (uint64(0xffffffff) << shift)
	*word = cleared | uint64(v)<<shift
}
// Add adds x and y lane by lane. Sums are computed in uint32 arithmetic
// and therefore wrap on overflow.
func (x Uint32s) Add(y Uint32s) Uint32s {
	var out Uint32s
	for lane := 0; lane != 4; lane++ {
		sum := x.get(lane) + y.get(lane)
		out.set(lane, sum)
	}
	return out
}
// And returns the bit-by-bit AND of the two vectors.
func (x Uint32s) And(y Uint32s) Uint32s {
	lo := x.a & y.a
	hi := x.b & y.b
	return Uint32s{a: lo, b: hi}
}
// AndNot returns x with every bit that is set in y cleared (x AND NOT y).
func (x Uint32s) AndNot(y Uint32s) Uint32s {
	lo := x.a & ^y.a
	hi := x.b & ^y.b
	return Uint32s{a: lo, b: hi}
}
// Equal compares x and y lane by lane and returns a mask whose lanes are
// set where the two values are equal.
func (x Uint32s) Equal(y Uint32s) Mask32s {
	var m Mask32s
	for lane := 0; lane != 4; lane++ {
		m.set(lane, x.get(lane) == y.get(lane))
	}
	return m
}
// Greater compares x and y lane by lane (unsigned) and returns a mask
// whose lanes are set where x > y.
func (x Uint32s) Greater(y Uint32s) Mask32s {
	var m Mask32s
	for lane := 0; lane != 4; lane++ {
		m.set(lane, x.get(lane) > y.get(lane))
	}
	return m
}
// GreaterEqual compares x and y lane by lane (unsigned) and returns a mask
// whose lanes are set where x >= y.
func (x Uint32s) GreaterEqual(y Uint32s) Mask32s {
	var m Mask32s
	for lane := 0; lane != 4; lane++ {
		m.set(lane, x.get(lane) >= y.get(lane))
	}
	return m
}
// Less compares x and y lane by lane (unsigned) and returns a mask whose
// lanes are set where x < y.
func (x Uint32s) Less(y Uint32s) Mask32s {
	var m Mask32s
	for lane := 0; lane != 4; lane++ {
		m.set(lane, x.get(lane) < y.get(lane))
	}
	return m
}
// LessEqual compares x and y lane by lane (unsigned) and returns a mask
// whose lanes are set where x <= y.
func (x Uint32s) LessEqual(y Uint32s) Mask32s {
	var m Mask32s
	for lane := 0; lane != 4; lane++ {
		m.set(lane, x.get(lane) <= y.get(lane))
	}
	return m
}
// NotEqual compares x and y lane by lane and returns a mask whose lanes
// are set where the two values differ.
func (x Uint32s) NotEqual(y Uint32s) Mask32s {
	var m Mask32s
	for lane := 0; lane != 4; lane++ {
		m.set(lane, x.get(lane) != y.get(lane))
	}
	return m
}
// Len reports the lane count of a Uint32s, which is always 4.
func (x Uint32s) Len() int {
	// 128 vector bits divided into 32-bit lanes.
	const lanes = 128 / 32
	return lanes
}
// Masked keeps the bits of x that are set in mask and zeroes the rest, so
// lanes whose mask field is all ones pass through and cleared lanes become 0.
func (x Uint32s) Masked(mask Mask32s) Uint32s {
	var out Uint32s
	out.a, out.b = x.a&mask.a, x.b&mask.b
	return out
}
// Max returns, for every lane, the larger of the two unsigned values.
func (x Uint32s) Max(y Uint32s) Uint32s {
	var out Uint32s
	for lane := 0; lane != 4; lane++ {
		// Start from y and switch to x only when x is strictly larger.
		pick := y.get(lane)
		if cand := x.get(lane); cand > pick {
			pick = cand
		}
		out.set(lane, pick)
	}
	return out
}
// IfElse blends two vectors bit by bit: bits set in mask select from x,
// cleared bits select from y.
func (x Uint32s) IfElse(mask Mask32s, y Uint32s) Uint32s {
	// y ^ ((x ^ y) & m) yields x where m is 1 and y where m is 0.
	lo := y.a ^ ((x.a ^ y.a) & mask.a)
	hi := y.b ^ ((x.b ^ y.b) & mask.b)
	return Uint32s{a: lo, b: hi}
}
// Min returns, for every lane, the smaller of the two unsigned values.
func (x Uint32s) Min(y Uint32s) Uint32s {
	var out Uint32s
	for lane := 0; lane != 4; lane++ {
		// Start from y and switch to x only when x is strictly smaller.
		pick := y.get(lane)
		if cand := x.get(lane); cand < pick {
			pick = cand
		}
		out.set(lane, pick)
	}
	return out
}
// Mul multiplies x and y lane by lane, keeping the low 32 bits of each
// product.
func (x Uint32s) Mul(y Uint32s) Uint32s {
	var out Uint32s
	for lane := 0; lane != 4; lane++ {
		product := x.get(lane) * y.get(lane)
		out.set(lane, product)
	}
	return out
}
// Not returns x with every one of its 128 bits inverted.
func (x Uint32s) Not() Uint32s {
	const allOnes = ^uint64(0)
	lo := x.a ^ allOnes
	hi := x.b ^ allOnes
	return Uint32s{a: lo, b: hi}
}
// Or returns the bit-by-bit inclusive OR of the two vectors.
func (x Uint32s) Or(y Uint32s) Uint32s {
	lo := x.a | y.a
	hi := x.b | y.b
	return Uint32s{a: lo, b: hi}
}
// ShiftAllLeft shifts every lane left by y bits, filling with zeros.
// A count of 32 or more clears every lane.
func (x Uint32s) ShiftAllLeft(y uint8) Uint32s {
	var out Uint32s
	for lane := 0; lane != 4; lane++ {
		shifted := x.get(lane) << y
		out.set(lane, shifted)
	}
	return out
}
// ShiftAllRight shifts every lane right by y bits (logical shift, zero
// fill). A count of 32 or more clears every lane.
func (x Uint32s) ShiftAllRight(y uint8) Uint32s {
	var out Uint32s
	for lane := 0; lane != 4; lane++ {
		shifted := x.get(lane) >> y
		out.set(lane, shifted)
	}
	return out
}
// RotateAllLeft rotates every lane left by dist bits. Only the low five
// bits of dist matter, i.e. the rotation count is dist modulo 32.
func (x Uint32s) RotateAllLeft(dist uint64) Uint32s {
	k := int(dist & 31)
	var out Uint32s
	for lane := 0; lane != 4; lane++ {
		out.set(lane, bits.RotateLeft32(x.get(lane), k))
	}
	return out
}
// RotateAllRight rotates every lane right by dist bits. Only the low five
// bits of dist matter, i.e. the rotation count is dist modulo 32.
func (x Uint32s) RotateAllRight(dist uint64) Uint32s {
	// A negative count makes RotateLeft32 rotate to the right.
	k := -int(dist & 31)
	var out Uint32s
	for lane := 0; lane != 4; lane++ {
		out.set(lane, bits.RotateLeft32(x.get(lane), k))
	}
	return out
}
// Store copies the lanes of x into s, lowest lane first. When s holds
// fewer than 4 elements only the lanes that fit are written; elements of
// s past index 3 are never touched.
func (x Uint32s) Store(s []uint32) {
	n := len(s)
	if n > 4 {
		n = 4
	}
	for i := range s[:n] {
		s[i] = x.get(i)
	}
}
// StorePart writes the leading lanes of x into s via Store and returns the
// smaller of len(s) and x.Len().
func (x Uint32s) StorePart(s []uint32) int {
	count := min(len(s), x.Len())
	x.Store(s)
	return count
}
// String formats the 4 lanes, lowest first, the way fmt.Sprint prints a
// [4]uint32 array.
func (x Uint32s) String() string {
	elems := [4]uint32{}
	for i := range elems {
		elems[i] = x.get(i)
	}
	return fmt.Sprint(elems)
}
// Sub subtracts y from x lane by lane. The subtraction is done in uint32
// arithmetic, so a lane where y exceeds x wraps around.
func (x Uint32s) Sub(y Uint32s) Uint32s {
	var out Uint32s
	for lane := 0; lane != 4; lane++ {
		diff := x.get(lane) - y.get(lane)
		out.set(lane, diff)
	}
	return out
}
// Xor returns the bit-by-bit exclusive OR of the two vectors.
func (x Uint32s) Xor(y Uint32s) Uint32s {
	lo := x.a ^ y.a
	hi := x.b ^ y.b
	return Uint32s{a: lo, b: hi}
}
// BitsToFloat32 returns a Float32s carrying exactly the same 128 bits as
// x; no numeric conversion takes place.
func (x Uint32s) BitsToFloat32() Float32s {
	var out Float32s
	out.a, out.b = x.a, x.b
	return out
}
// BitsToInt32 returns an Int32s carrying exactly the same 128 bits as x.
func (x Uint32s) BitsToInt32() Int32s {
	var out Int32s
	out.a, out.b = x.a, x.b
	return out
}
// ConvertToInt32 converts each lane to int32. The bit pattern of every
// lane is carried over unchanged.
func (x Uint32s) ConvertToInt32() Int32s {
	var out Int32s
	out.a, out.b = x.a, x.b
	return out
}
// ReshapeToUint16s returns a Uint16s backed by the same 128 bits as x.
func (x Uint32s) ReshapeToUint16s() Uint16s {
	var out Uint16s
	out.a, out.b = x.a, x.b
	return out
}
// ReshapeToUint64s returns a Uint64s backed by the same 128 bits as x.
func (x Uint32s) ReshapeToUint64s() Uint64s {
	var out Uint64s
	out.a, out.b = x.a, x.b
	return out
}
// ReshapeToUint8s returns a Uint8s backed by the same 128 bits as x.
func (x Uint32s) ReshapeToUint8s() Uint8s {
	var out Uint8s
	out.a, out.b = x.a, x.b
	return out
}
// Uint64s is an emulated 128-bit vector of 2 uint64 lanes.
// Lane 0 is stored in a and lane 1 in b.
type Uint64s struct {
	_ _simd
	a uint64
	b uint64
}
// LoadUint64s builds a Uint64s from the first 2 elements of s.
// It panics with an index-out-of-range error if s has fewer than 2 elements.
func LoadUint64s(s []uint64) Uint64s {
	lo := s[0]
	hi := s[1]
	return Uint64s{a: lo, b: hi}
}
// LoadUint64sPart builds a Uint64s from at most the first 2 elements of s
// and returns it together with the number of elements loaded (0, 1 or 2).
// Lanes with no corresponding element are zero.
func LoadUint64sPart(s []uint64) (Uint64s, int) {
	n := len(s)
	// Never report more elements than the vector has lanes, matching the
	// other Load*Part functions.
	if n > 2 {
		n = 2
	}
	var a, b uint64
	if n > 0 {
		a = s[0]
	}
	if n > 1 {
		b = s[1]
	}
	return Uint64s{a: a, b: b}, n
}
// get returns lane i of x: a for index 0, b for any other index.
func (x Uint64s) get(i int) uint64 {
	switch i {
	case 0:
		return x.a
	default:
		return x.b
	}
}
// set stores v in lane i of x: in a for index 0, in b for any other index.
func (x *Uint64s) set(i int, v uint64) {
	target := &x.b
	if i == 0 {
		target = &x.a
	}
	*target = v
}
// Add adds x and y lane by lane. Sums are computed in uint64 arithmetic
// and therefore wrap on overflow.
func (x Uint64s) Add(y Uint64s) Uint64s {
	var out Uint64s
	out.a = x.a + y.a
	out.b = x.b + y.b
	return out
}
// And returns the bit-by-bit AND of the two vectors.
func (x Uint64s) And(y Uint64s) Uint64s {
	lo := x.a & y.a
	hi := x.b & y.b
	return Uint64s{a: lo, b: hi}
}
// AndNot returns x with every bit that is set in y cleared (x AND NOT y).
func (x Uint64s) AndNot(y Uint64s) Uint64s {
	lo := x.a & ^y.a
	hi := x.b & ^y.b
	return Uint64s{a: lo, b: hi}
}
// Equal compares x and y lane by lane and returns a mask whose lanes are
// set where the two values are equal.
func (x Uint64s) Equal(y Uint64s) Mask64s {
	var m Mask64s
	m.set(0, x.a == y.a)
	m.set(1, x.b == y.b)
	return m
}
// Greater compares x and y lane by lane (unsigned) and returns a mask
// whose lanes are set where x > y.
func (x Uint64s) Greater(y Uint64s) Mask64s {
	var m Mask64s
	m.set(0, x.get(0) > y.get(0))
	m.set(1, x.get(1) > y.get(1))
	return m
}
// GreaterEqual compares x and y lane by lane (unsigned) and returns a mask
// whose lanes are set where x >= y.
func (x Uint64s) GreaterEqual(y Uint64s) Mask64s {
	var m Mask64s
	m.set(0, x.get(0) >= y.get(0))
	m.set(1, x.get(1) >= y.get(1))
	return m
}
// Less compares x and y lane by lane (unsigned) and returns a mask whose
// lanes are set where x < y.
func (x Uint64s) Less(y Uint64s) Mask64s {
	var m Mask64s
	m.set(0, x.get(0) < y.get(0))
	m.set(1, x.get(1) < y.get(1))
	return m
}
// LessEqual compares x and y lane by lane (unsigned) and returns a mask
// whose lanes are set where x <= y.
func (x Uint64s) LessEqual(y Uint64s) Mask64s {
	var m Mask64s
	m.set(0, x.get(0) <= y.get(0))
	m.set(1, x.get(1) <= y.get(1))
	return m
}
// NotEqual compares x and y lane by lane and returns a mask whose lanes
// are set where the two values differ.
func (x Uint64s) NotEqual(y Uint64s) Mask64s {
	var m Mask64s
	m.set(0, x.a != y.a)
	m.set(1, x.b != y.b)
	return m
}
// Len reports the lane count of a Uint64s, which is always 2.
func (x Uint64s) Len() int {
	// 128 vector bits divided into 64-bit lanes.
	const lanes = 128 / 64
	return lanes
}
// Masked keeps the bits of x that are set in mask and zeroes the rest, so
// lanes whose mask field is all ones pass through and cleared lanes become 0.
func (x Uint64s) Masked(mask Mask64s) Uint64s {
	var out Uint64s
	out.a, out.b = x.a&mask.a, x.b&mask.b
	return out
}
// IfElse blends two vectors bit by bit: bits set in mask select from x,
// cleared bits select from y.
func (x Uint64s) IfElse(mask Mask64s, y Uint64s) Uint64s {
	// y ^ ((x ^ y) & m) yields x where m is 1 and y where m is 0.
	lo := y.a ^ ((x.a ^ y.a) & mask.a)
	hi := y.b ^ ((x.b ^ y.b) & mask.b)
	return Uint64s{a: lo, b: hi}
}
// Not returns x with every one of its 128 bits inverted.
func (x Uint64s) Not() Uint64s {
	const allOnes = ^uint64(0)
	lo := x.a ^ allOnes
	hi := x.b ^ allOnes
	return Uint64s{a: lo, b: hi}
}
// Or returns the bit-by-bit inclusive OR of the two vectors.
func (x Uint64s) Or(y Uint64s) Uint64s {
	lo := x.a | y.a
	hi := x.b | y.b
	return Uint64s{a: lo, b: hi}
}
// ShiftAllLeft shifts both lanes left by y bits, filling with zeros.
// A count of 64 or more clears both lanes.
func (x Uint64s) ShiftAllLeft(y uint8) Uint64s {
	var out Uint64s
	out.a = x.a << y
	out.b = x.b << y
	return out
}
// ShiftAllRight shifts both lanes right by y bits (logical shift, zero
// fill). A count of 64 or more clears both lanes.
func (x Uint64s) ShiftAllRight(y uint8) Uint64s {
	var out Uint64s
	out.a = x.a >> y
	out.b = x.b >> y
	return out
}
// RotateAllLeft rotates both lanes left by dist bits. Only the low six
// bits of dist matter, i.e. the rotation count is dist modulo 64.
func (x Uint64s) RotateAllLeft(dist uint64) Uint64s {
	k := int(dist & 63)
	lo := bits.RotateLeft64(x.a, k)
	hi := bits.RotateLeft64(x.b, k)
	return Uint64s{a: lo, b: hi}
}
// RotateAllRight rotates both lanes right by dist bits. Only the low six
// bits of dist matter, i.e. the rotation count is dist modulo 64.
func (x Uint64s) RotateAllRight(dist uint64) Uint64s {
	// A negative count makes RotateLeft64 rotate to the right.
	k := -int(dist & 63)
	lo := bits.RotateLeft64(x.a, k)
	hi := bits.RotateLeft64(x.b, k)
	return Uint64s{a: lo, b: hi}
}
// Store copies the lanes of x into s, lane 0 first. When s holds fewer
// than 2 elements only the lanes that fit are written; elements of s past
// index 1 are never touched.
func (x Uint64s) Store(s []uint64) {
	n := len(s)
	if n > 2 {
		n = 2
	}
	for i := range s[:n] {
		s[i] = x.get(i)
	}
}
// StorePart writes the leading lanes of x into s via Store and returns the
// smaller of len(s) and x.Len().
func (x Uint64s) StorePart(s []uint64) int {
	count := min(len(s), x.Len())
	x.Store(s)
	return count
}
// String formats the 2 lanes, lane 0 first, the way fmt.Sprint prints a
// [2]uint64 array.
func (x Uint64s) String() string {
	elems := [2]uint64{0: x.a, 1: x.b}
	return fmt.Sprint(elems)
}
// Sub subtracts y from x lane by lane. The subtraction is done in uint64
// arithmetic, so a lane where y exceeds x wraps around.
func (x Uint64s) Sub(y Uint64s) Uint64s {
	var out Uint64s
	out.a = x.a - y.a
	out.b = x.b - y.b
	return out
}
// Xor returns the bit-by-bit exclusive OR of the two vectors.
func (x Uint64s) Xor(y Uint64s) Uint64s {
	lo := x.a ^ y.a
	hi := x.b ^ y.b
	return Uint64s{a: lo, b: hi}
}
// BitsToFloat64 returns a Float64s carrying exactly the same 128 bits as
// x; no numeric conversion takes place.
func (x Uint64s) BitsToFloat64() Float64s {
	var out Float64s
	out.a, out.b = x.a, x.b
	return out
}
// BitsToInt64 returns an Int64s carrying exactly the same 128 bits as x.
func (x Uint64s) BitsToInt64() Int64s {
	var out Int64s
	out.a, out.b = x.a, x.b
	return out
}
// ConvertToInt64 converts each lane to int64. The bit pattern of every
// lane is carried over unchanged.
func (x Uint64s) ConvertToInt64() Int64s {
	var out Int64s
	out.a, out.b = x.a, x.b
	return out
}
// ReshapeToUint16s returns a Uint16s backed by the same 128 bits as x.
func (x Uint64s) ReshapeToUint16s() Uint16s {
	var out Uint16s
	out.a, out.b = x.a, x.b
	return out
}
// ReshapeToUint32s returns a Uint32s backed by the same 128 bits as x.
func (x Uint64s) ReshapeToUint32s() Uint32s {
	var out Uint32s
	out.a, out.b = x.a, x.b
	return out
}
// ReshapeToUint8s returns a Uint8s backed by the same 128 bits as x.
func (x Uint64s) ReshapeToUint8s() Uint8s {
	var out Uint8s
	out.a, out.b = x.a, x.b
	return out
}
// Float32s is an emulated 128-bit vector of 4 float32 lanes.
// Each lane is kept as its IEEE 754 bit pattern: lanes 0-1 are packed into
// a and lanes 2-3 into b, lower lane in the least significant 32 bits.
type Float32s struct {
	_ _simd
	a uint64
	b uint64
}
// LoadFloat32s builds a Float32s from the first 4 elements of s.
// It panics with an index-out-of-range error if s has fewer than 4 elements.
func LoadFloat32s(s []float32) Float32s {
	// words[0] collects lanes 0-1, words[1] lanes 2-3.
	var words [2]uint64
	for i := 0; i < 4; i++ {
		raw := uint64(math.Float32bits(s[i]))
		words[i/2] |= raw << (32 * (i % 2))
	}
	return Float32s{a: words[0], b: words[1]}
}
// LoadFloat32sPart builds a Float32s from at most the first 4 elements of
// s and returns it together with the number of elements loaded. Lanes with
// no corresponding element hold all-zero bits (+0).
func LoadFloat32sPart(s []float32) (Float32s, int) {
	count := len(s)
	if count > 4 {
		count = 4
	}
	// words[0] collects lanes 0-1, words[1] lanes 2-3.
	var words [2]uint64
	for i, v := range s[:count] {
		raw := uint64(math.Float32bits(v))
		words[i/2] |= raw << (32 * (i % 2))
	}
	return Float32s{a: words[0], b: words[1]}, count
}
// get returns lane i of x as a float32. Lanes 0-1 are decoded from a,
// higher indices from b.
func (x Float32s) get(i int) float32 {
	word, lane := x.a, i
	if i >= 2 {
		word, lane = x.b, i-2
	}
	raw := uint32(word >> (32 * lane))
	return math.Float32frombits(raw)
}
// set overwrites lane i of x with the bit pattern of v, leaving the other
// lanes untouched.
func (x *Float32s) set(i int, v float32) {
	raw := uint64(math.Float32bits(v))
	word, lane := &x.a, i
	if i >= 2 {
		word, lane = &x.b, i-2
	}
	shift := 32 * lane
	// Clear the lane's 32-bit field, then drop the new bits in.
	cleared := *word &^ (uint64(0xffffffff) << shift)
	*word = cleared | raw<<shift
}
// Abs returns the element-wise absolute value of x.
//
// The sign bit of every lane is cleared, so negative values (including
// -Inf) become positive, -0 becomes +0, and a NaN keeps its payload with
// the sign cleared. A comparison-based implementation (v < 0) would leave
// -0 and negative-signed NaNs unchanged.
func (x Float32s) Abs() Float32s {
	// Bit 31 of each 32-bit lane is the IEEE 754 sign bit; every 64-bit
	// word holds two lanes, hence two sign bits per word.
	const signBits = uint64(0x8000000080000000)
	return Float32s{a: x.a &^ signBits, b: x.b &^ signBits}
}
// Add adds x and y lane by lane using float32 arithmetic.
func (x Float32s) Add(y Float32s) Float32s {
	var out Float32s
	for lane := 0; lane != 4; lane++ {
		sum := x.get(lane) + y.get(lane)
		out.set(lane, sum)
	}
	return out
}
// ConvertToInt32 converts each lane to int32 with Go's float-to-integer
// conversion, which discards the fractional part. Lanes that do not fit in
// an int32 (or are NaN) get whatever that conversion yields on the platform.
func (x Float32s) ConvertToInt32() Int32s {
	var out Int32s
	for lane := 0; lane != 4; lane++ {
		f := x.get(lane)
		out.set(lane, int32(f))
	}
	return out
}
// Div divides x by y lane by lane using float32 arithmetic.
func (x Float32s) Div(y Float32s) Float32s {
	var out Float32s
	for lane := 0; lane != 4; lane++ {
		quotient := x.get(lane) / y.get(lane)
		out.set(lane, quotient)
	}
	return out
}
// Equal compares x and y lane by lane with float32 == and returns a mask
// whose lanes are set where the comparison holds. A lane involving NaN is
// never set.
func (x Float32s) Equal(y Float32s) Mask32s {
	var m Mask32s
	for lane := 0; lane != 4; lane++ {
		m.set(lane, x.get(lane) == y.get(lane))
	}
	return m
}
// Greater compares x and y lane by lane and returns a mask whose lanes
// are set where x > y. A lane involving NaN is never set.
func (x Float32s) Greater(y Float32s) Mask32s {
	var m Mask32s
	for lane := 0; lane != 4; lane++ {
		m.set(lane, x.get(lane) > y.get(lane))
	}
	return m
}
// GreaterEqual compares x and y lane by lane and returns a mask whose
// lanes are set where x >= y. A lane involving NaN is never set.
func (x Float32s) GreaterEqual(y Float32s) Mask32s {
	var m Mask32s
	for lane := 0; lane != 4; lane++ {
		m.set(lane, x.get(lane) >= y.get(lane))
	}
	return m
}
// Len reports the lane count of a Float32s, which is always 4.
func (x Float32s) Len() int {
	// 128 vector bits divided into 32-bit lanes.
	const lanes = 128 / 32
	return lanes
}
// Less compares x and y lane by lane and returns a mask whose lanes are
// set where x < y. A lane involving NaN is never set.
func (x Float32s) Less(y Float32s) Mask32s {
	var m Mask32s
	for lane := 0; lane != 4; lane++ {
		m.set(lane, x.get(lane) < y.get(lane))
	}
	return m
}
// LessEqual compares x and y lane by lane and returns a mask whose lanes
// are set where x <= y. A lane involving NaN is never set.
func (x Float32s) LessEqual(y Float32s) Mask32s {
	var m Mask32s
	for lane := 0; lane != 4; lane++ {
		m.set(lane, x.get(lane) <= y.get(lane))
	}
	return m
}
// Masked keeps the bits of x that are set in mask and zeroes the rest, so
// lanes whose mask field is all ones pass through and cleared lanes become
// all-zero bits (+0).
func (x Float32s) Masked(mask Mask32s) Float32s {
	var out Float32s
	out.a, out.b = x.a&mask.a, x.b&mask.b
	return out
}
// Max returns, for every lane, x's value when x > y and y's value
// otherwise. Because the choice rests on a single comparison, a lane where
// either operand is NaN yields y's value.
func (x Float32s) Max(y Float32s) Float32s {
	var out Float32s
	for lane := 0; lane != 4; lane++ {
		pick := y.get(lane)
		if cand := x.get(lane); cand > pick {
			pick = cand
		}
		out.set(lane, pick)
	}
	return out
}
// IfElse blends two vectors bit by bit: bits set in mask select from x,
// cleared bits select from y.
func (x Float32s) IfElse(mask Mask32s, y Float32s) Float32s {
	// y ^ ((x ^ y) & m) yields x where m is 1 and y where m is 0.
	lo := y.a ^ ((x.a ^ y.a) & mask.a)
	hi := y.b ^ ((x.b ^ y.b) & mask.b)
	return Float32s{a: lo, b: hi}
}
// Min returns, for every lane, x's value when x < y and y's value
// otherwise. Because the choice rests on a single comparison, a lane where
// either operand is NaN yields y's value.
func (x Float32s) Min(y Float32s) Float32s {
	var out Float32s
	for lane := 0; lane != 4; lane++ {
		pick := y.get(lane)
		if cand := x.get(lane); cand < pick {
			pick = cand
		}
		out.set(lane, pick)
	}
	return out
}
// Mul multiplies x and y lane by lane using float32 arithmetic.
func (x Float32s) Mul(y Float32s) Float32s {
	var out Float32s
	for lane := 0; lane != 4; lane++ {
		product := x.get(lane) * y.get(lane)
		out.set(lane, product)
	}
	return out
}
// MulAdd returns x * y + z element-wise: each lane of the receiver is
// multiplied by the matching lane of y and the matching lane of z is added
// to the product.
//
// The Go specification lets the compiler fuse the multiply and add into a
// single rounding step, so the last bit of a lane may differ between
// architectures.
func (x Float32s) MulAdd(y, z Float32s) Float32s {
	var res Float32s
	for i := 0; i < 4; i++ {
		// x and y are the factors and z the addend; mixing them up
		// yields x + y*z instead.
		res.set(i, x.get(i)*y.get(i)+z.get(i))
	}
	return res
}
// Neg returns x with the sign of every lane negated.
func (x Float32s) Neg() Float32s {
	var out Float32s
	for lane := 0; lane != 4; lane++ {
		v := x.get(lane)
		out.set(lane, -v)
	}
	return out
}
// NotEqual compares x and y lane by lane with float32 != and returns a
// mask whose lanes are set where the comparison holds. A lane involving
// NaN is always set.
func (x Float32s) NotEqual(y Float32s) Mask32s {
	var m Mask32s
	for lane := 0; lane != 4; lane++ {
		m.set(lane, x.get(lane) != y.get(lane))
	}
	return m
}
// Sqrt returns the square root of every lane. Each lane is widened to
// float64, passed through math.Sqrt and narrowed back to float32.
func (x Float32s) Sqrt() Float32s {
	var out Float32s
	for lane := 0; lane != 4; lane++ {
		wide := float64(x.get(lane))
		root := math.Sqrt(wide)
		out.set(lane, float32(root))
	}
	return out
}
// Store copies the lanes of x into s, lowest lane first. When s holds
// fewer than 4 elements only the lanes that fit are written; elements of
// s past index 3 are never touched.
func (x Float32s) Store(s []float32) {
	n := len(s)
	if n > 4 {
		n = 4
	}
	for i := range s[:n] {
		s[i] = x.get(i)
	}
}
// StorePart writes the leading lanes of x into s via Store and returns the
// smaller of len(s) and x.Len().
func (x Float32s) StorePart(s []float32) int {
	count := min(len(s), x.Len())
	x.Store(s)
	return count
}
// String formats the 4 lanes, lowest first, the way fmt.Sprint prints a
// [4]float32 array.
func (x Float32s) String() string {
	elems := [4]float32{}
	for i := range elems {
		elems[i] = x.get(i)
	}
	return fmt.Sprint(elems)
}
// Sub subtracts y from x lane by lane using float32 arithmetic.
func (x Float32s) Sub(y Float32s) Float32s {
	var out Float32s
	for lane := 0; lane != 4; lane++ {
		diff := x.get(lane) - y.get(lane)
		out.set(lane, diff)
	}
	return out
}
// ToBits returns a Uint32s carrying exactly the same 128 bits as x, i.e.
// the raw IEEE 754 encoding of each lane.
func (x Float32s) ToBits() Uint32s {
	var out Uint32s
	out.a, out.b = x.a, x.b
	return out
}
// Float64s is an emulated 128-bit vector of 2 float64 lanes.
// Each lane is kept as its IEEE 754 bit pattern: lane 0 in a, lane 1 in b.
type Float64s struct {
	_ _simd
	a uint64
	b uint64
}
// LoadFloat64s builds a Float64s from the first 2 elements of s.
// It panics with an index-out-of-range error if s has fewer than 2 elements.
func LoadFloat64s(s []float64) Float64s {
	lo := math.Float64bits(s[0])
	hi := math.Float64bits(s[1])
	return Float64s{a: lo, b: hi}
}
// LoadFloat64sPart builds a Float64s from at most the first 2 elements of
// s and returns it together with the number of elements loaded (0, 1 or
// 2). Lanes with no corresponding element hold all-zero bits (+0).
func LoadFloat64sPart(s []float64) (Float64s, int) {
	n := len(s)
	// Never report more elements than the vector has lanes, matching the
	// other Load*Part functions.
	if n > 2 {
		n = 2
	}
	var a, b uint64
	if n > 0 {
		a = math.Float64bits(s[0])
	}
	if n > 1 {
		b = math.Float64bits(s[1])
	}
	return Float64s{a: a, b: b}, n
}
// get returns lane i of x as a float64: decoded from a for index 0 and
// from b for any other index.
func (x Float64s) get(i int) float64 {
	raw := x.b
	if i == 0 {
		raw = x.a
	}
	return math.Float64frombits(raw)
}
// set stores the bit pattern of v in lane i of x: in a for index 0, in b
// for any other index.
func (x *Float64s) set(i int, v float64) {
	raw := math.Float64bits(v)
	if i == 0 {
		x.a = raw
		return
	}
	x.b = raw
}
// Abs returns the element-wise absolute value of x.
//
// The sign bit of both lanes is cleared, so negative values (including
// -Inf) become positive, -0 becomes +0, and a NaN keeps its payload with
// the sign cleared. A comparison-based implementation (v < 0) would leave
// -0 and negative-signed NaNs unchanged.
func (x Float64s) Abs() Float64s {
	// Bit 63 is the IEEE 754 sign bit of a float64; each 64-bit word holds
	// exactly one lane.
	const signBit = uint64(1) << 63
	return Float64s{a: x.a &^ signBit, b: x.b &^ signBit}
}
// Add adds x and y lane by lane using float64 arithmetic.
func (x Float64s) Add(y Float64s) Float64s {
	var out Float64s
	for lane := 0; lane < 2; lane++ {
		out.set(lane, x.get(lane)+y.get(lane))
	}
	return out
}
// Div divides x by y lane by lane using float64 arithmetic.
func (x Float64s) Div(y Float64s) Float64s {
	var out Float64s
	for lane := 0; lane < 2; lane++ {
		out.set(lane, x.get(lane)/y.get(lane))
	}
	return out
}
// Equal compares x and y lane by lane with float64 == and returns a mask
// whose lanes are set where the comparison holds. A lane involving NaN is
// never set.
func (x Float64s) Equal(y Float64s) Mask64s {
	var m Mask64s
	for lane := 0; lane < 2; lane++ {
		m.set(lane, x.get(lane) == y.get(lane))
	}
	return m
}
// Greater compares x and y lane by lane and returns a mask whose lanes
// are set where x > y. A lane involving NaN is never set.
func (x Float64s) Greater(y Float64s) Mask64s {
	var m Mask64s
	for lane := 0; lane < 2; lane++ {
		m.set(lane, x.get(lane) > y.get(lane))
	}
	return m
}
// GreaterEqual compares x and y lane by lane and returns a mask whose
// lanes are set where x >= y. A lane involving NaN is never set.
func (x Float64s) GreaterEqual(y Float64s) Mask64s {
	var m Mask64s
	for lane := 0; lane < 2; lane++ {
		m.set(lane, x.get(lane) >= y.get(lane))
	}
	return m
}
// Len reports the lane count of a Float64s, which is always 2.
func (x Float64s) Len() int {
	// 128 vector bits divided into 64-bit lanes.
	const lanes = 128 / 64
	return lanes
}
// Less compares x and y lane by lane and returns a mask whose lanes are
// set where x < y. A lane involving NaN is never set.
func (x Float64s) Less(y Float64s) Mask64s {
	var m Mask64s
	for lane := 0; lane < 2; lane++ {
		m.set(lane, x.get(lane) < y.get(lane))
	}
	return m
}
// LessEqual compares x and y lane by lane and returns a mask whose lanes
// are set where x <= y. A lane involving NaN is never set.
func (x Float64s) LessEqual(y Float64s) Mask64s {
	var m Mask64s
	for lane := 0; lane < 2; lane++ {
		m.set(lane, x.get(lane) <= y.get(lane))
	}
	return m
}
// Masked keeps the bits of x that are set in mask and zeroes the rest, so
// lanes whose mask field is all ones pass through and cleared lanes become
// all-zero bits (+0).
func (x Float64s) Masked(mask Mask64s) Float64s {
	var out Float64s
	out.a, out.b = x.a&mask.a, x.b&mask.b
	return out
}
// Max returns, for every lane, x's value when x > y and y's value
// otherwise. Because the choice rests on a single comparison, a lane where
// either operand is NaN yields y's value.
func (x Float64s) Max(y Float64s) Float64s {
	var out Float64s
	for lane := 0; lane < 2; lane++ {
		pick := y.get(lane)
		if cand := x.get(lane); cand > pick {
			pick = cand
		}
		out.set(lane, pick)
	}
	return out
}
// IfElse blends two vectors bit by bit: bits set in mask select from x,
// cleared bits select from y.
func (x Float64s) IfElse(mask Mask64s, y Float64s) Float64s {
	// y ^ ((x ^ y) & m) yields x where m is 1 and y where m is 0.
	lo := y.a ^ ((x.a ^ y.a) & mask.a)
	hi := y.b ^ ((x.b ^ y.b) & mask.b)
	return Float64s{a: lo, b: hi}
}
// Min returns, for every lane, x's value when x < y and y's value
// otherwise. Because the choice rests on a single comparison, a lane where
// either operand is NaN yields y's value.
func (x Float64s) Min(y Float64s) Float64s {
	var out Float64s
	for lane := 0; lane < 2; lane++ {
		pick := y.get(lane)
		if cand := x.get(lane); cand < pick {
			pick = cand
		}
		out.set(lane, pick)
	}
	return out
}
// Mul multiplies x and y lane by lane using float64 arithmetic.
func (x Float64s) Mul(y Float64s) Float64s {
	var out Float64s
	for lane := 0; lane < 2; lane++ {
		out.set(lane, x.get(lane)*y.get(lane))
	}
	return out
}
// MulAdd returns x * y + z element-wise: each lane of the receiver is
// multiplied by the matching lane of y and the matching lane of z is added
// to the product.
//
// The Go specification lets the compiler fuse the multiply and add into a
// single rounding step, so the last bit of a lane may differ between
// architectures.
func (x Float64s) MulAdd(y, z Float64s) Float64s {
	var res Float64s
	// x and y are the factors and z the addend; mixing them up yields
	// x + y*z instead.
	res.set(0, x.get(0)*y.get(0)+z.get(0))
	res.set(1, x.get(1)*y.get(1)+z.get(1))
	return res
}
// Neg returns the element-wise negation of x.
func (x Float64s) Neg() Float64s {
	// A Float64s has exactly two lanes. Looping to 4 (the Float32s lane
	// count) only worked because get and set fold every non-zero index
	// onto lane 1, so lane 1 was rewritten three times.
	var res Float64s
	res.set(0, -x.get(0))
	res.set(1, -x.get(1))
	return res
}
// NotEqual compares x and y lane by lane with float64 != and returns a
// mask whose lanes are set where the comparison holds. A lane involving
// NaN is always set.
func (x Float64s) NotEqual(y Float64s) Mask64s {
	var m Mask64s
	for lane := 0; lane < 2; lane++ {
		m.set(lane, x.get(lane) != y.get(lane))
	}
	return m
}
// Sqrt returns the square root of every lane, computed with math.Sqrt.
func (x Float64s) Sqrt() Float64s {
	var out Float64s
	for lane := 0; lane < 2; lane++ {
		root := math.Sqrt(x.get(lane))
		out.set(lane, root)
	}
	return out
}
// Store copies the lanes of x into s, lane 0 first. When s holds fewer
// than 2 elements only the lanes that fit are written; elements of s past
// index 1 are never touched.
func (x Float64s) Store(s []float64) {
	n := len(s)
	if n > 2 {
		n = 2
	}
	for i := range s[:n] {
		s[i] = x.get(i)
	}
}
// StorePart writes the leading lanes of x into s via Store and returns the
// smaller of len(s) and x.Len().
func (x Float64s) StorePart(s []float64) int {
	count := min(len(s), x.Len())
	x.Store(s)
	return count
}
// String formats the 2 lanes, lane 0 first, the way fmt.Sprint prints a
// [2]float64 array.
func (x Float64s) String() string {
	var elems [2]float64
	for i := range elems {
		elems[i] = x.get(i)
	}
	return fmt.Sprint(elems)
}
// Sub subtracts y from x lane by lane using float64 arithmetic.
func (x Float64s) Sub(y Float64s) Float64s {
	var out Float64s
	for lane := 0; lane < 2; lane++ {
		out.set(lane, x.get(lane)-y.get(lane))
	}
	return out
}
// ToBits returns a Uint64s carrying exactly the same 128 bits as x, i.e.
// the raw IEEE 754 encoding of each lane.
func (x Float64s) ToBits() Uint64s {
	var out Uint64s
	out.a, out.b = x.a, x.b
	return out
}
// Mask8s is a 128-bit mask for vectors of 16 int8/uint8 lanes, with one
// 8-bit field per lane. Lanes 0-7 live in a and lanes 8-15 in b; set marks
// a lane as true by filling its field with ones.
type Mask8s struct {
	_ _simd
	a uint64
	b uint64
}
// set marks lane i as true by filling its 8-bit field with ones when v is
// true. It does nothing when v is false; in particular it never clears a
// lane that is already set.
func (x *Mask8s) set(i int, v bool) {
	if !v {
		return
	}
	word, lane := &x.a, i
	if i >= 8 {
		word, lane = &x.b, i-8
	}
	*word |= uint64(0xff) << (8 * lane)
}
// And returns the mask whose bits are set in both x and y.
func (x Mask8s) And(y Mask8s) Mask8s {
	lo := x.a & y.a
	hi := x.b & y.b
	return Mask8s{a: lo, b: hi}
}
// Or returns the mask whose bits are set in x, in y, or in both.
func (x Mask8s) Or(y Mask8s) Mask8s {
	lo := x.a | y.a
	hi := x.b | y.b
	return Mask8s{a: lo, b: hi}
}
// String renders the two 64-bit halves of the mask in 0x-prefixed
// hexadecimal, as "{a:0x..., b:0x...}".
func (x Mask8s) String() string {
	lo, hi := x.a, x.b
	return fmt.Sprintf("{a:%#x, b:%#x}", lo, hi)
}
// ToInt8s returns an Int8s carrying exactly the same 128 bits as the
// mask, so a lane whose field is all ones reads as -1 and a clear lane as 0.
func (x Mask8s) ToInt8s() Int8s {
	var out Int8s
	out.a, out.b = x.a, x.b
	return out
}
// Mask16s is a 128-bit mask for vectors of 8 int16/uint16 lanes, with one
// 16-bit field per lane. Lanes 0-3 live in a and lanes 4-7 in b; set marks
// a lane as true by filling its field with ones.
type Mask16s struct {
	_ _simd
	a uint64
	b uint64
}
// set marks lane i as true by filling its 16-bit field with ones when v
// is true. It does nothing when v is false; in particular it never clears
// a lane that is already set.
func (x *Mask16s) set(i int, v bool) {
	if !v {
		return
	}
	word, lane := &x.a, i
	if i >= 4 {
		word, lane = &x.b, i-4
	}
	*word |= uint64(0xffff) << (16 * lane)
}
// And returns the mask whose bits are set in both x and y.
func (x Mask16s) And(y Mask16s) Mask16s {
	lo := x.a & y.a
	hi := x.b & y.b
	return Mask16s{a: lo, b: hi}
}
// Or returns the mask formed by ORing the bits of x with the bits of y.
func (x Mask16s) Or(y Mask16s) Mask16s {
	// x is a copy of the receiver, so it can be updated in place.
	x.a |= y.a
	x.b |= y.b
	return x
}
// String formats the mask as its two 64-bit words in hexadecimal,
// in the form {a:0x..., b:0x...}.
func (x Mask16s) String() string {
	lo, hi := x.a, x.b
	return fmt.Sprintf("{a:%#x, b:%#x}", lo, hi)
}
// ToInt16s returns an Int16s with exactly the same bits as the mask.
func (x Mask16s) ToInt16s() Int16s {
	var v Int16s
	v.a, v.b = x.a, x.b
	return v
}
// Mask32s is a 128-bit mask with four 32-bit lanes, one per element of an
// int32/uint32/float32 vector.
type Mask32s struct {
	_ _simd
	a uint64 // lanes 0-1; lane i occupies bits 32*i through 32*i+31
	b uint64 // lanes 2-3; lane i occupies bits 32*(i-2) through 32*(i-2)+31
}
// set marks lane i as true when v is true by filling the lane's 32 bits
// with ones. When v is false it does nothing: a lane is never cleared,
// so a false lane is only obtained if it was already zero.
func (x *Mask32s) set(i int, v bool) {
	if !v {
		return
	}
	const lane = uint64(0xffffffff)
	if i < 2 {
		x.a |= lane << (32 * i)
		return
	}
	x.b |= lane << (32 * (i - 2))
}
// And returns the mask formed by ANDing the bits of x with the bits of y.
func (x Mask32s) And(y Mask32s) Mask32s {
	// x is a copy of the receiver, so it can be updated in place.
	x.a &= y.a
	x.b &= y.b
	return x
}
// Or returns the mask formed by ORing the bits of x with the bits of y.
func (x Mask32s) Or(y Mask32s) Mask32s {
	// x is a copy of the receiver, so it can be updated in place.
	x.a |= y.a
	x.b |= y.b
	return x
}
// String formats the mask as its two 64-bit words in hexadecimal,
// in the form {a:0x..., b:0x...}.
func (x Mask32s) String() string {
	lo, hi := x.a, x.b
	return fmt.Sprintf("{a:%#x, b:%#x}", lo, hi)
}
// ToInt32s returns an Int32s with exactly the same bits as the mask.
func (x Mask32s) ToInt32s() Int32s {
	var v Int32s
	v.a, v.b = x.a, x.b
	return v
}
// Mask64s is a 128-bit mask with two 64-bit lanes, one per element of an
// int64/uint64/float64 vector.
type Mask64s struct {
	_ _simd
	a uint64 // lane 0
	b uint64 // lane 1
}
// set marks a lane as true when v is true by filling its 64 bits with
// ones: index 0 selects the first lane and any other index the second.
// When v is false it does nothing: a lane is never cleared, so a false
// lane is only obtained if it was already zero.
func (x *Mask64s) set(i int, v bool) {
	if !v {
		return
	}
	const allOnes = ^uint64(0)
	if i != 0 {
		x.b = allOnes
		return
	}
	x.a = allOnes
}
// And returns the mask formed by ANDing the bits of x with the bits of y.
func (x Mask64s) And(y Mask64s) Mask64s {
	// x is a copy of the receiver, so it can be updated in place.
	x.a &= y.a
	x.b &= y.b
	return x
}
// Or returns the mask formed by ORing the bits of x with the bits of y.
func (x Mask64s) Or(y Mask64s) Mask64s {
	// x is a copy of the receiver, so it can be updated in place.
	x.a |= y.a
	x.b |= y.b
	return x
}
// String formats the mask as its two 64-bit words in hexadecimal,
// in the form {a:0x..., b:0x...}.
func (x Mask64s) String() string {
	lo, hi := x.a, x.b
	return fmt.Sprintf("{a:%#x, b:%#x}", lo, hi)
}
// ToInt64s returns an Int64s with exactly the same bits as the mask.
func (x Mask64s) ToInt64s() Int64s {
	var v Int64s
	v.a, v.b = x.a, x.b
	return v
}
// newT builds a Uint64s from a low word (stored in field a) and a high
// word (stored in field b).
func newT(lo, hi uint64) Uint64s {
	var t Uint64s
	t.a = lo
	t.b = hi
	return t
}
// mwl returns the full 128-bit integer product of x.a and y.a, with the
// low 64 bits in field a and the high 64 bits in field b. The b fields of
// the operands are ignored.
func (x Uint64s) mwl(y Uint64s) Uint64s {
	var product Uint64s
	// bits.Mul64 returns (hi, lo), in that order.
	product.b, product.a = bits.Mul64(x.a, y.a)
	return product
}
var (
	// Mask mK has bit J set exactly when J mod 5 == K, for J in 0..127.
	// The first argument to newT is the low word (bits 0-63) and the second
	// is the high word (bits 64-127). Because 64 mod 5 == 4, the high word
	// of mK is the same pattern as the low word of m((K+1) mod 5).
	m0 = newT(
		0x1084210842108421,
		0x2108421084210842,
	)
	m1 = newT(
		0x2108421084210842,
		0x4210842108421084,
	)
	m2 = newT(
		0x4210842108421084,
		0x8421084210842108,
	)
	m3 = newT(
		0x8421084210842108,
		0x0842108421084210,
	)
	m4 = newT(
		0x0842108421084210,
		0x1084210842108421,
	)
)
// clmul returns the 128-bit carryless (polynomial over GF(2)) product of
// x.a and y.a, with the low 64 bits in field a and the high 64 bits in
// field b. The b fields of the operands do not affect the result.
//
// The product is assembled from ordinary integer multiplications. Each
// operand is split into five pieces, piece K keeping only the bits whose
// index is K mod 5. Within a piece the set bits are five positions apart,
// so when two pieces are multiplied, the partial products that land on the
// same bit position sum to at most 13 and their carries stay within the
// four unused bit positions above it. The lowest bit of each such sum is
// the XOR of the partial products, which is the carryless result bit.
//
// Multiplying piece I of x by piece J of y puts its meaningful bits at
// indices congruent to I+J mod 5. For each residue K, the five products
// with (I+J) mod 5 == K are XORed together and masked with mK, and the
// five masked values are ORed into the result.
func (x Uint64s) clmul(y Uint64s) Uint64s {
	masks := [5]Uint64s{m0, m1, m2, m3, m4}

	// xs[k] and ys[k] keep only the bits at indices congruent to k mod 5.
	var xs, ys [5]Uint64s
	for k, m := range masks {
		xs[k] = x.And(m)
		ys[k] = y.And(m)
	}

	var z Uint64s
	for k, m := range masks {
		// Combine every pair (i, j) with (i+j) mod 5 == k.
		var acc Uint64s
		for i := range xs {
			j := (k - i + 5) % 5
			acc = acc.Xor(xs[i].mwl(ys[j]))
		}
		// Only bits at indices congruent to k mod 5 are meaningful here;
		// the rest hold carries from the integer multiplies.
		z = z.Or(acc.And(m))
	}
	return z
}
// CarrylessMultiplyEven returns the carryless product of the
// even-indexed 64-bit elements of x and y (field a of each operand).
// The 128-bit product fills the whole even-odd pair of the result.
//
// Carryless multiplication is long multiplication in which the partial
// products are combined with bitwise XOR instead of add-with-carry.
// For example, in base two:
//
//	11 * 11 = 11 * (10 ^ 1) = (11 * 10) ^ (11 * 1) = 110 ^ 11 = 101
//
// Equivalently, it multiplies polynomials whose coefficients come from
// GF(2): 11 stands for x+1, and (x+1)*(x+1) = x**2 + (1^1)x + 1 =
// x**2 + 0x + 1 = x**2 + 1, which is written 101. (Here "+" separates
// polynomial terms, while the coefficients themselves "add" with XOR.)
func (x Uint64s) CarrylessMultiplyEven(y Uint64s) Uint64s {
	product := x.clmul(y)
	return product
}
// CarrylessMultiplyOdd returns the carryless product of the
// odd-indexed 64-bit elements of x and y (field b of each operand).
// The 128-bit product fills the whole even-odd pair of the result.
//
// Carryless multiplication is long multiplication in which the partial
// products are combined with bitwise XOR instead of add-with-carry.
// For example, in base two:
//
//	11 * 11 = 11 * (10 ^ 1) = (11 * 10) ^ (11 * 1) = 110 ^ 11 = 101
//
// Equivalently, it multiplies polynomials whose coefficients come from
// GF(2): 11 stands for x+1, and (x+1)*(x+1) = x**2 + (1^1)x + 1 =
// x**2 + 0x + 1 = x**2 + 1, which is written 101. (Here "+" separates
// polynomial terms, while the coefficients themselves "add" with XOR.)
func (x Uint64s) CarrylessMultiplyOdd(y Uint64s) Uint64s {
	// clmul multiplies the a fields, so place each operand's b field there.
	xOdd := Uint64s{a: x.b, b: x.b}
	yOdd := Uint64s{a: y.b, b: y.b}
	return xOdd.clmul(yOdd)
}
const (
	// by8 has the lowest bit of every byte set (0x0101010101010101).
	// Multiplying a value below 1<<8 by it copies that value into all
	// eight bytes of a 64-bit word.
	by8 = 0xffffffffffffffff / 0xff

	// by16 has the lowest bit of every 16-bit group set
	// (0x0001000100010001). Multiplying a value below 1<<16 by it copies
	// that value into all four 16-bit groups of a 64-bit word.
	by16 = 0xffffffffffffffff / 0xffff
)
// BroadcastInt8s returns an Int8s vector with every 8-bit lane set to x.
func BroadcastInt8s(x int8) Int8s {
	// Going through uint8 keeps only x's eight bits, so a negative value
	// does not sign-extend into the neighbouring lanes.
	word := uint64(uint8(x)) * by8
	var v Int8s
	v.a, v.b = word, word
	return v
}
// BroadcastInt16s returns an Int16s vector with every 16-bit lane set to x.
func BroadcastInt16s(x int16) Int16s {
	// Going through uint16 keeps only x's sixteen bits, so a negative value
	// does not sign-extend into the neighbouring lanes.
	word := uint64(uint16(x)) * by16
	var v Int16s
	v.a, v.b = word, word
	return v
}
// BroadcastInt32s returns an Int32s vector with every 32-bit lane set to x.
func BroadcastInt32s(x int32) Int32s {
	// Going through uint32 keeps only x's low 32 bits before duplicating
	// them into both halves of a 64-bit word.
	lane := uint64(uint32(x))
	word := lane | lane<<32
	var v Int32s
	v.a, v.b = word, word
	return v
}
// BroadcastInt64s returns an Int64s vector with both 64-bit lanes set to x.
func BroadcastInt64s(x int64) Int64s {
	word := uint64(x)
	var v Int64s
	v.a, v.b = word, word
	return v
}
// BroadcastUint8s returns a Uint8s vector with every 8-bit lane set to x.
func BroadcastUint8s(x uint8) Uint8s {
	// Multiplying by by8 replicates the byte across the 64-bit word.
	word := by8 * uint64(x)
	var v Uint8s
	v.a, v.b = word, word
	return v
}
// BroadcastUint16s returns a Uint16s vector with every 16-bit lane set to x.
func BroadcastUint16s(x uint16) Uint16s {
	// Multiplying by by16 replicates the 16-bit value across the 64-bit word.
	word := by16 * uint64(x)
	var v Uint16s
	v.a, v.b = word, word
	return v
}
// BroadcastUint32s returns a Uint32s vector with every 32-bit lane set to x.
func BroadcastUint32s(x uint32) Uint32s {
	lane := uint64(x)
	// Duplicate the value into the low and high halves of a 64-bit word.
	word := lane | lane<<32
	var v Uint32s
	v.a, v.b = word, word
	return v
}
// BroadcastUint64s returns a Uint64s vector with both 64-bit lanes set to x.
func BroadcastUint64s(x uint64) Uint64s {
	var v Uint64s
	v.a, v.b = x, x
	return v
}
// BroadcastFloat32s returns a Float32s vector with every 32-bit lane set
// to x.
func BroadcastFloat32s(x float32) Float32s {
	// Lanes hold the IEEE 754 bit pattern of x, duplicated into the low
	// and high halves of each 64-bit word.
	lane := uint64(math.Float32bits(x))
	word := lane | lane<<32
	var v Float32s
	v.a, v.b = word, word
	return v
}
// BroadcastFloat64s returns a Float64s vector with both 64-bit lanes set
// to x.
func BroadcastFloat64s(x float64) Float64s {
	// Each lane holds the IEEE 754 bit pattern of x.
	word := math.Float64bits(x)
	var v Float64s
	v.a, v.b = word, word
	return v
}