| // Copyright 2026 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| //go:build goexperiment.simd && wasm |
| |
| package archsimd |
| |
// Constant tables loaded into vectors by the emulation routines in this file.
var (
	// nn holds only the sign bit (bit 63) in each of the two 64-bit lanes.
	nn = [2]int64{
		-1 << 63,
		-1 << 63,
	}

	// f0s alternates all-ones and zero bytes, starting with all-ones at
	// index 0; ANDing with it keeps the even-indexed bytes.
	f0s = [16]int8{
		-1, 0, -1, 0,
		-1, 0, -1, 0,
		-1, 0, -1, 0,
		-1, 0, -1, 0,
	}

	// ff00s is the same alternating pattern at 16-bit granularity.
	ff00s = [8]int16{
		-1, 0, -1, 0,
		-1, 0, -1, 0,
	}

	// ffff0000s is the same alternating pattern at 32-bit granularity.
	ffff0000s = [4]int32{
		-1, 0,
		-1, 0,
	}
)
| |
// For unsigned comparison, the trick for converting it into
// signed comparison is to notice that the unsigned range is
// the same as the signed range plus 1 << (bitwidth-1).
// Adding or subtracting that value only touches the sign bit,
// so it is the same as XORing the sign bit. Thus, XOR the sign
// bit of both operands and then use the signed comparison
// operations.
| |
| // Less return a mask vector of x[i] < y[i] |
| func (x Uint64x2) Less(y Uint64x2) Mask64x2 { |
| signs := LoadInt64x2Array(&nn) |
| ix := x.BitsToInt64().Xor(signs) |
| iy := y.BitsToInt64().Xor(signs) |
| return ix.Less(iy) |
| } |
| |
| // LessEqual return a mask vector of x[i] <= y[i] |
| func (x Uint64x2) LessEqual(y Uint64x2) Mask64x2 { |
| signs := LoadInt64x2Array(&nn) |
| ix := x.BitsToInt64().Xor(signs) |
| iy := y.BitsToInt64().Xor(signs) |
| return ix.LessEqual(iy) |
| } |
| |
| // Greater return a mask vector of x[i] > y[i] |
| func (x Uint64x2) Greater(y Uint64x2) Mask64x2 { |
| signs := LoadInt64x2Array(&nn) |
| ix := x.BitsToInt64().Xor(signs) |
| iy := y.BitsToInt64().Xor(signs) |
| return ix.Greater(iy) |
| } |
| |
| // GreaterEqual return a mask vector of x[i] >= y[i] |
| func (x Uint64x2) GreaterEqual(y Uint64x2) Mask64x2 { |
| signs := LoadInt64x2Array(&nn) |
| ix := x.BitsToInt64().Xor(signs) |
| iy := y.BitsToInt64().Xor(signs) |
| return ix.GreaterEqual(iy) |
| } |
| |
| // Max returns the elementswise maximum of elements in x and y |
| func (x Int64x2) Max(y Int64x2) Int64x2 { |
| mask := x.Greater(y).ToInt64x2() |
| return x.And(mask).Or(y.AndNot(mask)) |
| } |
| |
| // Min returns the elementswise minimum of elements in x and y |
| func (x Int64x2) Min(y Int64x2) Int64x2 { |
| mask := x.Less(y).ToInt64x2() |
| return x.And(mask).Or(y.AndNot(mask)) |
| } |
| |
| // Max returns the elementswise maximum of elements in x and y |
| func (x Uint64x2) Max(y Uint64x2) Uint64x2 { |
| mask := x.Greater(y).ToInt64x2().ToBits() |
| return x.And(mask).Or(y.AndNot(mask)) |
| } |
| |
| // Min returns the elementswise minimum of elements in x and y |
| func (x Uint64x2) Min(y Uint64x2) Uint64x2 { |
| mask := x.Less(y).ToInt64x2().ToBits() |
| return x.And(mask).Or(y.AndNot(mask)) |
| } |
| |
| // Mul returns the elementswise product of elements in x and y |
| func (x Int8x16) Mul(y Int8x16) Int8x16 { |
| // To obtain an 8-bit multiply, split the vectors into even and odd |
| // elements, shift odds into even position, widen elements in both |
| // vectors, multiply, discard high parts, realign the odd results |
| // and combine. |
| mask := LoadInt8x16Array(&f0s) |
| mask16 := mask.ToBits().ReshapeToUint16s() |
| xe := x.And(mask).ToBits().ReshapeToUint16s() |
| xo := x.AndNot(mask).ToBits().ReshapeToUint16s().ShiftAllRight(8) |
| ye := y.And(mask).ToBits().ReshapeToUint16s() |
| yo := y.AndNot(mask).ToBits().ReshapeToUint16s().ShiftAllRight(8) |
| pe := xe.Mul(ye).And(mask16) |
| po := xo.Mul(yo).And(mask16).ShiftAllLeft(8) |
| return pe.Or(po).ReshapeToUint8s().BitsToInt8() |
| } |
| |
| // Mul returns the elementswise product of elements in x and y |
| func (x Uint8x16) Mul(y Uint8x16) Uint8x16 { |
| mask := LoadInt8x16Array(&f0s).ToBits() |
| mask16 := mask.ReshapeToUint16s() |
| xe := x.And(mask).ReshapeToUint16s() |
| xo := x.AndNot(mask).ReshapeToUint16s().ShiftAllRight(8) |
| ye := y.And(mask).ReshapeToUint16s() |
| yo := y.AndNot(mask).ReshapeToUint16s().ShiftAllRight(8) |
| pe := xe.Mul(ye).And(mask16) |
| po := xo.Mul(yo).And(mask16).ShiftAllLeft(8) |
| return pe.Or(po).ReshapeToUint8s() |
| } |
| |
| // OnesCount returns the number of set bits in each vector element |
| func (x Int16x8) OnesCount() Int16x8 { |
| mask := LoadInt8x16Array(&f0s) |
| c := x.ToBits().ReshapeToUint8s().BitsToInt8().OnesCount() // per-byte counts |
| ce := c.And(mask).ToBits().ReshapeToUint16s().BitsToInt16() // even-element per-byte counts, as 16-bit elements |
| co := c.AndNot(mask).ToBits().ReshapeToUint16s().BitsToInt16().ShiftAllRight(8) // odd-element per-byte counts, as 16-bit elements, aligned |
| return ce.Add(co) // return their elementwise sum |
| } |
| |
| // OnesCount returns the number of set bits in each vector element |
| func (x Int32x4) OnesCount() Int32x4 { |
| mask := LoadInt8x16Array(&f0s) |
| c := x.ToBits().ReshapeToUint8s().BitsToInt8().OnesCount() // per-byte counts |
| ce := c.And(mask).ToBits().ReshapeToUint16s().BitsToInt16() // even-element per-byte counts, as 16-bit elements |
| co := c.AndNot(mask).ToBits().ReshapeToUint16s().BitsToInt16().ShiftAllRight(8) // odd-element per-byte counts, as 16-bit elements, aligned |
| mask16 := LoadInt16x8Array(&ff00s) |
| y := ce.Add(co) // per int16 counts, etc. |
| ye := y.And(mask16).ToBits().ReshapeToUint32s().BitsToInt32() |
| yo := y.AndNot(mask16).ToBits().ReshapeToUint32s().BitsToInt32().ShiftAllRight(16) |
| return ye.Add(yo) |
| } |
| |
| // OnesCount returns the number of set bits in each vector element |
| func (x Int64x2) OnesCount() Int64x2 { |
| mask := LoadInt8x16Array(&f0s) |
| c := x.ToBits().ReshapeToUint8s().BitsToInt8().OnesCount() |
| ce := c.And(mask).ToBits().ReshapeToUint16s().BitsToInt16() |
| co := c.AndNot(mask).ToBits().ReshapeToUint16s().BitsToInt16().ShiftAllRight(8) |
| mask16 := LoadInt16x8Array(&ff00s) |
| y := ce.Add(co) |
| ye := y.And(mask16).ToBits().ReshapeToUint32s().BitsToInt32() |
| yo := y.AndNot(mask16).ToBits().ReshapeToUint32s().BitsToInt32().ShiftAllRight(16) |
| mask32 := LoadInt32x4Array(&ffff0000s) |
| z := ye.Add(yo) |
| ze := z.And(mask32).ToBits().ReshapeToUint64s().BitsToInt64() |
| zo := z.AndNot(mask32).ToBits().ReshapeToUint64s().BitsToInt64().ShiftAllRight(32) |
| return ze.Add(zo) |
| } |
| |
| // OnesCount returns the number of set bits in each vector element |
| func (x Uint8x16) OnesCount() Uint8x16 { |
| return x.BitsToInt8().OnesCount().ToBits() |
| } |
| |
| // OnesCount returns the number of set bits in each vector element |
| func (x Uint16x8) OnesCount() Uint16x8 { |
| return x.BitsToInt16().OnesCount().ToBits() |
| } |
| |
| // OnesCount returns the number of set bits in each vector element |
| func (x Uint32x4) OnesCount() Uint32x4 { |
| return x.BitsToInt32().OnesCount().ToBits() |
| } |
| |
| // OnesCount returns the number of set bits in each vector element |
| func (x Uint64x2) OnesCount() Uint64x2 { |
| return x.BitsToInt64().OnesCount().ToBits() |
| } |
| |
| // CarrylessMultiplyEven computes the carryless |
| // multiplications of selected even halves of the elements of x and y. |
| // |
| // A carryless multiplication uses bitwise XOR instead of |
| // add-with-carry, for example (in base two): |
| // |
| // 11 * 11 = 11 * (10 ^ 1) = (11 * 10) ^ (11 * 1) = 110 ^ 11 = 101 |
| // |
| // This also models multiplication of polynomials with coefficients |
| // from GF(2) -- 11 * 11 models (x+1)*(x+1) = x**2 + (1^1)x + 1 = |
| // x**2 + 0x + 1 = x**2 + 1 modeled by 101. (Note that "+" adds |
| // polynomial terms, but coefficients "add" with XOR.) |
| // |
| // Emulated |
| func (x Uint64x2) CarrylessMultiplyEven(y Uint64x2) Uint64x2 { |
| return x.carrylessMultiply(y) |
| } |
| |
| // CarrylessMultiplyOdd computes the carryless |
| // multiplications of selected odd halves of the elements of x and y. |
| // |
| // A carryless multiplication uses bitwise XOR instead of |
| // add-with-carry, for example (in base two): |
| // |
| // 11 * 11 = 11 * (10 ^ 1) = (11 * 10) ^ (11 * 1) = 110 ^ 11 = 101 |
| // |
| // This also models multiplication of polynomials with coefficients |
| // from GF(2) -- 11 * 11 models (x+1)*(x+1) = x**2 + (1^1)x + 1 = |
| // x**2 + 0x + 1 = x**2 + 1 modeled by 101. (Note that "+" adds |
| // polynomial terms, but coefficients "add" with XOR.) |
| // |
| // Emulated |
| func (x Uint64x2) CarrylessMultiplyOdd(y Uint64x2) Uint64x2 { |
| x = x.SetElem(0, x.GetElem(1)) |
| y = y.SetElem(0, x.GetElem(1)) |
| return x.carrylessMultiply(y) |
| } |