src/simd/archsimd/ops_emulated_wasm.go - go - Git at Google

 // Copyright 2026 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

 //go:build goexperiment.simd && wasm

 package archsimd

 // Constant lane patterns loaded into vectors by the emulated operations below.
 var (
 	// nn has only the sign bit set in each 64-bit element.
 	nn = [2]int64{-1 << 63, -1 << 63}
 	// f0s is all-ones in every even-indexed 8-bit element and zero in the odd ones.
 	f0s = [16]int8{-1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0}
 	// ff00s is all-ones in every even-indexed 16-bit element and zero in the odd ones.
 	ff00s = [8]int16{-1, 0, -1, 0, -1, 0, -1, 0}
 	// ffff0000s is all-ones in every even-indexed 32-bit element and zero in the odd ones.
 	ffff0000s = [4]int32{-1, 0, -1, 0}
 )

 // For unsigned comparison, the trick for converting it into
 // signed comparison is to notice that the unsigned range is
 // the same as the signed range plus 1 << (bitwidth-1).
 // And adding or subtracting the sign bit is the same as XORing
 // it.  Thus, XOR the sign bit into both operands and then use
 // the signed comparison operations.

 // Less returns a mask vector of x[i] < y[i], comparing the elements
 // as unsigned 64-bit integers.
 func (x Uint64x2) Less(y Uint64x2) Mask64x2 {
 	// Flip the sign bit of both operands so that the signed comparison
 	// orders them the way the unsigned values are ordered.
 	bias := LoadInt64x2Array(&nn)
 	lhs := x.BitsToInt64().Xor(bias)
 	rhs := y.BitsToInt64().Xor(bias)
 	return lhs.Less(rhs)
 }

 // LessEqual returns a mask vector of x[i] <= y[i], comparing the elements
 // as unsigned 64-bit integers.
 func (x Uint64x2) LessEqual(y Uint64x2) Mask64x2 {
 	// Flip the sign bit of both operands so that the signed comparison
 	// orders them the way the unsigned values are ordered.
 	bias := LoadInt64x2Array(&nn)
 	lhs := x.BitsToInt64().Xor(bias)
 	rhs := y.BitsToInt64().Xor(bias)
 	return lhs.LessEqual(rhs)
 }

 // Greater returns a mask vector of x[i] > y[i], comparing the elements
 // as unsigned 64-bit integers.
 func (x Uint64x2) Greater(y Uint64x2) Mask64x2 {
 	// Flip the sign bit of both operands so that the signed comparison
 	// orders them the way the unsigned values are ordered.
 	bias := LoadInt64x2Array(&nn)
 	lhs := x.BitsToInt64().Xor(bias)
 	rhs := y.BitsToInt64().Xor(bias)
 	return lhs.Greater(rhs)
 }

 // GreaterEqual returns a mask vector of x[i] >= y[i], comparing the elements
 // as unsigned 64-bit integers.
 func (x Uint64x2) GreaterEqual(y Uint64x2) Mask64x2 {
 	// Flip the sign bit of both operands so that the signed comparison
 	// orders them the way the unsigned values are ordered.
 	bias := LoadInt64x2Array(&nn)
 	lhs := x.BitsToInt64().Xor(bias)
 	rhs := y.BitsToInt64().Xor(bias)
 	return lhs.GreaterEqual(rhs)
 }

 // Max returns the elementwise maximum of x and y.
 func (x Int64x2) Max(y Int64x2) Int64x2 {
 	// Select x where x[i] > y[i] and y everywhere else.
 	pick := x.Greater(y).ToInt64x2()
 	fromX := x.And(pick)
 	fromY := y.AndNot(pick)
 	return fromX.Or(fromY)
 }

 // Min returns the elementwise minimum of x and y.
 func (x Int64x2) Min(y Int64x2) Int64x2 {
 	// Select x where x[i] < y[i] and y everywhere else.
 	pick := x.Less(y).ToInt64x2()
 	fromX := x.And(pick)
 	fromY := y.AndNot(pick)
 	return fromX.Or(fromY)
 }

 // Max returns the elementwise maximum of x and y.
 func (x Uint64x2) Max(y Uint64x2) Uint64x2 {
 	// Select x where x[i] > y[i] and y everywhere else.
 	pick := x.Greater(y).ToInt64x2().ToBits()
 	fromX := x.And(pick)
 	fromY := y.AndNot(pick)
 	return fromX.Or(fromY)
 }

 // Min returns the elementwise minimum of x and y.
 func (x Uint64x2) Min(y Uint64x2) Uint64x2 {
 	// Select x where x[i] < y[i] and y everywhere else.
 	pick := x.Less(y).ToInt64x2().ToBits()
 	fromX := x.And(pick)
 	fromY := y.AndNot(pick)
 	return fromX.Or(fromY)
 }

 // Mul returns the elementwise product of x and y.
 func (x Int8x16) Mul(y Int8x16) Int8x16 {
 	// The 8-bit multiply is assembled from 16-bit multiplies: separate the
 	// even and odd bytes, move the odd bytes down into the even position,
 	// multiply as 16-bit elements, keep only the low byte of each product,
 	// then move the odd products back up and merge the two halves.
 	byteMask := LoadInt8x16Array(&f0s)
 	laneMask := byteMask.ToBits().ReshapeToUint16s()
 	xEven := x.And(byteMask).ToBits().ReshapeToUint16s()
 	xOdd := x.AndNot(byteMask).ToBits().ReshapeToUint16s().ShiftAllRight(8)
 	yEven := y.And(byteMask).ToBits().ReshapeToUint16s()
 	yOdd := y.AndNot(byteMask).ToBits().ReshapeToUint16s().ShiftAllRight(8)
 	evenProd := xEven.Mul(yEven).And(laneMask)
 	oddProd := xOdd.Mul(yOdd).And(laneMask).ShiftAllLeft(8)
 	merged := evenProd.Or(oddProd)
 	return merged.ReshapeToUint8s().BitsToInt8()
 }

 // Mul returns the elementwise product of x and y.
 func (x Uint8x16) Mul(y Uint8x16) Uint8x16 {
 	// Same scheme as the signed 8-bit multiply: multiply the even and odd
 	// bytes separately as 16-bit elements, keep the low byte of each
 	// product, and merge the two halves back into byte positions.
 	byteMask := LoadInt8x16Array(&f0s).ToBits()
 	laneMask := byteMask.ReshapeToUint16s()
 	xEven := x.And(byteMask).ReshapeToUint16s()
 	xOdd := x.AndNot(byteMask).ReshapeToUint16s().ShiftAllRight(8)
 	yEven := y.And(byteMask).ReshapeToUint16s()
 	yOdd := y.AndNot(byteMask).ReshapeToUint16s().ShiftAllRight(8)
 	evenProd := xEven.Mul(yEven).And(laneMask)
 	oddProd := xOdd.Mul(yOdd).And(laneMask).ShiftAllLeft(8)
 	merged := evenProd.Or(oddProd)
 	return merged.ReshapeToUint8s()
 }

 // OnesCount returns the number of set bits in each element of x.
 func (x Int16x8) OnesCount() Int16x8 {
 	evenBytes := LoadInt8x16Array(&f0s)
 	// Count bits per byte, then add the two byte counts of each 16-bit element.
 	perByte := x.ToBits().ReshapeToUint8s().BitsToInt8().OnesCount()
 	// Counts of the even bytes, viewed as 16-bit elements.
 	lowCounts := perByte.And(evenBytes).ToBits().ReshapeToUint16s().BitsToInt16()
 	// Counts of the odd bytes, viewed as 16-bit elements and shifted down to align.
 	highCounts := perByte.AndNot(evenBytes).ToBits().ReshapeToUint16s().BitsToInt16().ShiftAllRight(8)
 	return lowCounts.Add(highCounts)
 }

 // OnesCount returns the number of set bits in each element of x.
 func (x Int32x4) OnesCount() Int32x4 {
 	// Stage 1: per-byte counts folded pairwise into per-16-bit counts.
 	evenBytes := LoadInt8x16Array(&f0s)
 	perByte := x.ToBits().ReshapeToUint8s().BitsToInt8().OnesCount()
 	lowBytes := perByte.And(evenBytes).ToBits().ReshapeToUint16s().BitsToInt16()
 	highBytes := perByte.AndNot(evenBytes).ToBits().ReshapeToUint16s().BitsToInt16().ShiftAllRight(8)
 	// Stage 2: per-16-bit counts folded pairwise into per-32-bit counts.
 	evenHalves := LoadInt16x8Array(&ff00s)
 	per16 := lowBytes.Add(highBytes)
 	lowHalves := per16.And(evenHalves).ToBits().ReshapeToUint32s().BitsToInt32()
 	highHalves := per16.AndNot(evenHalves).ToBits().ReshapeToUint32s().BitsToInt32().ShiftAllRight(16)
 	return lowHalves.Add(highHalves)
 }

 // OnesCount returns the number of set bits in each element of x.
 func (x Int64x2) OnesCount() Int64x2 {
 	// Stage 1: per-byte counts folded pairwise into per-16-bit counts.
 	evenBytes := LoadInt8x16Array(&f0s)
 	perByte := x.ToBits().ReshapeToUint8s().BitsToInt8().OnesCount()
 	lowBytes := perByte.And(evenBytes).ToBits().ReshapeToUint16s().BitsToInt16()
 	highBytes := perByte.AndNot(evenBytes).ToBits().ReshapeToUint16s().BitsToInt16().ShiftAllRight(8)
 	// Stage 2: per-16-bit counts folded pairwise into per-32-bit counts.
 	evenHalves := LoadInt16x8Array(&ff00s)
 	per16 := lowBytes.Add(highBytes)
 	lowHalves := per16.And(evenHalves).ToBits().ReshapeToUint32s().BitsToInt32()
 	highHalves := per16.AndNot(evenHalves).ToBits().ReshapeToUint32s().BitsToInt32().ShiftAllRight(16)
 	// Stage 3: per-32-bit counts folded pairwise into per-64-bit counts.
 	evenWords := LoadInt32x4Array(&ffff0000s)
 	per32 := lowHalves.Add(highHalves)
 	lowWords := per32.And(evenWords).ToBits().ReshapeToUint64s().BitsToInt64()
 	highWords := per32.AndNot(evenWords).ToBits().ReshapeToUint64s().BitsToInt64().ShiftAllRight(32)
 	return lowWords.Add(highWords)
 }

 // OnesCount returns the number of set bits in each element of x.
 func (x Uint8x16) OnesCount() Uint8x16 {
 	// Reuse the signed implementation on the same bits.
 	counts := x.BitsToInt8().OnesCount()
 	return counts.ToBits()
 }

 // OnesCount returns the number of set bits in each element of x.
 func (x Uint16x8) OnesCount() Uint16x8 {
 	// Reuse the signed implementation on the same bits.
 	counts := x.BitsToInt16().OnesCount()
 	return counts.ToBits()
 }

 // OnesCount returns the number of set bits in each element of x.
 func (x Uint32x4) OnesCount() Uint32x4 {
 	// Reuse the signed implementation on the same bits.
 	counts := x.BitsToInt32().OnesCount()
 	return counts.ToBits()
 }

 // OnesCount returns the number of set bits in each element of x.
 func (x Uint64x2) OnesCount() Uint64x2 {
 	// Reuse the signed implementation on the same bits.
 	counts := x.BitsToInt64().OnesCount()
 	return counts.ToBits()
 }

 // CarrylessMultiplyEven computes the carryless
 // multiplications of selected even halves of the elements of x and y.
 //
 // In a carryless multiplication the partial products are combined
 // with bitwise XOR rather than add-with-carry, for example (in base two):
 //
 //	11 * 11 = 11 * (10 ^ 1) = (11 * 10) ^ (11 * 1) = 110 ^ 11 = 101
 //
 // Equivalently, it multiplies polynomials whose coefficients are
 // taken from GF(2) -- 11 * 11 models (x+1)*(x+1) = x**2 + (1^1)x + 1 =
 // x**2 + 0x + 1 = x**2 + 1 modeled by 101.  (Note that "+" adds
 // polynomial terms, but coefficients "add" with XOR.)
 //
 // Emulated
 func (x Uint64x2) CarrylessMultiplyEven(y Uint64x2) Uint64x2 {
 	// Both operands are passed through unchanged.
 	product := x.carrylessMultiply(y)
 	return product
 }

 // CarrylessMultiplyOdd computes the carryless
 // multiplications of selected odd halves of the elements of x and y.
 //
 // A carryless multiplication uses bitwise XOR instead of
 // add-with-carry, for example (in base two):
 //
 //	11 * 11 = 11 * (10 ^ 1) = (11 * 10) ^ (11 * 1) = 110 ^ 11 = 101
 //
 // This also models multiplication of polynomials with coefficients
 // from GF(2) -- 11 * 11 models (x+1)*(x+1) = x**2 + (1^1)x + 1 =
 // x**2 + 0x + 1 = x**2 + 1 modeled by 101.  (Note that "+" adds
 // polynomial terms, but coefficients "add" with XOR.)
 //
 // Emulated
 func (x Uint64x2) CarrylessMultiplyOdd(y Uint64x2) Uint64x2 {
 	// Copy each operand's own element 1 into its element 0 before
 	// delegating to the shared helper, so that y contributes y[1]
 	// (not x[1]) to the product.
 	x = x.SetElem(0, x.GetElem(1))
 	y = y.SetElem(0, y.GetElem(1))
 	return x.carrylessMultiply(y)
 }
	// Copyright 2026 The Go Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style
	// license that can be found in the LICENSE file.

	//go:build goexperiment.simd && wasm

	package archsimd

	// Constant lane patterns loaded into vectors by the emulated operations below.
	var (
		// nn has only the sign bit set in each 64-bit element.
		nn = [2]int64{-1 << 63, -1 << 63}
		// f0s is all-ones in every even-indexed 8-bit element and zero in the odd ones.
		f0s = [16]int8{-1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0}
		// ff00s is all-ones in every even-indexed 16-bit element and zero in the odd ones.
		ff00s = [8]int16{-1, 0, -1, 0, -1, 0, -1, 0}
		// ffff0000s is all-ones in every even-indexed 32-bit element and zero in the odd ones.
		ffff0000s = [4]int32{-1, 0, -1, 0}
	)

	// For unsigned comparison, the trick for converting it into
	// signed comparison is to notice that the unsigned range is
	// the same as the signed range plus 1 << (bitwidth-1).
	// And adding or subtracting the sign bit is the same as XORing
	// it. Thus, XOR the sign bit into both operands and then use
	// the signed comparison operations.

	// Less returns a mask vector of x[i] < y[i], comparing the elements
	// as unsigned 64-bit integers.
	func (x Uint64x2) Less(y Uint64x2) Mask64x2 {
		// Flip the sign bit of both operands so that the signed comparison
		// orders them the way the unsigned values are ordered.
		bias := LoadInt64x2Array(&nn)
		lhs := x.BitsToInt64().Xor(bias)
		rhs := y.BitsToInt64().Xor(bias)
		return lhs.Less(rhs)
	}

	// LessEqual returns a mask vector of x[i] <= y[i], comparing the elements
	// as unsigned 64-bit integers.
	func (x Uint64x2) LessEqual(y Uint64x2) Mask64x2 {
		// Flip the sign bit of both operands so that the signed comparison
		// orders them the way the unsigned values are ordered.
		bias := LoadInt64x2Array(&nn)
		lhs := x.BitsToInt64().Xor(bias)
		rhs := y.BitsToInt64().Xor(bias)
		return lhs.LessEqual(rhs)
	}

	// Greater returns a mask vector of x[i] > y[i], comparing the elements
	// as unsigned 64-bit integers.
	func (x Uint64x2) Greater(y Uint64x2) Mask64x2 {
		// Flip the sign bit of both operands so that the signed comparison
		// orders them the way the unsigned values are ordered.
		bias := LoadInt64x2Array(&nn)
		lhs := x.BitsToInt64().Xor(bias)
		rhs := y.BitsToInt64().Xor(bias)
		return lhs.Greater(rhs)
	}

	// GreaterEqual returns a mask vector of x[i] >= y[i], comparing the elements
	// as unsigned 64-bit integers.
	func (x Uint64x2) GreaterEqual(y Uint64x2) Mask64x2 {
		// Flip the sign bit of both operands so that the signed comparison
		// orders them the way the unsigned values are ordered.
		bias := LoadInt64x2Array(&nn)
		lhs := x.BitsToInt64().Xor(bias)
		rhs := y.BitsToInt64().Xor(bias)
		return lhs.GreaterEqual(rhs)
	}

	// Max returns the elementwise maximum of x and y.
	func (x Int64x2) Max(y Int64x2) Int64x2 {
		// Select x where x[i] > y[i] and y everywhere else.
		pick := x.Greater(y).ToInt64x2()
		fromX := x.And(pick)
		fromY := y.AndNot(pick)
		return fromX.Or(fromY)
	}

	// Min returns the elementwise minimum of x and y.
	func (x Int64x2) Min(y Int64x2) Int64x2 {
		// Select x where x[i] < y[i] and y everywhere else.
		pick := x.Less(y).ToInt64x2()
		fromX := x.And(pick)
		fromY := y.AndNot(pick)
		return fromX.Or(fromY)
	}

	// Max returns the elementwise maximum of x and y.
	func (x Uint64x2) Max(y Uint64x2) Uint64x2 {
		// Select x where x[i] > y[i] and y everywhere else.
		pick := x.Greater(y).ToInt64x2().ToBits()
		fromX := x.And(pick)
		fromY := y.AndNot(pick)
		return fromX.Or(fromY)
	}

	// Min returns the elementwise minimum of x and y.
	func (x Uint64x2) Min(y Uint64x2) Uint64x2 {
		// Select x where x[i] < y[i] and y everywhere else.
		pick := x.Less(y).ToInt64x2().ToBits()
		fromX := x.And(pick)
		fromY := y.AndNot(pick)
		return fromX.Or(fromY)
	}

	// Mul returns the elementwise product of x and y.
	func (x Int8x16) Mul(y Int8x16) Int8x16 {
		// The 8-bit multiply is assembled from 16-bit multiplies: separate the
		// even and odd bytes, move the odd bytes down into the even position,
		// multiply as 16-bit elements, keep only the low byte of each product,
		// then move the odd products back up and merge the two halves.
		byteMask := LoadInt8x16Array(&f0s)
		laneMask := byteMask.ToBits().ReshapeToUint16s()
		xEven := x.And(byteMask).ToBits().ReshapeToUint16s()
		xOdd := x.AndNot(byteMask).ToBits().ReshapeToUint16s().ShiftAllRight(8)
		yEven := y.And(byteMask).ToBits().ReshapeToUint16s()
		yOdd := y.AndNot(byteMask).ToBits().ReshapeToUint16s().ShiftAllRight(8)
		evenProd := xEven.Mul(yEven).And(laneMask)
		oddProd := xOdd.Mul(yOdd).And(laneMask).ShiftAllLeft(8)
		merged := evenProd.Or(oddProd)
		return merged.ReshapeToUint8s().BitsToInt8()
	}

	// Mul returns the elementwise product of x and y.
	func (x Uint8x16) Mul(y Uint8x16) Uint8x16 {
		// Same scheme as the signed 8-bit multiply: multiply the even and odd
		// bytes separately as 16-bit elements, keep the low byte of each
		// product, and merge the two halves back into byte positions.
		byteMask := LoadInt8x16Array(&f0s).ToBits()
		laneMask := byteMask.ReshapeToUint16s()
		xEven := x.And(byteMask).ReshapeToUint16s()
		xOdd := x.AndNot(byteMask).ReshapeToUint16s().ShiftAllRight(8)
		yEven := y.And(byteMask).ReshapeToUint16s()
		yOdd := y.AndNot(byteMask).ReshapeToUint16s().ShiftAllRight(8)
		evenProd := xEven.Mul(yEven).And(laneMask)
		oddProd := xOdd.Mul(yOdd).And(laneMask).ShiftAllLeft(8)
		merged := evenProd.Or(oddProd)
		return merged.ReshapeToUint8s()
	}

	// OnesCount returns the number of set bits in each element of x.
	func (x Int16x8) OnesCount() Int16x8 {
		evenBytes := LoadInt8x16Array(&f0s)
		// Count bits per byte, then add the two byte counts of each 16-bit element.
		perByte := x.ToBits().ReshapeToUint8s().BitsToInt8().OnesCount()
		// Counts of the even bytes, viewed as 16-bit elements.
		lowCounts := perByte.And(evenBytes).ToBits().ReshapeToUint16s().BitsToInt16()
		// Counts of the odd bytes, viewed as 16-bit elements and shifted down to align.
		highCounts := perByte.AndNot(evenBytes).ToBits().ReshapeToUint16s().BitsToInt16().ShiftAllRight(8)
		return lowCounts.Add(highCounts)
	}

	// OnesCount returns the number of set bits in each element of x.
	func (x Int32x4) OnesCount() Int32x4 {
		// Stage 1: per-byte counts folded pairwise into per-16-bit counts.
		evenBytes := LoadInt8x16Array(&f0s)
		perByte := x.ToBits().ReshapeToUint8s().BitsToInt8().OnesCount()
		lowBytes := perByte.And(evenBytes).ToBits().ReshapeToUint16s().BitsToInt16()
		highBytes := perByte.AndNot(evenBytes).ToBits().ReshapeToUint16s().BitsToInt16().ShiftAllRight(8)
		// Stage 2: per-16-bit counts folded pairwise into per-32-bit counts.
		evenHalves := LoadInt16x8Array(&ff00s)
		per16 := lowBytes.Add(highBytes)
		lowHalves := per16.And(evenHalves).ToBits().ReshapeToUint32s().BitsToInt32()
		highHalves := per16.AndNot(evenHalves).ToBits().ReshapeToUint32s().BitsToInt32().ShiftAllRight(16)
		return lowHalves.Add(highHalves)
	}

	// OnesCount returns the number of set bits in each element of x.
	func (x Int64x2) OnesCount() Int64x2 {
		// Stage 1: per-byte counts folded pairwise into per-16-bit counts.
		evenBytes := LoadInt8x16Array(&f0s)
		perByte := x.ToBits().ReshapeToUint8s().BitsToInt8().OnesCount()
		lowBytes := perByte.And(evenBytes).ToBits().ReshapeToUint16s().BitsToInt16()
		highBytes := perByte.AndNot(evenBytes).ToBits().ReshapeToUint16s().BitsToInt16().ShiftAllRight(8)
		// Stage 2: per-16-bit counts folded pairwise into per-32-bit counts.
		evenHalves := LoadInt16x8Array(&ff00s)
		per16 := lowBytes.Add(highBytes)
		lowHalves := per16.And(evenHalves).ToBits().ReshapeToUint32s().BitsToInt32()
		highHalves := per16.AndNot(evenHalves).ToBits().ReshapeToUint32s().BitsToInt32().ShiftAllRight(16)
		// Stage 3: per-32-bit counts folded pairwise into per-64-bit counts.
		evenWords := LoadInt32x4Array(&ffff0000s)
		per32 := lowHalves.Add(highHalves)
		lowWords := per32.And(evenWords).ToBits().ReshapeToUint64s().BitsToInt64()
		highWords := per32.AndNot(evenWords).ToBits().ReshapeToUint64s().BitsToInt64().ShiftAllRight(32)
		return lowWords.Add(highWords)
	}

	// OnesCount returns the number of set bits in each element of x.
	func (x Uint8x16) OnesCount() Uint8x16 {
		// Reuse the signed implementation on the same bits.
		counts := x.BitsToInt8().OnesCount()
		return counts.ToBits()
	}

	// OnesCount returns the number of set bits in each element of x.
	func (x Uint16x8) OnesCount() Uint16x8 {
		// Reuse the signed implementation on the same bits.
		counts := x.BitsToInt16().OnesCount()
		return counts.ToBits()
	}

	// OnesCount returns the number of set bits in each element of x.
	func (x Uint32x4) OnesCount() Uint32x4 {
		// Reuse the signed implementation on the same bits.
		counts := x.BitsToInt32().OnesCount()
		return counts.ToBits()
	}

	// OnesCount returns the number of set bits in each element of x.
	func (x Uint64x2) OnesCount() Uint64x2 {
		// Reuse the signed implementation on the same bits.
		counts := x.BitsToInt64().OnesCount()
		return counts.ToBits()
	}

	// CarrylessMultiplyEven computes the carryless
	// multiplications of selected even halves of the elements of x and y.
	//
	// In a carryless multiplication the partial products are combined
	// with bitwise XOR rather than add-with-carry, for example (in base two):
	//
	//	11 * 11 = 11 * (10 ^ 1) = (11 * 10) ^ (11 * 1) = 110 ^ 11 = 101
	//
	// Equivalently, it multiplies polynomials whose coefficients are
	// taken from GF(2) -- 11 * 11 models (x+1)*(x+1) = x**2 + (1^1)x + 1 =
	// x**2 + 0x + 1 = x**2 + 1 modeled by 101.  (Note that "+" adds
	// polynomial terms, but coefficients "add" with XOR.)
	//
	// Emulated
	func (x Uint64x2) CarrylessMultiplyEven(y Uint64x2) Uint64x2 {
		// Both operands are passed through unchanged.
		product := x.carrylessMultiply(y)
		return product
	}

	// CarrylessMultiplyOdd computes the carryless
	// multiplications of selected odd halves of the elements of x and y.
	//
	// A carryless multiplication uses bitwise XOR instead of
	// add-with-carry, for example (in base two):
	//
	//	11 * 11 = 11 * (10 ^ 1) = (11 * 10) ^ (11 * 1) = 110 ^ 11 = 101
	//
	// This also models multiplication of polynomials with coefficients
	// from GF(2) -- 11 * 11 models (x+1)*(x+1) = x**2 + (1^1)x + 1 =
	// x**2 + 0x + 1 = x**2 + 1 modeled by 101.  (Note that "+" adds
	// polynomial terms, but coefficients "add" with XOR.)
	//
	// Emulated
	func (x Uint64x2) CarrylessMultiplyOdd(y Uint64x2) Uint64x2 {
		// Copy each operand's own element 1 into its element 0 before
		// delegating to the shared helper, so that y contributes y[1]
		// (not x[1]) to the product.
		x = x.SetElem(0, x.GetElem(1))
		y = y.SetElem(0, y.GetElem(1))
		return x.carrylessMultiply(y)
	}