 // Source: src/simd/archsimd/ops_emulated_amd64.go (Go repository, Git at Google)

 // Copyright 2026 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

 //go:build goexperiment.simd && amd64

 package archsimd

 // Abs returns a vector whose elements are the absolute values of the
 // corresponding elements of x. Each result is formed by clearing the
 // sign bit of the element and keeping every other bit as is.
 //
 // Emulated, CPU Feature AVX
 func (x Float32x4) Abs() Float32x4 {
 	const signBit = 1 << 31 // only the top bit of a 32-bit element
 	signs := BroadcastUint32x4(signBit)
 	return x.ToBits().AndNot(signs).BitsToFloat32()
 }

 // Abs returns a vector whose elements are the absolute values of the
 // corresponding elements of x. Each result is formed by clearing the
 // sign bit of the element and keeping every other bit as is.
 //
 // Emulated, CPU Feature AVX2
 func (x Float32x8) Abs() Float32x8 {
 	const signBit = 1 << 31 // only the top bit of a 32-bit element
 	signs := BroadcastUint32x8(signBit)
 	return x.ToBits().AndNot(signs).BitsToFloat32()
 }

 // Abs returns a vector whose elements are the absolute values of the
 // corresponding elements of x. Each result is formed by clearing the
 // sign bit of the element and keeping every other bit as is.
 //
 // Emulated, CPU Feature AVX512
 func (x Float32x16) Abs() Float32x16 {
 	const signBit = 1 << 31 // only the top bit of a 32-bit element
 	signs := BroadcastUint32x16(signBit)
 	return x.ToBits().AndNot(signs).BitsToFloat32()
 }

 // Abs returns a vector whose elements are the absolute values of the
 // corresponding elements of x. Each result is formed by clearing the
 // sign bit of the element and keeping every other bit as is.
 //
 // Emulated, CPU Feature AVX
 func (x Float64x2) Abs() Float64x2 {
 	const signBit = 1 << 63 // only the top bit of a 64-bit element
 	signs := BroadcastUint64x2(signBit)
 	return x.ToBits().AndNot(signs).BitsToFloat64()
 }

 // Abs returns a vector whose elements are the absolute values of the
 // corresponding elements of x. Each result is formed by clearing the
 // sign bit of the element and keeping every other bit as is.
 //
 // Emulated, CPU Feature AVX2
 func (x Float64x4) Abs() Float64x4 {
 	const signBit = 1 << 63 // only the top bit of a 64-bit element
 	signs := BroadcastUint64x4(signBit)
 	return x.ToBits().AndNot(signs).BitsToFloat64()
 }

 // Abs returns a vector whose elements are the absolute values of the
 // corresponding elements of x. Each result is formed by clearing the
 // sign bit of the element and keeping every other bit as is.
 //
 // Emulated, CPU Feature AVX512
 func (x Float64x8) Abs() Float64x8 {
 	const signBit = 1 << 63 // only the top bit of a 64-bit element
 	signs := BroadcastUint64x8(signBit)
 	return x.ToBits().AndNot(signs).BitsToFloat64()
 }

 // Neg returns a vector whose elements are the negations of the
 // corresponding elements of x. Each result is formed by flipping the
 // sign bit of the element and keeping every other bit as is.
 //
 // Emulated, CPU Feature AVX
 func (x Float32x4) Neg() Float32x4 {
 	const signBit = 1 << 31 // only the top bit of a 32-bit element
 	bits := x.ToBits()
 	return bits.Xor(BroadcastUint32x4(signBit)).BitsToFloat32()
 }

 // Neg returns a vector whose elements are the negations of the
 // corresponding elements of x. Each result is formed by flipping the
 // sign bit of the element and keeping every other bit as is.
 //
 // Emulated, CPU Feature AVX2
 func (x Float32x8) Neg() Float32x8 {
 	const signBit = 1 << 31 // only the top bit of a 32-bit element
 	bits := x.ToBits()
 	return bits.Xor(BroadcastUint32x8(signBit)).BitsToFloat32()
 }

 // Neg returns a vector whose elements are the negations of the
 // corresponding elements of x. Each result is formed by flipping the
 // sign bit of the element and keeping every other bit as is.
 //
 // Emulated, CPU Feature AVX512
 func (x Float32x16) Neg() Float32x16 {
 	const signBit = 1 << 31 // only the top bit of a 32-bit element
 	bits := x.ToBits()
 	return bits.Xor(BroadcastUint32x16(signBit)).BitsToFloat32()
 }

 // Neg returns a vector whose elements are the negations of the
 // corresponding elements of x. Each result is formed by flipping the
 // sign bit of the element and keeping every other bit as is.
 //
 // Emulated, CPU Feature AVX
 func (x Float64x2) Neg() Float64x2 {
 	const signBit = 1 << 63 // only the top bit of a 64-bit element
 	bits := x.ToBits()
 	return bits.Xor(BroadcastUint64x2(signBit)).BitsToFloat64()
 }

 // Neg returns a vector whose elements are the negations of the
 // corresponding elements of x. Each result is formed by flipping the
 // sign bit of the element and keeping every other bit as is.
 //
 // Emulated, CPU Feature AVX2
 func (x Float64x4) Neg() Float64x4 {
 	const signBit = 1 << 63 // only the top bit of a 64-bit element
 	bits := x.ToBits()
 	return bits.Xor(BroadcastUint64x4(signBit)).BitsToFloat64()
 }

 // Neg returns a vector whose elements are the negations of the
 // corresponding elements of x. Each result is formed by flipping the
 // sign bit of the element and keeping every other bit as is.
 //
 // Emulated, CPU Feature AVX512
 func (x Float64x8) Neg() Float64x8 {
 	const signBit = 1 << 63 // only the top bit of a 64-bit element
 	bits := x.ToBits()
 	return bits.Xor(BroadcastUint64x8(signBit)).BitsToFloat64()
 }

 // f0x16, f0x32 and f0x64 hold an alternating -1, 0 byte pattern (every
 // even-indexed byte all ones, every odd-indexed byte zero) sized for
 // 16-, 32- and 64-byte vectors. The 8-bit Mul emulations in this file
 // load them as masks.
 var (
 	f0x16 = [16]int8{
 		-1, 0, -1, 0, -1, 0, -1, 0,
 		-1, 0, -1, 0, -1, 0, -1, 0,
 	}
 	f0x32 = [32]int8{
 		-1, 0, -1, 0, -1, 0, -1, 0,
 		-1, 0, -1, 0, -1, 0, -1, 0,
 		-1, 0, -1, 0, -1, 0, -1, 0,
 		-1, 0, -1, 0, -1, 0, -1, 0,
 	}
 	f0x64 = [64]int8{
 		-1, 0, -1, 0, -1, 0, -1, 0,
 		-1, 0, -1, 0, -1, 0, -1, 0,
 		-1, 0, -1, 0, -1, 0, -1, 0,
 		-1, 0, -1, 0, -1, 0, -1, 0,
 		-1, 0, -1, 0, -1, 0, -1, 0,
 		-1, 0, -1, 0, -1, 0, -1, 0,
 		-1, 0, -1, 0, -1, 0, -1, 0,
 		-1, 0, -1, 0, -1, 0, -1, 0,
 	}
 )

 // Mul returns the element-wise product of x and y, modulo 2ⁿ
 // (n = 8 for these 8-bit elements).
 //
 // The bytes are split into even- and odd-indexed halves using the f0x16
 // pattern, each half is multiplied in 16-bit lanes, and one byte of every
 // 16-bit product is merged back into the position its operands came from.
 //
 // Emulated, CPU Feature: AVX
 func (x Int8x16) Mul(y Int8x16) Int8x16 {
 	// All ones in even-indexed bytes, zero in odd-indexed bytes.
 	evens := LoadInt8x16Array(&f0x16)
 	// The same pattern viewed as 16-bit lanes.
 	lowByte := evens.ToBits().ReshapeToUint16s()
 	// Even-indexed bytes, alone in their 16-bit lanes.
 	xEven := x.And(evens).ToBits().ReshapeToUint16s()
 	yEven := y.And(evens).ToBits().ReshapeToUint16s()
 	// Odd-indexed bytes, shifted right by 8 within their 16-bit lanes.
 	xOdd := x.AndNot(evens).ToBits().ReshapeToUint16s().ShiftAllRight(8)
 	yOdd := y.AndNot(evens).ToBits().ReshapeToUint16s().ShiftAllRight(8)
 	// Mask each product, then shift the odd products back into place.
 	evenProd := xEven.Mul(yEven).And(lowByte)
 	oddProd := xOdd.Mul(yOdd).And(lowByte).ShiftAllLeft(8)
 	return evenProd.Or(oddProd).ReshapeToUint8s().BitsToInt8()
 }

 // Mul returns the element-wise product of x and y, modulo 2ⁿ
 // (n = 8 for these 8-bit elements).
 //
 // The bytes are split into even- and odd-indexed halves using the f0x16
 // pattern, each half is multiplied in 16-bit lanes, and one byte of every
 // 16-bit product is merged back into the position its operands came from.
 //
 // Emulated, CPU Feature: AVX
 func (x Uint8x16) Mul(y Uint8x16) Uint8x16 {
 	// All ones in even-indexed bytes, zero in odd-indexed bytes.
 	evens := LoadInt8x16Array(&f0x16).ToBits()
 	// The same pattern viewed as 16-bit lanes.
 	lowByte := evens.ReshapeToUint16s()
 	// Even-indexed bytes, alone in their 16-bit lanes.
 	xEven := x.And(evens).ReshapeToUint16s()
 	yEven := y.And(evens).ReshapeToUint16s()
 	// Odd-indexed bytes, shifted right by 8 within their 16-bit lanes.
 	xOdd := x.AndNot(evens).ReshapeToUint16s().ShiftAllRight(8)
 	yOdd := y.AndNot(evens).ReshapeToUint16s().ShiftAllRight(8)
 	// Mask each product, then shift the odd products back into place.
 	evenProd := xEven.Mul(yEven).And(lowByte)
 	oddProd := xOdd.Mul(yOdd).And(lowByte).ShiftAllLeft(8)
 	return evenProd.Or(oddProd).ReshapeToUint8s()
 }

 // Mul returns the element-wise product of x and y, modulo 2ⁿ
 // (n = 8 for these 8-bit elements).
 //
 // The bytes are split into even- and odd-indexed halves using the f0x32
 // pattern, each half is multiplied in 16-bit lanes, and one byte of every
 // 16-bit product is merged back into the position its operands came from.
 //
 // Emulated, CPU Feature: AVX2
 func (x Int8x32) Mul(y Int8x32) Int8x32 {
 	// All ones in even-indexed bytes, zero in odd-indexed bytes.
 	evens := LoadInt8x32Array(&f0x32)
 	// The same pattern viewed as 16-bit lanes.
 	lowByte := evens.ToBits().ReshapeToUint16s()
 	// Even-indexed bytes, alone in their 16-bit lanes.
 	xEven := x.And(evens).ToBits().ReshapeToUint16s()
 	yEven := y.And(evens).ToBits().ReshapeToUint16s()
 	// Odd-indexed bytes, shifted right by 8 within their 16-bit lanes.
 	xOdd := x.AndNot(evens).ToBits().ReshapeToUint16s().ShiftAllRight(8)
 	yOdd := y.AndNot(evens).ToBits().ReshapeToUint16s().ShiftAllRight(8)
 	// Mask each product, then shift the odd products back into place.
 	evenProd := xEven.Mul(yEven).And(lowByte)
 	oddProd := xOdd.Mul(yOdd).And(lowByte).ShiftAllLeft(8)
 	return evenProd.Or(oddProd).ReshapeToUint8s().BitsToInt8()
 }

 // Mul returns the element-wise product of x and y, modulo 2ⁿ
 // (n = 8 for these 8-bit elements).
 //
 // The bytes are split into even- and odd-indexed halves using the f0x64
 // pattern, each half is multiplied in 16-bit lanes, and one byte of every
 // 16-bit product is merged back into the position its operands came from.
 //
 // Emulated, CPU Feature: AVX512
 func (x Int8x64) Mul(y Int8x64) Int8x64 {
 	// All ones in even-indexed bytes, zero in odd-indexed bytes.
 	evens := LoadInt8x64Array(&f0x64)
 	// The same pattern viewed as 16-bit lanes.
 	lowByte := evens.ToBits().ReshapeToUint16s()
 	// Even-indexed bytes, alone in their 16-bit lanes.
 	xEven := x.And(evens).ToBits().ReshapeToUint16s()
 	yEven := y.And(evens).ToBits().ReshapeToUint16s()
 	// Odd-indexed bytes, shifted right by 8 within their 16-bit lanes.
 	xOdd := x.AndNot(evens).ToBits().ReshapeToUint16s().ShiftAllRight(8)
 	yOdd := y.AndNot(evens).ToBits().ReshapeToUint16s().ShiftAllRight(8)
 	// Mask each product, then shift the odd products back into place.
 	evenProd := xEven.Mul(yEven).And(lowByte)
 	oddProd := xOdd.Mul(yOdd).And(lowByte).ShiftAllLeft(8)
 	return evenProd.Or(oddProd).ReshapeToUint8s().BitsToInt8()
 }

 // Mul returns the element-wise product of x and y, modulo 2ⁿ
 // (n = 8 for these 8-bit elements).
 //
 // The bytes are split into even- and odd-indexed halves using the f0x32
 // pattern, each half is multiplied in 16-bit lanes, and one byte of every
 // 16-bit product is merged back into the position its operands came from.
 //
 // Emulated, CPU Feature: AVX2
 func (x Uint8x32) Mul(y Uint8x32) Uint8x32 {
 	// All ones in even-indexed bytes, zero in odd-indexed bytes.
 	evens := LoadInt8x32Array(&f0x32).ToBits()
 	// The same pattern viewed as 16-bit lanes.
 	lowByte := evens.ReshapeToUint16s()
 	// Even-indexed bytes, alone in their 16-bit lanes.
 	xEven := x.And(evens).ReshapeToUint16s()
 	yEven := y.And(evens).ReshapeToUint16s()
 	// Odd-indexed bytes, shifted right by 8 within their 16-bit lanes.
 	xOdd := x.AndNot(evens).ReshapeToUint16s().ShiftAllRight(8)
 	yOdd := y.AndNot(evens).ReshapeToUint16s().ShiftAllRight(8)
 	// Mask each product, then shift the odd products back into place.
 	evenProd := xEven.Mul(yEven).And(lowByte)
 	oddProd := xOdd.Mul(yOdd).And(lowByte).ShiftAllLeft(8)
 	return evenProd.Or(oddProd).ReshapeToUint8s()
 }

 // Mul returns the element-wise product of x and y, modulo 2ⁿ
 // (n = 8 for these 8-bit elements).
 //
 // The bytes are split into even- and odd-indexed halves using the f0x64
 // pattern, each half is multiplied in 16-bit lanes, and one byte of every
 // 16-bit product is merged back into the position its operands came from.
 //
 // Emulated, CPU Feature: AVX512
 func (x Uint8x64) Mul(y Uint8x64) Uint8x64 {
 	// All ones in even-indexed bytes, zero in odd-indexed bytes.
 	evens := LoadInt8x64Array(&f0x64).ToBits()
 	// The same pattern viewed as 16-bit lanes.
 	lowByte := evens.ReshapeToUint16s()
 	// Even-indexed bytes, alone in their 16-bit lanes.
 	xEven := x.And(evens).ReshapeToUint16s()
 	yEven := y.And(evens).ReshapeToUint16s()
 	// Odd-indexed bytes, shifted right by 8 within their 16-bit lanes.
 	xOdd := x.AndNot(evens).ReshapeToUint16s().ShiftAllRight(8)
 	yOdd := y.AndNot(evens).ReshapeToUint16s().ShiftAllRight(8)
 	// Mask each product, then shift the odd products back into place.
 	evenProd := xEven.Mul(yEven).And(lowByte)
 	oddProd := xOdd.Mul(yOdd).And(lowByte).ShiftAllLeft(8)
 	return evenProd.Or(oddProd).ReshapeToUint8s()
 }
	// Copyright 2026 The Go Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style
	// license that can be found in the LICENSE file.

	//go:build goexperiment.simd && amd64

	package archsimd

	// Abs returns a vector whose elements are the absolute values of the
	// corresponding elements of x. Each result is formed by clearing the
	// sign bit of the element and keeping every other bit as is.
	//
	// Emulated, CPU Feature AVX
	func (x Float32x4) Abs() Float32x4 {
		const signBit = 1 << 31 // only the top bit of a 32-bit element
		signs := BroadcastUint32x4(signBit)
		return x.ToBits().AndNot(signs).BitsToFloat32()
	}

	// Abs returns a vector whose elements are the absolute values of the
	// corresponding elements of x. Each result is formed by clearing the
	// sign bit of the element and keeping every other bit as is.
	//
	// Emulated, CPU Feature AVX2
	func (x Float32x8) Abs() Float32x8 {
		const signBit = 1 << 31 // only the top bit of a 32-bit element
		signs := BroadcastUint32x8(signBit)
		return x.ToBits().AndNot(signs).BitsToFloat32()
	}

	// Abs returns a vector whose elements are the absolute values of the
	// corresponding elements of x. Each result is formed by clearing the
	// sign bit of the element and keeping every other bit as is.
	//
	// Emulated, CPU Feature AVX512
	func (x Float32x16) Abs() Float32x16 {
		const signBit = 1 << 31 // only the top bit of a 32-bit element
		signs := BroadcastUint32x16(signBit)
		return x.ToBits().AndNot(signs).BitsToFloat32()
	}

	// Abs returns a vector whose elements are the absolute values of the
	// corresponding elements of x. Each result is formed by clearing the
	// sign bit of the element and keeping every other bit as is.
	//
	// Emulated, CPU Feature AVX
	func (x Float64x2) Abs() Float64x2 {
		const signBit = 1 << 63 // only the top bit of a 64-bit element
		signs := BroadcastUint64x2(signBit)
		return x.ToBits().AndNot(signs).BitsToFloat64()
	}

	// Abs returns a vector whose elements are the absolute values of the
	// corresponding elements of x. Each result is formed by clearing the
	// sign bit of the element and keeping every other bit as is.
	//
	// Emulated, CPU Feature AVX2
	func (x Float64x4) Abs() Float64x4 {
		const signBit = 1 << 63 // only the top bit of a 64-bit element
		signs := BroadcastUint64x4(signBit)
		return x.ToBits().AndNot(signs).BitsToFloat64()
	}

	// Abs returns a vector whose elements are the absolute values of the
	// corresponding elements of x. Each result is formed by clearing the
	// sign bit of the element and keeping every other bit as is.
	//
	// Emulated, CPU Feature AVX512
	func (x Float64x8) Abs() Float64x8 {
		const signBit = 1 << 63 // only the top bit of a 64-bit element
		signs := BroadcastUint64x8(signBit)
		return x.ToBits().AndNot(signs).BitsToFloat64()
	}

	// Neg returns a vector whose elements are the negations of the
	// corresponding elements of x. Each result is formed by flipping the
	// sign bit of the element and keeping every other bit as is.
	//
	// Emulated, CPU Feature AVX
	func (x Float32x4) Neg() Float32x4 {
		const signBit = 1 << 31 // only the top bit of a 32-bit element
		bits := x.ToBits()
		return bits.Xor(BroadcastUint32x4(signBit)).BitsToFloat32()
	}

	// Neg returns a vector whose elements are the negations of the
	// corresponding elements of x. Each result is formed by flipping the
	// sign bit of the element and keeping every other bit as is.
	//
	// Emulated, CPU Feature AVX2
	func (x Float32x8) Neg() Float32x8 {
		const signBit = 1 << 31 // only the top bit of a 32-bit element
		bits := x.ToBits()
		return bits.Xor(BroadcastUint32x8(signBit)).BitsToFloat32()
	}

	// Neg returns a vector whose elements are the negations of the
	// corresponding elements of x. Each result is formed by flipping the
	// sign bit of the element and keeping every other bit as is.
	//
	// Emulated, CPU Feature AVX512
	func (x Float32x16) Neg() Float32x16 {
		const signBit = 1 << 31 // only the top bit of a 32-bit element
		bits := x.ToBits()
		return bits.Xor(BroadcastUint32x16(signBit)).BitsToFloat32()
	}

	// Neg returns a vector whose elements are the negations of the
	// corresponding elements of x. Each result is formed by flipping the
	// sign bit of the element and keeping every other bit as is.
	//
	// Emulated, CPU Feature AVX
	func (x Float64x2) Neg() Float64x2 {
		const signBit = 1 << 63 // only the top bit of a 64-bit element
		bits := x.ToBits()
		return bits.Xor(BroadcastUint64x2(signBit)).BitsToFloat64()
	}

	// Neg returns a vector whose elements are the negations of the
	// corresponding elements of x. Each result is formed by flipping the
	// sign bit of the element and keeping every other bit as is.
	//
	// Emulated, CPU Feature AVX2
	func (x Float64x4) Neg() Float64x4 {
		const signBit = 1 << 63 // only the top bit of a 64-bit element
		bits := x.ToBits()
		return bits.Xor(BroadcastUint64x4(signBit)).BitsToFloat64()
	}

	// Neg returns a vector whose elements are the negations of the
	// corresponding elements of x. Each result is formed by flipping the
	// sign bit of the element and keeping every other bit as is.
	//
	// Emulated, CPU Feature AVX512
	func (x Float64x8) Neg() Float64x8 {
		const signBit = 1 << 63 // only the top bit of a 64-bit element
		bits := x.ToBits()
		return bits.Xor(BroadcastUint64x8(signBit)).BitsToFloat64()
	}

	// f0x16, f0x32 and f0x64 hold an alternating -1, 0 byte pattern (every
	// even-indexed byte all ones, every odd-indexed byte zero) sized for
	// 16-, 32- and 64-byte vectors. The 8-bit Mul emulations in this file
	// load them as masks.
	var (
		f0x16 = [16]int8{
			-1, 0, -1, 0, -1, 0, -1, 0,
			-1, 0, -1, 0, -1, 0, -1, 0,
		}
		f0x32 = [32]int8{
			-1, 0, -1, 0, -1, 0, -1, 0,
			-1, 0, -1, 0, -1, 0, -1, 0,
			-1, 0, -1, 0, -1, 0, -1, 0,
			-1, 0, -1, 0, -1, 0, -1, 0,
		}
		f0x64 = [64]int8{
			-1, 0, -1, 0, -1, 0, -1, 0,
			-1, 0, -1, 0, -1, 0, -1, 0,
			-1, 0, -1, 0, -1, 0, -1, 0,
			-1, 0, -1, 0, -1, 0, -1, 0,
			-1, 0, -1, 0, -1, 0, -1, 0,
			-1, 0, -1, 0, -1, 0, -1, 0,
			-1, 0, -1, 0, -1, 0, -1, 0,
			-1, 0, -1, 0, -1, 0, -1, 0,
		}
	)

	// Mul returns the element-wise product of x and y, modulo 2ⁿ
	// (n = 8 for these 8-bit elements).
	//
	// The bytes are split into even- and odd-indexed halves using the f0x16
	// pattern, each half is multiplied in 16-bit lanes, and one byte of every
	// 16-bit product is merged back into the position its operands came from.
	//
	// Emulated, CPU Feature: AVX
	func (x Int8x16) Mul(y Int8x16) Int8x16 {
		// All ones in even-indexed bytes, zero in odd-indexed bytes.
		evens := LoadInt8x16Array(&f0x16)
		// The same pattern viewed as 16-bit lanes.
		lowByte := evens.ToBits().ReshapeToUint16s()
		// Even-indexed bytes, alone in their 16-bit lanes.
		xEven := x.And(evens).ToBits().ReshapeToUint16s()
		yEven := y.And(evens).ToBits().ReshapeToUint16s()
		// Odd-indexed bytes, shifted right by 8 within their 16-bit lanes.
		xOdd := x.AndNot(evens).ToBits().ReshapeToUint16s().ShiftAllRight(8)
		yOdd := y.AndNot(evens).ToBits().ReshapeToUint16s().ShiftAllRight(8)
		// Mask each product, then shift the odd products back into place.
		evenProd := xEven.Mul(yEven).And(lowByte)
		oddProd := xOdd.Mul(yOdd).And(lowByte).ShiftAllLeft(8)
		return evenProd.Or(oddProd).ReshapeToUint8s().BitsToInt8()
	}

	// Mul returns the element-wise product of x and y, modulo 2ⁿ
	// (n = 8 for these 8-bit elements).
	//
	// The bytes are split into even- and odd-indexed halves using the f0x16
	// pattern, each half is multiplied in 16-bit lanes, and one byte of every
	// 16-bit product is merged back into the position its operands came from.
	//
	// Emulated, CPU Feature: AVX
	func (x Uint8x16) Mul(y Uint8x16) Uint8x16 {
		// All ones in even-indexed bytes, zero in odd-indexed bytes.
		evens := LoadInt8x16Array(&f0x16).ToBits()
		// The same pattern viewed as 16-bit lanes.
		lowByte := evens.ReshapeToUint16s()
		// Even-indexed bytes, alone in their 16-bit lanes.
		xEven := x.And(evens).ReshapeToUint16s()
		yEven := y.And(evens).ReshapeToUint16s()
		// Odd-indexed bytes, shifted right by 8 within their 16-bit lanes.
		xOdd := x.AndNot(evens).ReshapeToUint16s().ShiftAllRight(8)
		yOdd := y.AndNot(evens).ReshapeToUint16s().ShiftAllRight(8)
		// Mask each product, then shift the odd products back into place.
		evenProd := xEven.Mul(yEven).And(lowByte)
		oddProd := xOdd.Mul(yOdd).And(lowByte).ShiftAllLeft(8)
		return evenProd.Or(oddProd).ReshapeToUint8s()
	}

	// Mul returns the element-wise product of x and y, modulo 2ⁿ
	// (n = 8 for these 8-bit elements).
	//
	// The bytes are split into even- and odd-indexed halves using the f0x32
	// pattern, each half is multiplied in 16-bit lanes, and one byte of every
	// 16-bit product is merged back into the position its operands came from.
	//
	// Emulated, CPU Feature: AVX2
	func (x Int8x32) Mul(y Int8x32) Int8x32 {
		// All ones in even-indexed bytes, zero in odd-indexed bytes.
		evens := LoadInt8x32Array(&f0x32)
		// The same pattern viewed as 16-bit lanes.
		lowByte := evens.ToBits().ReshapeToUint16s()
		// Even-indexed bytes, alone in their 16-bit lanes.
		xEven := x.And(evens).ToBits().ReshapeToUint16s()
		yEven := y.And(evens).ToBits().ReshapeToUint16s()
		// Odd-indexed bytes, shifted right by 8 within their 16-bit lanes.
		xOdd := x.AndNot(evens).ToBits().ReshapeToUint16s().ShiftAllRight(8)
		yOdd := y.AndNot(evens).ToBits().ReshapeToUint16s().ShiftAllRight(8)
		// Mask each product, then shift the odd products back into place.
		evenProd := xEven.Mul(yEven).And(lowByte)
		oddProd := xOdd.Mul(yOdd).And(lowByte).ShiftAllLeft(8)
		return evenProd.Or(oddProd).ReshapeToUint8s().BitsToInt8()
	}

	// Mul returns the element-wise product of x and y, modulo 2ⁿ
	// (n = 8 for these 8-bit elements).
	//
	// The bytes are split into even- and odd-indexed halves using the f0x64
	// pattern, each half is multiplied in 16-bit lanes, and one byte of every
	// 16-bit product is merged back into the position its operands came from.
	//
	// Emulated, CPU Feature: AVX512
	func (x Int8x64) Mul(y Int8x64) Int8x64 {
		// All ones in even-indexed bytes, zero in odd-indexed bytes.
		evens := LoadInt8x64Array(&f0x64)
		// The same pattern viewed as 16-bit lanes.
		lowByte := evens.ToBits().ReshapeToUint16s()
		// Even-indexed bytes, alone in their 16-bit lanes.
		xEven := x.And(evens).ToBits().ReshapeToUint16s()
		yEven := y.And(evens).ToBits().ReshapeToUint16s()
		// Odd-indexed bytes, shifted right by 8 within their 16-bit lanes.
		xOdd := x.AndNot(evens).ToBits().ReshapeToUint16s().ShiftAllRight(8)
		yOdd := y.AndNot(evens).ToBits().ReshapeToUint16s().ShiftAllRight(8)
		// Mask each product, then shift the odd products back into place.
		evenProd := xEven.Mul(yEven).And(lowByte)
		oddProd := xOdd.Mul(yOdd).And(lowByte).ShiftAllLeft(8)
		return evenProd.Or(oddProd).ReshapeToUint8s().BitsToInt8()
	}

	// Mul returns the element-wise product of x and y, modulo 2ⁿ
	// (n = 8 for these 8-bit elements).
	//
	// The bytes are split into even- and odd-indexed halves using the f0x32
	// pattern, each half is multiplied in 16-bit lanes, and one byte of every
	// 16-bit product is merged back into the position its operands came from.
	//
	// Emulated, CPU Feature: AVX2
	func (x Uint8x32) Mul(y Uint8x32) Uint8x32 {
		// All ones in even-indexed bytes, zero in odd-indexed bytes.
		evens := LoadInt8x32Array(&f0x32).ToBits()
		// The same pattern viewed as 16-bit lanes.
		lowByte := evens.ReshapeToUint16s()
		// Even-indexed bytes, alone in their 16-bit lanes.
		xEven := x.And(evens).ReshapeToUint16s()
		yEven := y.And(evens).ReshapeToUint16s()
		// Odd-indexed bytes, shifted right by 8 within their 16-bit lanes.
		xOdd := x.AndNot(evens).ReshapeToUint16s().ShiftAllRight(8)
		yOdd := y.AndNot(evens).ReshapeToUint16s().ShiftAllRight(8)
		// Mask each product, then shift the odd products back into place.
		evenProd := xEven.Mul(yEven).And(lowByte)
		oddProd := xOdd.Mul(yOdd).And(lowByte).ShiftAllLeft(8)
		return evenProd.Or(oddProd).ReshapeToUint8s()
	}

	// Mul returns the element-wise product of x and y, modulo 2ⁿ
	// (n = 8 for these 8-bit elements).
	//
	// The bytes are split into even- and odd-indexed halves using the f0x64
	// pattern, each half is multiplied in 16-bit lanes, and one byte of every
	// 16-bit product is merged back into the position its operands came from.
	//
	// Emulated, CPU Feature: AVX512
	func (x Uint8x64) Mul(y Uint8x64) Uint8x64 {
		// All ones in even-indexed bytes, zero in odd-indexed bytes.
		evens := LoadInt8x64Array(&f0x64).ToBits()
		// The same pattern viewed as 16-bit lanes.
		lowByte := evens.ReshapeToUint16s()
		// Even-indexed bytes, alone in their 16-bit lanes.
		xEven := x.And(evens).ReshapeToUint16s()
		yEven := y.And(evens).ReshapeToUint16s()
		// Odd-indexed bytes, shifted right by 8 within their 16-bit lanes.
		xOdd := x.AndNot(evens).ReshapeToUint16s().ShiftAllRight(8)
		yOdd := y.AndNot(evens).ReshapeToUint16s().ShiftAllRight(8)
		// Mask each product, then shift the odd products back into place.
		evenProd := xEven.Mul(yEven).And(lowByte)
		oddProd := xOdd.Mul(yOdd).And(lowByte).ShiftAllLeft(8)
		return evenProd.Or(oddProd).ReshapeToUint8s()
	}