| // Copyright 2026 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| //go:build goexperiment.simd && amd64 |
| |
| package archsimd |
| |
| // Abs returns the absolute values of the elements of x |
| // |
| // Emulated, CPU Feature AVX |
| func (x Float32x4) Abs() Float32x4 { |
| mask := BroadcastUint32x4(0x80000000) |
| return x.ToBits().AndNot(mask).BitsToFloat32() |
| } |
| |
| // Abs returns the absolute values of the elements of x |
| // |
| // Emulated, CPU Feature AVX2 |
| func (x Float32x8) Abs() Float32x8 { |
| // mask will have a 1 in the sign bit UNLESS x is NaN |
| mask := BroadcastUint32x8(0x80000000) |
| return x.ToBits().AndNot(mask).BitsToFloat32() |
| } |
| |
| // Abs returns the absolute values of the elements of x |
| // |
| // Emulated, CPU Feature AVX512 |
| func (x Float32x16) Abs() Float32x16 { |
| mask := BroadcastUint32x16(0x80000000) |
| return x.ToBits().AndNot(mask).BitsToFloat32() |
| } |
| |
| // Abs returns the absolute values of the elements of x |
| // |
| // Emulated, CPU Feature AVX |
| func (x Float64x2) Abs() Float64x2 { |
| // mask will have a 1 in the sign bit UNLESS x is NaN |
| mask := BroadcastUint64x2(0x8000000000000000) |
| return x.ToBits().AndNot(mask).BitsToFloat64() |
| } |
| |
| // Abs returns the absolute values of the elements of x |
| // |
| // Emulated, CPU Feature AVX2 |
| func (x Float64x4) Abs() Float64x4 { |
| mask := BroadcastUint64x4(0x8000000000000000) |
| return x.ToBits().AndNot(mask).BitsToFloat64() |
| } |
| |
| // Abs returns the absolute values of the elements of x |
| // |
| // Emulated, CPU Feature AVX512 |
| func (x Float64x8) Abs() Float64x8 { |
| mask := BroadcastUint64x8(0x8000000000000000) |
| return x.ToBits().AndNot(mask).BitsToFloat64() |
| } |
| |
| // Neg returns the negation of the elements of x |
| // |
| // Emulated, CPU Feature AVX |
| func (x Float32x4) Neg() Float32x4 { |
| mask := BroadcastUint32x4(0x80000000) |
| return x.ToBits().Xor(mask).BitsToFloat32() |
| } |
| |
| // Neg returns the negation of the elements of x |
| // |
| // Emulated, CPU Feature AVX2 |
| func (x Float32x8) Neg() Float32x8 { |
| // mask will have a 1 in the sign bit UNLESS x is NaN |
| mask := BroadcastUint32x8(0x80000000) |
| return x.ToBits().Xor(mask).BitsToFloat32() |
| } |
| |
| // Neg returns the negation of the elements of x |
| // |
| // Emulated, CPU Feature AVX512 |
| func (x Float32x16) Neg() Float32x16 { |
| mask := BroadcastUint32x16(0x80000000) |
| return x.ToBits().Xor(mask).BitsToFloat32() |
| } |
| |
| // Neg returns the negation of the elements of x |
| // |
| // Emulated, CPU Feature AVX |
| func (x Float64x2) Neg() Float64x2 { |
| // mask will have a 1 in the sign bit UNLESS x is NaN |
| mask := BroadcastUint64x2(0x8000000000000000) |
| return x.ToBits().Xor(mask).BitsToFloat64() |
| } |
| |
| // Neg returns the negation of the elements of x |
| // |
| // Emulated, CPU Feature AVX2 |
| func (x Float64x4) Neg() Float64x4 { |
| mask := BroadcastUint64x4(0x8000000000000000) |
| return x.ToBits().Xor(mask).BitsToFloat64() |
| } |
| |
| // Neg returns the negation of the elements of x |
| // |
| // Emulated, CPU Feature AVX512 |
| func (x Float64x8) Neg() Float64x8 { |
| mask := BroadcastUint64x8(0x8000000000000000) |
| return x.ToBits().Xor(mask).BitsToFloat64() |
| } |
| |
// f0x16, f0x32 and f0x64 are byte masks of 16, 32 and 64 bytes in which
// every even-indexed byte is -1 (all bits set) and every odd-indexed byte
// is 0. The emulated 8-bit Mul methods load them to select the even bytes
// of a vector.
var (
	f0x16 = [16]int8{
		-1, 0, -1, 0, -1, 0, -1, 0,
		-1, 0, -1, 0, -1, 0, -1, 0,
	}
	f0x32 = [32]int8{
		-1, 0, -1, 0, -1, 0, -1, 0,
		-1, 0, -1, 0, -1, 0, -1, 0,
		-1, 0, -1, 0, -1, 0, -1, 0,
		-1, 0, -1, 0, -1, 0, -1, 0,
	}
	f0x64 = [64]int8{
		-1, 0, -1, 0, -1, 0, -1, 0,
		-1, 0, -1, 0, -1, 0, -1, 0,
		-1, 0, -1, 0, -1, 0, -1, 0,
		-1, 0, -1, 0, -1, 0, -1, 0,
		-1, 0, -1, 0, -1, 0, -1, 0,
		-1, 0, -1, 0, -1, 0, -1, 0,
		-1, 0, -1, 0, -1, 0, -1, 0,
		-1, 0, -1, 0, -1, 0, -1, 0,
	}
)
| |
| // Mul multiplies corresponding elements of two vectors, modulo 2ⁿ. |
| // |
| // Emulated, CPU Feature: AVX |
| func (x Int8x16) Mul(y Int8x16) Int8x16 { |
| mask := LoadInt8x16Array(&f0x16) |
| mask16 := mask.ToBits().ReshapeToUint16s() |
| xe := x.And(mask).ToBits().ReshapeToUint16s() |
| xo := x.AndNot(mask).ToBits().ReshapeToUint16s().ShiftAllRight(8) |
| ye := y.And(mask).ToBits().ReshapeToUint16s() |
| yo := y.AndNot(mask).ToBits().ReshapeToUint16s().ShiftAllRight(8) |
| pe := xe.Mul(ye).And(mask16) |
| po := xo.Mul(yo).And(mask16).ShiftAllLeft(8) |
| return pe.Or(po).ReshapeToUint8s().BitsToInt8() |
| } |
| |
| // Mul multiplies corresponding elements of two vectors, modulo 2ⁿ. |
| // |
| // Emulated, CPU Feature: AVX |
| func (x Uint8x16) Mul(y Uint8x16) Uint8x16 { |
| mask := LoadInt8x16Array(&f0x16).ToBits() |
| mask16 := mask.ReshapeToUint16s() |
| xe := x.And(mask).ReshapeToUint16s() |
| xo := x.AndNot(mask).ReshapeToUint16s().ShiftAllRight(8) |
| ye := y.And(mask).ReshapeToUint16s() |
| yo := y.AndNot(mask).ReshapeToUint16s().ShiftAllRight(8) |
| pe := xe.Mul(ye).And(mask16) |
| po := xo.Mul(yo).And(mask16).ShiftAllLeft(8) |
| return pe.Or(po).ReshapeToUint8s() |
| } |
| |
| // Mul multiplies corresponding elements of two vectors, modulo 2ⁿ. |
| // |
| // Emulated, CPU Feature: AVX2 |
| func (x Int8x32) Mul(y Int8x32) Int8x32 { |
| mask := LoadInt8x32Array(&f0x32) |
| mask16 := mask.ToBits().ReshapeToUint16s() |
| xe := x.And(mask).ToBits().ReshapeToUint16s() |
| xo := x.AndNot(mask).ToBits().ReshapeToUint16s().ShiftAllRight(8) |
| ye := y.And(mask).ToBits().ReshapeToUint16s() |
| yo := y.AndNot(mask).ToBits().ReshapeToUint16s().ShiftAllRight(8) |
| pe := xe.Mul(ye).And(mask16) |
| po := xo.Mul(yo).And(mask16).ShiftAllLeft(8) |
| return pe.Or(po).ReshapeToUint8s().BitsToInt8() |
| } |
| |
| // Mul multiplies corresponding elements of two vectors, modulo 2ⁿ. |
| // |
| // Emulated, CPU Feature: AVX512 |
| func (x Int8x64) Mul(y Int8x64) Int8x64 { |
| mask := LoadInt8x64Array(&f0x64) |
| mask16 := mask.ToBits().ReshapeToUint16s() |
| xe := x.And(mask).ToBits().ReshapeToUint16s() |
| xo := x.AndNot(mask).ToBits().ReshapeToUint16s().ShiftAllRight(8) |
| ye := y.And(mask).ToBits().ReshapeToUint16s() |
| yo := y.AndNot(mask).ToBits().ReshapeToUint16s().ShiftAllRight(8) |
| pe := xe.Mul(ye).And(mask16) |
| po := xo.Mul(yo).And(mask16).ShiftAllLeft(8) |
| return pe.Or(po).ReshapeToUint8s().BitsToInt8() |
| } |
| |
| // Mul multiplies corresponding elements of two vectors, modulo 2ⁿ. |
| // |
| // Emulated, CPU Feature: AVX2 |
| func (x Uint8x32) Mul(y Uint8x32) Uint8x32 { |
| mask := LoadInt8x32Array(&f0x32).ToBits() |
| mask16 := mask.ReshapeToUint16s() |
| xe := x.And(mask).ReshapeToUint16s() |
| xo := x.AndNot(mask).ReshapeToUint16s().ShiftAllRight(8) |
| ye := y.And(mask).ReshapeToUint16s() |
| yo := y.AndNot(mask).ReshapeToUint16s().ShiftAllRight(8) |
| pe := xe.Mul(ye).And(mask16) |
| po := xo.Mul(yo).And(mask16).ShiftAllLeft(8) |
| return pe.Or(po).ReshapeToUint8s() |
| } |
| |
| // Mul multiplies corresponding elements of two vectors, modulo 2ⁿ. |
| // |
| // Emulated, CPU Feature: AVX512 |
| func (x Uint8x64) Mul(y Uint8x64) Uint8x64 { |
| mask := LoadInt8x64Array(&f0x64).ToBits() |
| mask16 := mask.ReshapeToUint16s() |
| xe := x.And(mask).ReshapeToUint16s() |
| xo := x.AndNot(mask).ReshapeToUint16s().ShiftAllRight(8) |
| ye := y.And(mask).ReshapeToUint16s() |
| yo := y.AndNot(mask).ReshapeToUint16s().ShiftAllRight(8) |
| pe := xe.Mul(ye).And(mask16) |
| po := xo.Mul(yo).And(mask16).ShiftAllLeft(8) |
| return pe.Or(po).ReshapeToUint8s() |
| } |