| // Copyright 2025 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| //go:build goexperiment.simd && amd64 |
| |
| package simd_test |
| |
| import ( |
| "reflect" |
| "simd/archsimd" |
| "slices" |
| "testing" |
| ) |
| |
| var sink any |
| |
| func TestType(t *testing.T) { |
| // Testing: |
| // - Defined as another struct's field is ok |
| // - Pointer is ok |
| // - Type defition is ok |
| // - Type alias is ok |
| // - Type conversion is ok |
| // - Conversion to interface is ok |
| type alias = archsimd.Int32x4 |
| type maskT archsimd.Mask32x4 |
| type myStruct struct { |
| x alias |
| y *archsimd.Int32x4 |
| z maskT |
| } |
| vals := [4]int32{1, 2, 3, 4} |
| v := myStruct{x: archsimd.LoadInt32x4(&vals)} |
| // masking elements 1 and 2. |
| want := []int32{2, 4, 0, 0} |
| y := archsimd.LoadInt32x4(&vals) |
| v.y = &y |
| sink = y |
| |
| if !archsimd.X86.AVX512GFNI() { |
| t.Skip("Test requires X86.AVX512, not available on this hardware") |
| return |
| } |
| v.z = maskT(archsimd.Mask32x4FromBits(0b0011)) |
| *v.y = v.y.Add(v.x).Masked(archsimd.Mask32x4(v.z)) |
| |
| got := [4]int32{} |
| v.y.Store(&got) |
| checkSlices(t, got[:], want) |
| } |
| |
| func TestUncomparable(t *testing.T) { |
| // Test that simd vectors are not comparable |
| var x, y any = archsimd.LoadUint32x4(&[4]uint32{1, 2, 3, 4}), archsimd.LoadUint32x4(&[4]uint32{5, 6, 7, 8}) |
| shouldPanic := func(fn func()) { |
| defer func() { |
| if recover() == nil { |
| panic("did not panic") |
| } |
| }() |
| fn() |
| } |
| shouldPanic(func() { _ = x == y }) |
| } |
| |
| func TestFuncValue(t *testing.T) { |
| // Test that simd intrinsic can be used as a function value. |
| xv := [4]int32{1, 2, 3, 4} |
| yv := [4]int32{5, 6, 7, 8} |
| want := []int32{6, 8, 10, 12} |
| x := archsimd.LoadInt32x4(&xv) |
| y := archsimd.LoadInt32x4(&yv) |
| fn := archsimd.Int32x4.Add |
| sink = fn |
| x = fn(x, y) |
| got := [4]int32{} |
| x.Store(&got) |
| checkSlices(t, got[:], want) |
| } |
| |
| func TestReflectMethod(t *testing.T) { |
| // Test that simd intrinsic can be accessed via reflection. |
| // NOTE: we don't yet support reflect method.Call. |
| xv := [4]int32{1, 2, 3, 4} |
| yv := [4]int32{5, 6, 7, 8} |
| want := []int32{6, 8, 10, 12} |
| x := archsimd.LoadInt32x4(&xv) |
| y := archsimd.LoadInt32x4(&yv) |
| m, ok := reflect.TypeOf(x).MethodByName("Add") |
| if !ok { |
| t.Fatal("Add method not found") |
| } |
| fn := m.Func.Interface().(func(x, y archsimd.Int32x4) archsimd.Int32x4) |
| x = fn(x, y) |
| got := [4]int32{} |
| x.Store(&got) |
| checkSlices(t, got[:], want) |
| } |
| |
| func TestVectorConversion(t *testing.T) { |
| if !archsimd.X86.AVX512GFNI() { |
| t.Skip("Test requires X86.AVX512, not available on this hardware") |
| return |
| } |
| xv := [4]int32{1, 2, 3, 4} |
| x := archsimd.LoadInt32x4(&xv) |
| xPromoted := x.AsInt64x2() |
| xPromotedDemoted := xPromoted.AsInt32x4() |
| got := [4]int32{} |
| xPromotedDemoted.Store(&got) |
| for i := range 4 { |
| if xv[i] != got[i] { |
| t.Errorf("Result at %d incorrect: want %d, got %d", i, xv[i], got[i]) |
| } |
| } |
| } |
| |
| func TestMaskConversion(t *testing.T) { |
| if !archsimd.X86.AVX512GFNI() { |
| t.Skip("Test requires X86.AVX512, not available on this hardware") |
| return |
| } |
| x := archsimd.LoadInt32x4Slice([]int32{5, 0, 7, 0}) |
| mask := archsimd.Int32x4{}.Sub(x).ToMask() |
| y := archsimd.LoadInt32x4Slice([]int32{1, 2, 3, 4}).Add(x).Masked(mask) |
| want := [4]int32{6, 0, 10, 0} |
| got := make([]int32, 4) |
| y.StoreSlice(got) |
| checkSlices(t, got[:], want[:]) |
| } |
| |
| func TestPermute(t *testing.T) { |
| if !archsimd.X86.AVX512() { |
| t.Skip("Test requires X86.AVX512, not available on this hardware") |
| return |
| } |
| x := []int64{1, 2, 3, 4, 5, 6, 7, 8} |
| indices := []uint64{7, 6, 5, 4, 3, 2, 1, 0} |
| want := []int64{8, 7, 6, 5, 4, 3, 2, 1} |
| got := make([]int64, 8) |
| archsimd.LoadInt64x8Slice(x).Permute(archsimd.LoadUint64x8Slice(indices)).StoreSlice(got) |
| checkSlices(t, got, want) |
| } |
| |
| func TestPermuteOrZero(t *testing.T) { |
| x := []uint8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16} |
| indices := []int8{7, 6, 5, 4, 3, 2, 1, 0, -1, 8, -1, 9, -1, 10, -1, 11} |
| want := []uint8{8, 7, 6, 5, 4, 3, 2, 1, 0, 9, 0, 10, 0, 11, 0, 12} |
| got := make([]uint8, len(x)) |
| archsimd.LoadUint8x16Slice(x).PermuteOrZero(archsimd.LoadInt8x16Slice(indices)).StoreSlice(got) |
| checkSlices(t, got, want) |
| } |
| |
| func TestConcatPermute(t *testing.T) { |
| if !archsimd.X86.AVX512() { |
| t.Skip("Test requires X86.AVX512, not available on this hardware") |
| return |
| } |
| x := []int64{1, 2, 3, 4, 5, 6, 7, 8} |
| y := []int64{-1, -2, -3, -4, -5, -6, -7, -8} |
| indices := []uint64{7 + 8, 6, 5 + 8, 4, 3 + 8, 2, 1 + 8, 0} |
| want := []int64{-8, 7, -6, 5, -4, 3, -2, 1} |
| got := make([]int64, 8) |
| archsimd.LoadInt64x8Slice(x).ConcatPermute(archsimd.LoadInt64x8Slice(y), archsimd.LoadUint64x8Slice(indices)).StoreSlice(got) |
| checkSlices(t, got, want) |
| } |
| |
| func TestCompress(t *testing.T) { |
| if !archsimd.X86.AVX512() { |
| t.Skip("Test requires X86.AVX512, not available on this hardware") |
| return |
| } |
| v1234 := archsimd.LoadInt32x4Slice([]int32{1, 2, 3, 4}) |
| v2400 := v1234.Compress(archsimd.Mask32x4FromBits(0b1010)) |
| got := make([]int32, 4) |
| v2400.StoreSlice(got) |
| want := []int32{2, 4, 0, 0} |
| if !slices.Equal(got, want) { |
| t.Errorf("want and got differ, want=%v, got=%v", want, got) |
| } |
| } |
| |
| func TestExpand(t *testing.T) { |
| if !archsimd.X86.AVX512() { |
| t.Skip("Test requires X86.AVX512, not available on this hardware") |
| return |
| } |
| v3400 := archsimd.LoadInt32x4Slice([]int32{3, 4, 0, 0}) |
| v2400 := v3400.Expand(archsimd.Mask32x4FromBits(0b1010)) |
| got := make([]int32, 4) |
| v2400.StoreSlice(got) |
| want := []int32{0, 3, 0, 4} |
| if !slices.Equal(got, want) { |
| t.Errorf("want and got differ, want=%v, got=%v", want, got) |
| } |
| } |
| |
| var testShiftAllVal uint64 = 3 |
| |
| func TestShiftAll(t *testing.T) { |
| got := make([]int32, 4) |
| archsimd.LoadInt32x4Slice([]int32{0b11, 0b11, 0b11, 0b11}).ShiftAllLeft(2).StoreSlice(got) |
| for _, v := range got { |
| if v != 0b1100 { |
| t.Errorf("expect 0b1100, got %b", v) |
| } |
| } |
| archsimd.LoadInt32x4Slice([]int32{0b11, 0b11, 0b11, 0b11}).ShiftAllLeft(testShiftAllVal).StoreSlice(got) |
| for _, v := range got { |
| if v != 0b11000 { |
| t.Errorf("expect 0b11000, got %b", v) |
| } |
| } |
| } |
| |
| func TestSlicesInt8(t *testing.T) { |
| a := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, |
| 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32} |
| v := archsimd.LoadInt8x32Slice(a) |
| b := make([]int8, 32, 32) |
| v.StoreSlice(b) |
| checkSlices(t, a, b) |
| } |
| |
| func TestSlicesInt8SetElem(t *testing.T) { |
| a := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, |
| 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32} |
| v := archsimd.LoadInt8x16Slice(a) |
| |
| v = v.SetElem(3, 13) |
| a[3] = 13 |
| |
| b := make([]int8, 16, 16) |
| v.StoreSlice(b) |
| checkSlices(t, a, b) |
| } |
| |
| func TestSlicesInt8GetElem(t *testing.T) { |
| a := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, |
| 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32} |
| v := archsimd.LoadInt8x16Slice(a) |
| e := v.GetElem(2) |
| if e != a[2] { |
| t.Errorf("GetElem(2) = %d != a[2] = %d", e, a[2]) |
| } |
| |
| } |
| |
| func TestSlicesInt8TooShortLoad(t *testing.T) { |
| defer func() { |
| if r := recover(); r != nil { |
| t.Logf("Saw EXPECTED panic %v", r) |
| } else { |
| t.Errorf("Did not see expected panic") |
| } |
| }() |
| a := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, |
| 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31} // TOO SHORT, should panic |
| v := archsimd.LoadInt8x32Slice(a) |
| b := make([]int8, 32, 32) |
| v.StoreSlice(b) |
| checkSlices(t, a, b) |
| } |
| |
| func TestSlicesInt8TooShortStore(t *testing.T) { |
| defer func() { |
| if r := recover(); r != nil { |
| t.Logf("Saw EXPECTED panic %v", r) |
| } else { |
| t.Errorf("Did not see expected panic") |
| } |
| }() |
| a := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, |
| 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32} |
| v := archsimd.LoadInt8x32Slice(a) |
| b := make([]int8, 31) // TOO SHORT, should panic |
| v.StoreSlice(b) |
| checkSlices(t, a, b) |
| } |
| |
| func TestSlicesFloat64(t *testing.T) { |
| a := []float64{1, 2, 3, 4, 5, 6, 7, 8} // too long, should be fine |
| v := archsimd.LoadFloat64x4Slice(a) |
| b := make([]float64, 4, 4) |
| v.StoreSlice(b) |
| for i := range b { |
| if a[i] != b[i] { |
| t.Errorf("a and b differ at index %d, a=%f, b=%f", i, a[i], b[i]) |
| } |
| } |
| } |
| |
| // TODO: try to reduce this test to be smaller. |
| func TestMergeLocals(t *testing.T) { |
| testMergeLocalswrapper(t, archsimd.Int64x4.Add) |
| } |
| |
| //go:noinline |
| func forceSpill() {} |
| |
| func testMergeLocalswrapper(t *testing.T, op func(archsimd.Int64x4, archsimd.Int64x4) archsimd.Int64x4) { |
| t.Helper() |
| s0 := []int64{0, 1, 2, 3} |
| s1 := []int64{-1, 0, -1, 0} |
| want := []int64{-1, 1, 1, 3} |
| v := archsimd.LoadInt64x4Slice(s0) |
| m := archsimd.LoadInt64x4Slice(s1) |
| forceSpill() |
| got := make([]int64, 4) |
| gotv := op(v, m) |
| gotv.StoreSlice(got) |
| for i := range len(want) { |
| if !(got[i] == want[i]) { |
| t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) |
| } |
| } |
| } |
| |
| func TestBitMaskFromBits(t *testing.T) { |
| if !archsimd.X86.AVX512() { |
| t.Skip("Test requires X86.AVX512, not available on this hardware") |
| return |
| } |
| results := [2]int64{} |
| want := [2]int64{0, 6} |
| m := archsimd.Mask64x2FromBits(0b10) |
| archsimd.LoadInt64x2Slice([]int64{1, 2}).Add(archsimd.LoadInt64x2Slice([]int64{3, 4})).Masked(m).Store(&results) |
| for i := range 2 { |
| if results[i] != want[i] { |
| t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], results[i]) |
| } |
| } |
| } |
| |
| var maskForTestBitMaskFromBitsLoad = uint8(0b10) |
| |
| func TestBitMaskFromBitsLoad(t *testing.T) { |
| if !archsimd.X86.AVX512() { |
| t.Skip("Test requires X86.AVX512, not available on this hardware") |
| return |
| } |
| results := [2]int64{} |
| want := [2]int64{0, 6} |
| m := archsimd.Mask64x2FromBits(maskForTestBitMaskFromBitsLoad) |
| archsimd.LoadInt64x2Slice([]int64{1, 2}).Add(archsimd.LoadInt64x2Slice([]int64{3, 4})).Masked(m).Store(&results) |
| for i := range 2 { |
| if results[i] != want[i] { |
| t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], results[i]) |
| } |
| } |
| } |
| |
| func TestBitMaskToBits(t *testing.T) { |
| if !archsimd.X86.AVX512() { |
| t.Skip("Test requires X86.AVX512, not available on this hardware") |
| return |
| } |
| if v := archsimd.LoadInt16x8Slice([]int16{1, 0, 1, 0, 0, 0, 0, 0}).ToMask().ToBits(); v != 0b101 { |
| t.Errorf("Want 0b101, got %b", v) |
| } |
| } |
| |
| var maskForTestBitMaskFromBitsStore uint8 |
| |
| func TestBitMaskToBitsStore(t *testing.T) { |
| if !archsimd.X86.AVX512() { |
| t.Skip("Test requires X86.AVX512, not available on this hardware") |
| return |
| } |
| maskForTestBitMaskFromBitsStore = archsimd.LoadInt16x8Slice([]int16{1, 0, 1, 0, 0, 0, 0, 0}).ToMask().ToBits() |
| if maskForTestBitMaskFromBitsStore != 0b101 { |
| t.Errorf("Want 0b101, got %b", maskForTestBitMaskFromBitsStore) |
| } |
| } |
| |
| func TestMergeFloat(t *testing.T) { |
| k := make([]int64, 4, 4) |
| s := make([]float64, 4, 4) |
| |
| a := archsimd.LoadFloat64x4Slice([]float64{1, 2, 3, 4}) |
| b := archsimd.LoadFloat64x4Slice([]float64{4, 2, 3, 1}) |
| g := a.Greater(b) |
| g.ToInt64x4().StoreSlice(k) |
| c := a.Merge(b, g) |
| |
| c.StoreSlice(s) |
| |
| checkSlices[int64](t, k, []int64{0, 0, 0, -1}) |
| checkSlices[float64](t, s, []float64{4, 2, 3, 4}) |
| } |
| |
| func TestMergeFloat512(t *testing.T) { |
| if !archsimd.X86.AVX512() { |
| t.Skip("Test requires X86.AVX512, not available on this hardware") |
| return |
| } |
| |
| k := make([]int64, 8, 8) |
| s := make([]float64, 8, 8) |
| |
| a := archsimd.LoadFloat64x8Slice([]float64{1, 2, 3, 4, 5, 6, 7, 8}) |
| b := archsimd.LoadFloat64x8Slice([]float64{8, 7, 6, 5, 4, 2, 3, 1}) |
| g := a.Greater(b) |
| g.ToInt64x8().StoreSlice(k) |
| c := a.Merge(b, g) |
| d := a.Masked(g) |
| |
| checkSlices[int64](t, k, []int64{0, 0, 0, 0, -1, -1, -1, -1}) |
| |
| c.StoreSlice(s) |
| checkSlices[float64](t, s, []float64{8, 7, 6, 5, 5, 6, 7, 8}) |
| |
| d.StoreSlice(s) |
| checkSlices[float64](t, s, []float64{0, 0, 0, 0, 5, 6, 7, 8}) |
| } |
| |
| var ro uint8 = 2 |
| |
| func TestRotateAllVariable(t *testing.T) { |
| if !archsimd.X86.AVX512() { |
| t.Skip("Test requires X86.AVX512, not available on this hardware") |
| return |
| } |
| got := make([]int32, 4) |
| archsimd.LoadInt32x4Slice([]int32{0b11, 0b11, 0b11, 0b11}).RotateAllLeft(ro).StoreSlice(got) |
| for _, v := range got { |
| if v != 0b1100 { |
| t.Errorf("Want 0b1100, got %b", v) |
| } |
| } |
| } |
| |
| func TestBroadcastUint32x4(t *testing.T) { |
| s := make([]uint32, 4, 4) |
| archsimd.BroadcastUint32x4(123456789).StoreSlice(s) |
| checkSlices(t, s, []uint32{123456789, 123456789, 123456789, 123456789}) |
| } |
| |
| func TestBroadcastFloat32x8(t *testing.T) { |
| s := make([]float32, 8, 8) |
| archsimd.BroadcastFloat32x8(123456789).StoreSlice(s) |
| checkSlices(t, s, []float32{123456789, 123456789, 123456789, 123456789, 123456789, 123456789, 123456789, 123456789}) |
| } |
| |
| func TestBroadcastFloat64x2(t *testing.T) { |
| s := make([]float64, 2, 2) |
| archsimd.BroadcastFloat64x2(123456789).StoreSlice(s) |
| checkSlices(t, s, []float64{123456789, 123456789}) |
| } |
| |
| func TestBroadcastUint64x2(t *testing.T) { |
| s := make([]uint64, 2, 2) |
| archsimd.BroadcastUint64x2(123456789).StoreSlice(s) |
| checkSlices(t, s, []uint64{123456789, 123456789}) |
| } |
| |
| func TestBroadcastUint16x8(t *testing.T) { |
| s := make([]uint16, 8, 8) |
| archsimd.BroadcastUint16x8(12345).StoreSlice(s) |
| checkSlices(t, s, []uint16{12345, 12345, 12345, 12345}) |
| } |
| |
| func TestBroadcastInt8x32(t *testing.T) { |
| s := make([]int8, 32, 32) |
| archsimd.BroadcastInt8x32(-123).StoreSlice(s) |
| checkSlices(t, s, []int8{-123, -123, -123, -123, -123, -123, -123, -123, |
| -123, -123, -123, -123, -123, -123, -123, -123, |
| -123, -123, -123, -123, -123, -123, -123, -123, |
| -123, -123, -123, -123, -123, -123, -123, -123, |
| }) |
| } |
| |
| func TestMaskOpt512(t *testing.T) { |
| if !archsimd.X86.AVX512() { |
| t.Skip("Test requires X86.AVX512, not available on this hardware") |
| return |
| } |
| |
| k := make([]int64, 8, 8) |
| s := make([]float64, 8, 8) |
| |
| a := archsimd.LoadFloat64x8Slice([]float64{2, 0, 2, 0, 2, 0, 2, 0}) |
| b := archsimd.LoadFloat64x8Slice([]float64{1, 1, 1, 1, 1, 1, 1, 1}) |
| c := archsimd.LoadFloat64x8Slice([]float64{1, 2, 3, 4, 5, 6, 7, 8}) |
| d := archsimd.LoadFloat64x8Slice([]float64{2, 4, 6, 8, 10, 12, 14, 16}) |
| g := a.Greater(b) |
| e := c.Add(d).Masked(g) |
| e.StoreSlice(s) |
| g.ToInt64x8().StoreSlice(k) |
| checkSlices[int64](t, k, []int64{-1, 0, -1, 0, -1, 0, -1, 0}) |
| checkSlices[float64](t, s, []float64{3, 0, 9, 0, 15, 0, 21, 0}) |
| } |
| |
| // flattenedTranspose tranposes x and y, regarded as a pair of 2x2 |
| // matrices, but then flattens the rows in order, i.e |
| // x: ABCD ==> a: A1B2 |
| // y: 1234 b: C3D4 |
| func flattenedTranspose(x, y archsimd.Int32x4) (a, b archsimd.Int32x4) { |
| return x.InterleaveLo(y), x.InterleaveHi(y) |
| } |
| |
| func TestFlattenedTranspose(t *testing.T) { |
| r := make([]int32, 4, 4) |
| s := make([]int32, 4, 4) |
| |
| x := archsimd.LoadInt32x4Slice([]int32{0xA, 0xB, 0xC, 0xD}) |
| y := archsimd.LoadInt32x4Slice([]int32{1, 2, 3, 4}) |
| a, b := flattenedTranspose(x, y) |
| |
| a.StoreSlice(r) |
| b.StoreSlice(s) |
| |
| checkSlices[int32](t, r, []int32{0xA, 1, 0xB, 2}) |
| checkSlices[int32](t, s, []int32{0xC, 3, 0xD, 4}) |
| |
| } |
| |
| func TestClearAVXUpperBits(t *testing.T) { |
| // Test that ClearAVXUpperBits is safe even if there are SIMD values |
| // alive (although usually one should not do this). |
| if !archsimd.X86.AVX2() { |
| t.Skip("Test requires X86.AVX2, not available on this hardware") |
| return |
| } |
| |
| r := make([]int64, 4) |
| s := make([]int64, 4) |
| |
| x := archsimd.LoadInt64x4Slice([]int64{10, 20, 30, 40}) |
| y := archsimd.LoadInt64x4Slice([]int64{1, 2, 3, 4}) |
| |
| x.Add(y).StoreSlice(r) |
| archsimd.ClearAVXUpperBits() |
| x.Sub(y).StoreSlice(s) |
| |
| checkSlices[int64](t, r, []int64{11, 22, 33, 44}) |
| checkSlices[int64](t, s, []int64{9, 18, 27, 36}) |
| } |
| |
| func TestLeadingZeros(t *testing.T) { |
| if !archsimd.X86.AVX512() { |
| t.Skip("Test requires X86.AVX512, not available on this hardware") |
| return |
| } |
| |
| src := []uint64{0b1111, 0} |
| want := []uint64{60, 64} |
| got := make([]uint64, 2) |
| archsimd.LoadUint64x2Slice(src).LeadingZeros().StoreSlice(got) |
| for i := range 2 { |
| if want[i] != got[i] { |
| t.Errorf("Result incorrect at %d: want %d, got %d", i, want[i], got[i]) |
| } |
| } |
| } |
| |
| func TestIsZero(t *testing.T) { |
| v1 := archsimd.LoadUint64x2Slice([]uint64{0, 1}) |
| v2 := archsimd.LoadUint64x2Slice([]uint64{0, 0}) |
| if v1.IsZero() { |
| t.Errorf("Result incorrect, want false, got true") |
| } |
| if !v2.IsZero() { |
| t.Errorf("Result incorrect, want true, got false") |
| } |
| if !v1.And(v2).IsZero() { |
| t.Errorf("Result incorrect, want true, got false") |
| } |
| if v1.AndNot(v2).IsZero() { |
| t.Errorf("Result incorrect, want false, got true") |
| } |
| if !v2.And(v1).IsZero() { |
| t.Errorf("Result incorrect, want true, got false") |
| } |
| if !v2.AndNot(v1).IsZero() { |
| t.Errorf("Result incorrect, want true, got false") |
| } |
| } |
| |
| func TestSelect4FromPairConst(t *testing.T) { |
| x := archsimd.LoadInt32x4Slice([]int32{0, 1, 2, 3}) |
| y := archsimd.LoadInt32x4Slice([]int32{4, 5, 6, 7}) |
| |
| llll := x.SelectFromPair(0, 1, 2, 3, y) |
| hhhh := x.SelectFromPair(4, 5, 6, 7, y) |
| llhh := x.SelectFromPair(0, 1, 6, 7, y) |
| hhll := x.SelectFromPair(6, 7, 0, 1, y) |
| |
| lllh := x.SelectFromPair(0, 1, 2, 7, y) |
| llhl := x.SelectFromPair(0, 1, 7, 2, y) |
| lhll := x.SelectFromPair(0, 7, 1, 2, y) |
| hlll := x.SelectFromPair(7, 0, 1, 2, y) |
| |
| hhhl := x.SelectFromPair(4, 5, 6, 0, y) |
| hhlh := x.SelectFromPair(4, 5, 0, 6, y) |
| hlhh := x.SelectFromPair(4, 0, 5, 6, y) |
| lhhh := x.SelectFromPair(0, 4, 5, 6, y) |
| |
| lhlh := x.SelectFromPair(0, 4, 1, 5, y) |
| hlhl := x.SelectFromPair(4, 0, 5, 1, y) |
| lhhl := x.SelectFromPair(0, 4, 5, 1, y) |
| hllh := x.SelectFromPair(4, 0, 1, 5, y) |
| |
| r := make([]int32, 4, 4) |
| |
| foo := func(v archsimd.Int32x4, a, b, c, d int32) { |
| v.StoreSlice(r) |
| checkSlices[int32](t, r, []int32{a, b, c, d}) |
| } |
| |
| foo(llll, 0, 1, 2, 3) |
| foo(hhhh, 4, 5, 6, 7) |
| foo(llhh, 0, 1, 6, 7) |
| foo(hhll, 6, 7, 0, 1) |
| |
| foo(lllh, 0, 1, 2, 7) |
| foo(llhl, 0, 1, 7, 2) |
| foo(lhll, 0, 7, 1, 2) |
| foo(hlll, 7, 0, 1, 2) |
| |
| foo(hhhl, 4, 5, 6, 0) |
| foo(hhlh, 4, 5, 0, 6) |
| foo(hlhh, 4, 0, 5, 6) |
| foo(lhhh, 0, 4, 5, 6) |
| |
| foo(lhlh, 0, 4, 1, 5) |
| foo(hlhl, 4, 0, 5, 1) |
| foo(lhhl, 0, 4, 5, 1) |
| foo(hllh, 4, 0, 1, 5) |
| } |
| |
| //go:noinline |
| func selectFromPairInt32x4(x archsimd.Int32x4, a, b, c, d uint8, y archsimd.Int32x4) archsimd.Int32x4 { |
| return x.SelectFromPair(a, b, c, d, y) |
| } |
| |
| func TestSelect4FromPairVar(t *testing.T) { |
| x := archsimd.LoadInt32x4Slice([]int32{0, 1, 2, 3}) |
| y := archsimd.LoadInt32x4Slice([]int32{4, 5, 6, 7}) |
| |
| llll := selectFromPairInt32x4(x, 0, 1, 2, 3, y) |
| hhhh := selectFromPairInt32x4(x, 4, 5, 6, 7, y) |
| llhh := selectFromPairInt32x4(x, 0, 1, 6, 7, y) |
| hhll := selectFromPairInt32x4(x, 6, 7, 0, 1, y) |
| |
| lllh := selectFromPairInt32x4(x, 0, 1, 2, 7, y) |
| llhl := selectFromPairInt32x4(x, 0, 1, 7, 2, y) |
| lhll := selectFromPairInt32x4(x, 0, 7, 1, 2, y) |
| hlll := selectFromPairInt32x4(x, 7, 0, 1, 2, y) |
| |
| hhhl := selectFromPairInt32x4(x, 4, 5, 6, 0, y) |
| hhlh := selectFromPairInt32x4(x, 4, 5, 0, 6, y) |
| hlhh := selectFromPairInt32x4(x, 4, 0, 5, 6, y) |
| lhhh := selectFromPairInt32x4(x, 0, 4, 5, 6, y) |
| |
| lhlh := selectFromPairInt32x4(x, 0, 4, 1, 5, y) |
| hlhl := selectFromPairInt32x4(x, 4, 0, 5, 1, y) |
| lhhl := selectFromPairInt32x4(x, 0, 4, 5, 1, y) |
| hllh := selectFromPairInt32x4(x, 4, 0, 1, 5, y) |
| |
| r := make([]int32, 4, 4) |
| |
| foo := func(v archsimd.Int32x4, a, b, c, d int32) { |
| v.StoreSlice(r) |
| checkSlices[int32](t, r, []int32{a, b, c, d}) |
| } |
| |
| foo(llll, 0, 1, 2, 3) |
| foo(hhhh, 4, 5, 6, 7) |
| foo(llhh, 0, 1, 6, 7) |
| foo(hhll, 6, 7, 0, 1) |
| |
| foo(lllh, 0, 1, 2, 7) |
| foo(llhl, 0, 1, 7, 2) |
| foo(lhll, 0, 7, 1, 2) |
| foo(hlll, 7, 0, 1, 2) |
| |
| foo(hhhl, 4, 5, 6, 0) |
| foo(hhlh, 4, 5, 0, 6) |
| foo(hlhh, 4, 0, 5, 6) |
| foo(lhhh, 0, 4, 5, 6) |
| |
| foo(lhlh, 0, 4, 1, 5) |
| foo(hlhl, 4, 0, 5, 1) |
| foo(lhhl, 0, 4, 5, 1) |
| foo(hllh, 4, 0, 1, 5) |
| } |
| |
| func TestSelect4FromPairConstGrouped(t *testing.T) { |
| x := archsimd.LoadFloat32x8Slice([]float32{0, 1, 2, 3, 10, 11, 12, 13}) |
| y := archsimd.LoadFloat32x8Slice([]float32{4, 5, 6, 7, 14, 15, 16, 17}) |
| |
| llll := x.SelectFromPairGrouped(0, 1, 2, 3, y) |
| hhhh := x.SelectFromPairGrouped(4, 5, 6, 7, y) |
| llhh := x.SelectFromPairGrouped(0, 1, 6, 7, y) |
| hhll := x.SelectFromPairGrouped(6, 7, 0, 1, y) |
| |
| lllh := x.SelectFromPairGrouped(0, 1, 2, 7, y) |
| llhl := x.SelectFromPairGrouped(0, 1, 7, 2, y) |
| lhll := x.SelectFromPairGrouped(0, 7, 1, 2, y) |
| hlll := x.SelectFromPairGrouped(7, 0, 1, 2, y) |
| |
| hhhl := x.SelectFromPairGrouped(4, 5, 6, 0, y) |
| hhlh := x.SelectFromPairGrouped(4, 5, 0, 6, y) |
| hlhh := x.SelectFromPairGrouped(4, 0, 5, 6, y) |
| lhhh := x.SelectFromPairGrouped(0, 4, 5, 6, y) |
| |
| lhlh := x.SelectFromPairGrouped(0, 4, 1, 5, y) |
| hlhl := x.SelectFromPairGrouped(4, 0, 5, 1, y) |
| lhhl := x.SelectFromPairGrouped(0, 4, 5, 1, y) |
| hllh := x.SelectFromPairGrouped(4, 0, 1, 5, y) |
| |
| r := make([]float32, 8, 8) |
| |
| foo := func(v archsimd.Float32x8, a, b, c, d float32) { |
| v.StoreSlice(r) |
| checkSlices[float32](t, r, []float32{a, b, c, d, 10 + a, 10 + b, 10 + c, 10 + d}) |
| } |
| |
| foo(llll, 0, 1, 2, 3) |
| foo(hhhh, 4, 5, 6, 7) |
| foo(llhh, 0, 1, 6, 7) |
| foo(hhll, 6, 7, 0, 1) |
| |
| foo(lllh, 0, 1, 2, 7) |
| foo(llhl, 0, 1, 7, 2) |
| foo(lhll, 0, 7, 1, 2) |
| foo(hlll, 7, 0, 1, 2) |
| |
| foo(hhhl, 4, 5, 6, 0) |
| foo(hhlh, 4, 5, 0, 6) |
| foo(hlhh, 4, 0, 5, 6) |
| foo(lhhh, 0, 4, 5, 6) |
| |
| foo(lhlh, 0, 4, 1, 5) |
| foo(hlhl, 4, 0, 5, 1) |
| foo(lhhl, 0, 4, 5, 1) |
| foo(hllh, 4, 0, 1, 5) |
| } |
| |
| func TestSelectFromPairConstGroupedUint32x16(t *testing.T) { |
| if !archsimd.X86.AVX512() { |
| t.Skip("Test requires X86.AVX512, not available on this hardware") |
| return |
| } |
| x := archsimd.LoadUint32x16Slice([]uint32{0, 1, 2, 3, 10, 11, 12, 13, 20, 21, 22, 23, 30, 31, 32, 33}) |
| y := archsimd.LoadUint32x16Slice([]uint32{4, 5, 6, 7, 14, 15, 16, 17, 24, 25, 26, 27, 34, 35, 36, 37}) |
| |
| llll := x.SelectFromPairGrouped(0, 1, 2, 3, y) |
| hhhh := x.SelectFromPairGrouped(4, 5, 6, 7, y) |
| llhh := x.SelectFromPairGrouped(0, 1, 6, 7, y) |
| hhll := x.SelectFromPairGrouped(6, 7, 0, 1, y) |
| |
| lllh := x.SelectFromPairGrouped(0, 1, 2, 7, y) |
| llhl := x.SelectFromPairGrouped(0, 1, 7, 2, y) |
| lhll := x.SelectFromPairGrouped(0, 7, 1, 2, y) |
| hlll := x.SelectFromPairGrouped(7, 0, 1, 2, y) |
| |
| hhhl := x.SelectFromPairGrouped(4, 5, 6, 0, y) |
| hhlh := x.SelectFromPairGrouped(4, 5, 0, 6, y) |
| hlhh := x.SelectFromPairGrouped(4, 0, 5, 6, y) |
| lhhh := x.SelectFromPairGrouped(0, 4, 5, 6, y) |
| |
| lhlh := x.SelectFromPairGrouped(0, 4, 1, 5, y) |
| hlhl := x.SelectFromPairGrouped(4, 0, 5, 1, y) |
| lhhl := x.SelectFromPairGrouped(0, 4, 5, 1, y) |
| hllh := x.SelectFromPairGrouped(4, 0, 1, 5, y) |
| |
| r := make([]uint32, 16, 16) |
| |
| foo := func(v archsimd.Uint32x16, a, b, c, d uint32) { |
| v.StoreSlice(r) |
| checkSlices[uint32](t, r, []uint32{a, b, c, d, |
| 10 + a, 10 + b, 10 + c, 10 + d, |
| 20 + a, 20 + b, 20 + c, 20 + d, |
| 30 + a, 30 + b, 30 + c, 30 + d, |
| }) |
| } |
| |
| foo(llll, 0, 1, 2, 3) |
| foo(hhhh, 4, 5, 6, 7) |
| foo(llhh, 0, 1, 6, 7) |
| foo(hhll, 6, 7, 0, 1) |
| |
| foo(lllh, 0, 1, 2, 7) |
| foo(llhl, 0, 1, 7, 2) |
| foo(lhll, 0, 7, 1, 2) |
| foo(hlll, 7, 0, 1, 2) |
| |
| foo(hhhl, 4, 5, 6, 0) |
| foo(hhlh, 4, 5, 0, 6) |
| foo(hlhh, 4, 0, 5, 6) |
| foo(lhhh, 0, 4, 5, 6) |
| |
| foo(lhlh, 0, 4, 1, 5) |
| foo(hlhl, 4, 0, 5, 1) |
| foo(lhhl, 0, 4, 5, 1) |
| foo(hllh, 4, 0, 1, 5) |
| } |
| |
| func TestSelect128FromPair(t *testing.T) { |
| x := archsimd.LoadUint64x4Slice([]uint64{0, 1, 2, 3}) |
| y := archsimd.LoadUint64x4Slice([]uint64{4, 5, 6, 7}) |
| |
| aa := x.Select128FromPair(0, 0, y) |
| ab := x.Select128FromPair(0, 1, y) |
| bc := x.Select128FromPair(1, 2, y) |
| cd := x.Select128FromPair(2, 3, y) |
| da := x.Select128FromPair(3, 0, y) |
| dc := x.Select128FromPair(3, 2, y) |
| |
| r := make([]uint64, 4, 4) |
| |
| foo := func(v archsimd.Uint64x4, a, b uint64) { |
| a, b = 2*a, 2*b |
| v.StoreSlice(r) |
| checkSlices[uint64](t, r, []uint64{a, a + 1, b, b + 1}) |
| } |
| |
| foo(aa, 0, 0) |
| foo(ab, 0, 1) |
| foo(bc, 1, 2) |
| foo(cd, 2, 3) |
| foo(da, 3, 0) |
| foo(dc, 3, 2) |
| } |
| |
| func TestSelect128FromPairError(t *testing.T) { |
| x := archsimd.LoadUint64x4Slice([]uint64{0, 1, 2, 3}) |
| y := archsimd.LoadUint64x4Slice([]uint64{4, 5, 6, 7}) |
| |
| defer func() { |
| if r := recover(); r != nil { |
| t.Logf("Saw expected panic %v", r) |
| } |
| }() |
| _ = x.Select128FromPair(0, 4, y) |
| |
| t.Errorf("Should have panicked") |
| } |
| |
| //go:noinline |
| func select128FromPair(x archsimd.Uint64x4, lo, hi uint8, y archsimd.Uint64x4) archsimd.Uint64x4 { |
| return x.Select128FromPair(lo, hi, y) |
| } |
| |
| func TestSelect128FromPairVar(t *testing.T) { |
| x := archsimd.LoadUint64x4Slice([]uint64{0, 1, 2, 3}) |
| y := archsimd.LoadUint64x4Slice([]uint64{4, 5, 6, 7}) |
| |
| aa := select128FromPair(x, 0, 0, y) |
| ab := select128FromPair(x, 0, 1, y) |
| bc := select128FromPair(x, 1, 2, y) |
| cd := select128FromPair(x, 2, 3, y) |
| da := select128FromPair(x, 3, 0, y) |
| dc := select128FromPair(x, 3, 2, y) |
| |
| r := make([]uint64, 4, 4) |
| |
| foo := func(v archsimd.Uint64x4, a, b uint64) { |
| a, b = 2*a, 2*b |
| v.StoreSlice(r) |
| checkSlices[uint64](t, r, []uint64{a, a + 1, b, b + 1}) |
| } |
| |
| foo(aa, 0, 0) |
| foo(ab, 0, 1) |
| foo(bc, 1, 2) |
| foo(cd, 2, 3) |
| foo(da, 3, 0) |
| foo(dc, 3, 2) |
| } |
| |
| func TestSelect2FromPairConst(t *testing.T) { |
| x := archsimd.LoadUint64x2Slice([]uint64{0, 1}) |
| y := archsimd.LoadUint64x2Slice([]uint64{2, 3}) |
| |
| ll := x.SelectFromPair(0, 1, y) |
| hh := x.SelectFromPair(3, 2, y) |
| lh := x.SelectFromPair(0, 3, y) |
| hl := x.SelectFromPair(2, 1, y) |
| |
| r := make([]uint64, 2, 2) |
| |
| foo := func(v archsimd.Uint64x2, a, b uint64) { |
| v.StoreSlice(r) |
| checkSlices[uint64](t, r, []uint64{a, b}) |
| } |
| |
| foo(ll, 0, 1) |
| foo(hh, 3, 2) |
| foo(lh, 0, 3) |
| foo(hl, 2, 1) |
| } |
| |
| func TestSelect2FromPairConstGroupedUint(t *testing.T) { |
| x := archsimd.LoadUint64x4Slice([]uint64{0, 1, 10, 11}) |
| y := archsimd.LoadUint64x4Slice([]uint64{2, 3, 12, 13}) |
| |
| ll := x.SelectFromPairGrouped(0, 1, y) |
| hh := x.SelectFromPairGrouped(3, 2, y) |
| lh := x.SelectFromPairGrouped(0, 3, y) |
| hl := x.SelectFromPairGrouped(2, 1, y) |
| |
| r := make([]uint64, 4, 4) |
| |
| foo := func(v archsimd.Uint64x4, a, b uint64) { |
| v.StoreSlice(r) |
| checkSlices[uint64](t, r, []uint64{a, b, a + 10, b + 10}) |
| } |
| |
| foo(ll, 0, 1) |
| foo(hh, 3, 2) |
| foo(lh, 0, 3) |
| foo(hl, 2, 1) |
| } |
| |
| func TestSelect2FromPairConstGroupedFloat(t *testing.T) { |
| x := archsimd.LoadFloat64x4Slice([]float64{0, 1, 10, 11}) |
| y := archsimd.LoadFloat64x4Slice([]float64{2, 3, 12, 13}) |
| |
| ll := x.SelectFromPairGrouped(0, 1, y) |
| hh := x.SelectFromPairGrouped(3, 2, y) |
| lh := x.SelectFromPairGrouped(0, 3, y) |
| hl := x.SelectFromPairGrouped(2, 1, y) |
| |
| r := make([]float64, 4, 4) |
| |
| foo := func(v archsimd.Float64x4, a, b float64) { |
| v.StoreSlice(r) |
| checkSlices[float64](t, r, []float64{a, b, a + 10, b + 10}) |
| } |
| |
| foo(ll, 0, 1) |
| foo(hh, 3, 2) |
| foo(lh, 0, 3) |
| foo(hl, 2, 1) |
| } |
| |
| func TestSelect2FromPairConstGroupedInt(t *testing.T) { |
| x := archsimd.LoadInt64x4Slice([]int64{0, 1, 10, 11}) |
| y := archsimd.LoadInt64x4Slice([]int64{2, 3, 12, 13}) |
| |
| ll := x.SelectFromPairGrouped(0, 1, y) |
| hh := x.SelectFromPairGrouped(3, 2, y) |
| lh := x.SelectFromPairGrouped(0, 3, y) |
| hl := x.SelectFromPairGrouped(2, 1, y) |
| |
| r := make([]int64, 4, 4) |
| |
| foo := func(v archsimd.Int64x4, a, b int64) { |
| v.StoreSlice(r) |
| checkSlices[int64](t, r, []int64{a, b, a + 10, b + 10}) |
| } |
| |
| foo(ll, 0, 1) |
| foo(hh, 3, 2) |
| foo(lh, 0, 3) |
| foo(hl, 2, 1) |
| } |
| |
| func TestSelect2FromPairConstGroupedInt512(t *testing.T) { |
| if !archsimd.X86.AVX512() { |
| t.Skip("Test requires X86.AVX512, not available on this hardware") |
| return |
| } |
| |
| x := archsimd.LoadInt64x8Slice([]int64{0, 1, 10, 11, 20, 21, 30, 31}) |
| y := archsimd.LoadInt64x8Slice([]int64{2, 3, 12, 13, 22, 23, 32, 33}) |
| |
| ll := x.SelectFromPairGrouped(0, 1, y) |
| hh := x.SelectFromPairGrouped(3, 2, y) |
| lh := x.SelectFromPairGrouped(0, 3, y) |
| hl := x.SelectFromPairGrouped(2, 1, y) |
| |
| r := make([]int64, 8, 8) |
| |
| foo := func(v archsimd.Int64x8, a, b int64) { |
| v.StoreSlice(r) |
| checkSlices[int64](t, r, []int64{a, b, a + 10, b + 10, a + 20, b + 20, a + 30, b + 30}) |
| } |
| |
| foo(ll, 0, 1) |
| foo(hh, 3, 2) |
| foo(lh, 0, 3) |
| foo(hl, 2, 1) |
| } |
| |
| func TestString(t *testing.T) { |
| x := archsimd.LoadUint32x4Slice([]uint32{0, 1, 2, 3}) |
| y := archsimd.LoadInt64x4Slice([]int64{-4, -5, -6, -7}) |
| z := archsimd.LoadFloat32x4Slice([]float32{0.5, 1.5, -2.5, 3.5e9}) |
| w := archsimd.LoadFloat64x4Slice([]float64{0.5, 1.5, -2.5, 3.5e9}) |
| |
| sx := "{0,1,2,3}" |
| sy := "{-4,-5,-6,-7}" |
| sz := "{0.5,1.5,-2.5,3.5e+09}" |
| sw := sz |
| |
| if x.String() != sx { |
| t.Errorf("x=%s wanted %s", x, sx) |
| } |
| if y.String() != sy { |
| t.Errorf("y=%s wanted %s", y, sy) |
| } |
| if z.String() != sz { |
| t.Errorf("z=%s wanted %s", z, sz) |
| } |
| if w.String() != sw { |
| t.Errorf("w=%s wanted %s", w, sw) |
| } |
| t.Logf("w=%s", w) |
| t.Logf("x=%s", x) |
| t.Logf("y=%s", y) |
| t.Logf("z=%s", z) |
| } |
| |
| // a returns an slice of 16 int32 |
| func a() []int32 { |
| return make([]int32, 16, 16) |
| } |
| |
| // applyTo3 returns a 16-element slice of the results of |
| // applying f to the respective elements of vectors x, y, and z. |
| func applyTo3(x, y, z archsimd.Int32x16, f func(x, y, z int32) int32) []int32 { |
| ax, ay, az := a(), a(), a() |
| x.StoreSlice(ax) |
| y.StoreSlice(ay) |
| z.StoreSlice(az) |
| |
| r := a() |
| for i := range r { |
| r[i] = f(ax[i], ay[i], az[i]) |
| } |
| return r |
| } |
| |
| // applyTo3 returns a 16-element slice of the results of |
| // applying f to the respective elements of vectors x, y, z, and w. |
| func applyTo4(x, y, z, w archsimd.Int32x16, f func(x, y, z, w int32) int32) []int32 { |
| ax, ay, az, aw := a(), a(), a(), a() |
| x.StoreSlice(ax) |
| y.StoreSlice(ay) |
| z.StoreSlice(az) |
| w.StoreSlice(aw) |
| |
| r := make([]int32, len(ax), len(ax)) |
| for i := range r { |
| r[i] = f(ax[i], ay[i], az[i], aw[i]) |
| } |
| return r |
| } |
| |
| func TestSelectTernOptInt32x16(t *testing.T) { |
| if !archsimd.X86.AVX512() { |
| t.Skip("Test requires X86.AVX512, not available on this hardware") |
| return |
| } |
| ax := []int32{0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1} |
| ay := []int32{0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1} |
| az := []int32{0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1} |
| aw := []int32{0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1} |
| am := []int32{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1} |
| |
| x := archsimd.LoadInt32x16Slice(ax) |
| y := archsimd.LoadInt32x16Slice(ay) |
| z := archsimd.LoadInt32x16Slice(az) |
| w := archsimd.LoadInt32x16Slice(aw) |
| m := archsimd.LoadInt32x16Slice(am) |
| |
| foo := func(v archsimd.Int32x16, s []int32) { |
| r := make([]int32, 16, 16) |
| v.StoreSlice(r) |
| checkSlices[int32](t, r, s) |
| } |
| |
| t0 := w.Xor(y).Xor(z) |
| ft0 := func(w, y, z int32) int32 { |
| return w ^ y ^ z |
| } |
| foo(t0, applyTo3(w, y, z, ft0)) |
| |
| t1 := m.And(w.Xor(y).Xor(z.Not())) |
| ft1 := func(m, w, y, z int32) int32 { |
| return m & (w ^ y ^ ^z) |
| } |
| foo(t1, applyTo4(m, w, y, z, ft1)) |
| |
| t2 := x.Xor(y).Xor(z).And(x.Xor(y).Xor(z.Not())) |
| ft2 := func(x, y, z int32) int32 { |
| return (x ^ y ^ z) & (x ^ y ^ ^z) |
| } |
| foo(t2, applyTo3(x, y, z, ft2)) |
| } |
| |
| func TestMaskedMerge(t *testing.T) { |
| x := archsimd.LoadInt64x4Slice([]int64{1, 2, 3, 4}) |
| y := archsimd.LoadInt64x4Slice([]int64{5, 6, 1, 1}) |
| z := archsimd.LoadInt64x4Slice([]int64{-1, -2, -3, -4}) |
| res := make([]int64, 4) |
| expected := []int64{6, 8, -3, -4} |
| mask := x.Less(y) |
| if archsimd.X86.AVX512() { |
| x.Add(y).Merge(z, mask).StoreSlice(res) |
| } else { |
| x.Add(y).Merge(z, mask).StoreSlice(res) |
| } |
| for i := range 4 { |
| if res[i] != expected[i] { |
| t.Errorf("got %d wanted %d", res[i], expected[i]) |
| } |
| } |
| } |
| |
| func TestDotProductQuadruple(t *testing.T) { |
| if !archsimd.X86.AVXVNNI() { |
| t.Skip("Test requires X86.AVXVNNI, not available on this hardware") |
| return |
| } |
| xd := make([]int8, 16) |
| yd := make([]uint8, 16) |
| zd := make([]int32, 4) |
| wanted1 := make([]int32, 4) |
| wanted2 := make([]int32, 4) |
| res1 := make([]int32, 4) |
| res2 := make([]int32, 4) |
| for i := range 4 { |
| xd[i] = 5 |
| yd[i] = 6 |
| zd[i] = 3 |
| wanted1[i] = 30 |
| wanted2[i] = 30 |
| } |
| x := archsimd.LoadInt8x16Slice(xd) |
| y := archsimd.LoadUint8x16Slice(yd) |
| z := archsimd.LoadInt32x4Slice(zd) |
| x.DotProductQuadruple(y).StoreSlice(res1) |
| x.DotProductQuadruple(y).Add(z).StoreSlice(res1) |
| for i := range 4 { |
| if res1[i] != wanted1[i] { |
| t.Errorf("got %d wanted %d", res1[i], wanted1[i]) |
| } |
| if res2[i] != wanted2[i] { |
| t.Errorf("got %d wanted %d", res2[i], wanted2[i]) |
| } |
| } |
| } |
| |
| func TestPermuteScalars(t *testing.T) { |
| x := []int32{11, 12, 13, 14} |
| want := []int32{12, 13, 14, 11} |
| got := make([]int32, 4) |
| archsimd.LoadInt32x4Slice(x).PermuteScalars(1, 2, 3, 0).StoreSlice(got) |
| checkSlices(t, got, want) |
| } |
| |
| func TestPermuteScalarsGrouped(t *testing.T) { |
| x := []int32{11, 12, 13, 14, 21, 22, 23, 24} |
| want := []int32{12, 13, 14, 11, 22, 23, 24, 21} |
| got := make([]int32, 8) |
| archsimd.LoadInt32x8Slice(x).PermuteScalarsGrouped(1, 2, 3, 0).StoreSlice(got) |
| checkSlices(t, got, want) |
| } |
| |
| func TestPermuteScalarsHi(t *testing.T) { |
| x := []int16{-1, -2, -3, -4, 11, 12, 13, 14} |
| want := []int16{-1, -2, -3, -4, 12, 13, 14, 11} |
| got := make([]int16, len(x)) |
| archsimd.LoadInt16x8Slice(x).PermuteScalarsHi(1, 2, 3, 0).StoreSlice(got) |
| checkSlices(t, got, want) |
| } |
| |
| func TestPermuteScalarsLo(t *testing.T) { |
| x := []int16{11, 12, 13, 14, 4, 5, 6, 7} |
| want := []int16{12, 13, 14, 11, 4, 5, 6, 7} |
| got := make([]int16, len(x)) |
| archsimd.LoadInt16x8Slice(x).PermuteScalarsLo(1, 2, 3, 0).StoreSlice(got) |
| checkSlices(t, got, want) |
| } |
| |
| func TestPermuteScalarsHiGrouped(t *testing.T) { |
| x := []int16{-1, -2, -3, -4, 11, 12, 13, 14, -11, -12, -13, -14, 111, 112, 113, 114} |
| want := []int16{-1, -2, -3, -4, 12, 13, 14, 11, -11, -12, -13, -14, 112, 113, 114, 111} |
| got := make([]int16, len(x)) |
| archsimd.LoadInt16x16Slice(x).PermuteScalarsHiGrouped(1, 2, 3, 0).StoreSlice(got) |
| checkSlices(t, got, want) |
| } |
| |
| func TestPermuteScalarsLoGrouped(t *testing.T) { |
| x := []int16{11, 12, 13, 14, 4, 5, 6, 7, 111, 112, 113, 114, 14, 15, 16, 17} |
| want := []int16{12, 13, 14, 11, 4, 5, 6, 7, 112, 113, 114, 111, 14, 15, 16, 17} |
| got := make([]int16, len(x)) |
| archsimd.LoadInt16x16Slice(x).PermuteScalarsLoGrouped(1, 2, 3, 0).StoreSlice(got) |
| checkSlices(t, got, want) |
| } |
| |
| func TestClMul(t *testing.T) { |
| var x = archsimd.LoadUint64x2Slice([]uint64{1, 5}) |
| var y = archsimd.LoadUint64x2Slice([]uint64{3, 9}) |
| |
| foo := func(v archsimd.Uint64x2, s []uint64) { |
| r := make([]uint64, 2, 2) |
| v.StoreSlice(r) |
| checkSlices[uint64](t, r, s) |
| } |
| |
| foo(x.CarrylessMultiply(0, 0, y), []uint64{3, 0}) |
| foo(x.CarrylessMultiply(0, 1, y), []uint64{9, 0}) |
| foo(x.CarrylessMultiply(1, 0, y), []uint64{15, 0}) |
| foo(x.CarrylessMultiply(1, 1, y), []uint64{45, 0}) |
| foo(y.CarrylessMultiply(0, 0, y), []uint64{5, 0}) |
| |
| } |