| // Copyright 2026 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| //go:build goexperiment.simd |
| |
| package archsimd |
| |
| import "unsafe" |
| |
| // LoadUint32x4Part loads a Int32x4 from the slice s. |
| // If s has fewer than 4 elements, the remaining elements of the vector are filled with zeroes. |
| // If s has 4 or more elements, the function is equivalent to LoadUint32x4. |
| func LoadUint32x4Part(s []uint32) (Uint32x4, int) { |
| l := len(s) |
| if l >= 4 { |
| return LoadUint32x4(s), 4 |
| } |
| var x Uint32x4 |
| if l == 0 { |
| return x, 0 |
| } |
| if l >= 2 { // 2,3 |
| x = x.ReshapeToUint64s().SetElem(0, *uint64atP32(&s[0])).ReshapeToUint32s() |
| if l == 3 { |
| x = x.SetElem(2, s[2]) |
| } |
| } else { // l == 1 |
| x = x.SetElem(0, s[0]) |
| } |
| return x, l |
| } |
| |
| // StorePart stores the elements of x into the slice s. |
| // It stores as many elements as will fit in s. |
| // If s has 4 or more elements, the method is equivalent to x.Store. |
| func (x Uint32x4) StorePart(s []uint32) int { |
| l := len(s) |
| if l >= 4 { |
| x.Store(s) |
| return 4 |
| } |
| if l == 0 { |
| return 0 |
| } |
| if l >= 2 { // 2,3 |
| *uint64atP32(&s[0]) = x.ReshapeToUint64s().GetElem(0) |
| if l == 3 { |
| s[2] = x.GetElem(2) |
| } |
| } else { // l == 1 |
| s[0] = x.GetElem(0) |
| } |
| return l |
| } |
| |
| // LoadUint64x2Part loads a Int64x2 from the slice s. |
| // If s has fewer than 2 elements, the remaining elements of the vector are filled with zeroes. |
| // If s has 2 or more elements, the function is equivalent to LoadUint64x2. |
| func LoadUint64x2Part(s []uint64) (Uint64x2, int) { |
| l := len(s) |
| if l >= 2 { |
| return LoadUint64x2(s), 2 |
| } |
| var x Uint64x2 |
| if l == 0 { |
| return x, 0 |
| } |
| // l == 1 |
| x = x.SetElem(0, s[0]) |
| return x, 1 |
| } |
| |
| // StorePart stores the elements of x into the slice s. |
| // It stores as many elements as will fit in s. |
| // If s has 2 or more elements, the method is equivalent to x.Store. |
| func (x Uint64x2) StorePart(s []uint64) int { |
| l := len(s) |
| if l >= 2 { |
| x.Store(s) |
| return 2 |
| } |
| if l == 0 { |
| return 0 |
| } |
| // l == 1 |
| s[0] = x.GetElem(0) |
| return 1 |
| } |
| |
| // LoadInt32x4Part loads a Int32x4 from the slice s. |
| // If s has fewer than 4 elements, the remaining elements of the vector are filled with zeroes. |
| // If s has 4 or more elements, the function is equivalent to LoadInt32x4. |
| func LoadInt32x4Part(s []int32) (Int32x4, int) { |
| if len(s) == 0 { |
| var zero Int32x4 |
| return zero, 0 |
| } |
| t := unsafe.Slice((*uint32)(unsafe.Pointer(&s[0])), len(s)) |
| v, l := LoadUint32x4Part(t) |
| return v.BitsToInt32(), l |
| } |
| |
| // StorePart stores the 4 elements of x into the slice s. |
| // It stores as many elements as will fit in s. |
| // If s has 4 or more elements, the method is equivalent to x.Store. |
| func (x Int32x4) StorePart(s []int32) int { |
| if len(s) == 0 { |
| return 0 |
| } |
| t := unsafe.Slice((*uint32)(unsafe.Pointer(&s[0])), len(s)) |
| return x.ToBits().StorePart(t) |
| } |
| |
| // LoadInt64x2Part loads a Int64x2 from the slice s. |
| // If s has fewer than 2 elements, the remaining elements of the vector are filled with zeroes. |
| // If s has 2 or more elements, the function is equivalent to LoadInt64x2. |
| func LoadInt64x2Part(s []int64) (Int64x2, int) { |
| if len(s) == 0 { |
| var zero Int64x2 |
| return zero, 0 |
| } |
| t := unsafe.Slice((*uint64)(unsafe.Pointer(&s[0])), len(s)) |
| v, l := LoadUint64x2Part(t) |
| return v.BitsToInt64(), l |
| } |
| |
| // StorePart stores the 2 elements of x into the slice s. |
| // It stores as many elements as will fit in s. |
| // If s has 2 or more elements, the method is equivalent to x.Store. |
| func (x Int64x2) StorePart(s []int64) int { |
| if len(s) == 0 { |
| return 0 |
| } |
| t := unsafe.Slice((*uint64)(unsafe.Pointer(&s[0])), len(s)) |
| return x.ToBits().StorePart(t) |
| } |
| |
| // LoadFloat32x4Part loads a Float32x4 from the slice s. |
| // If s has fewer than 4 elements, the remaining elements of the vector are filled with zeroes. |
| // If s has 4 or more elements, the function is equivalent to LoadFloat32x4. |
| func LoadFloat32x4Part(s []float32) (Float32x4, int) { |
| l := len(s) |
| if l >= 4 { |
| return LoadFloat32x4(s), 4 |
| } |
| var x Float32x4 |
| if l == 0 { |
| return x, l |
| } |
| if l >= 2 { // 2,3 |
| x = x.ToBits().ReshapeToUint64s().BitsToFloat64().SetElem(0, *float64atP32(&s[0])).ToBits().ReshapeToUint32s().BitsToFloat32() |
| if l == 3 { |
| x = x.SetElem(2, s[2]) |
| } |
| } else { // l == 1 |
| x = x.SetElem(0, s[0]) |
| } |
| return x, l |
| } |
| |
| // StorePart stores the elements of x into the slice s. |
| // It stores as many elements as will fit in s. |
| // If s has 4 or more elements, the method is equivalent to x.Store. |
| func (x Float32x4) StorePart(s []float32) int { |
| l := len(s) |
| if l >= 4 { |
| x.Store(s) |
| return 4 |
| } |
| if l == 0 { |
| return 0 |
| } |
| if l >= 2 { // 2,3( |
| *float64atP32(&s[0]) = x.ToBits().ReshapeToUint64s().BitsToFloat64().GetElem(0) |
| if l == 3 { |
| s[2] = x.GetElem(2) |
| } |
| } else { // l == 1 |
| s[0] = x.GetElem(0) |
| } |
| return l |
| } |
| |
| // LoadFloat64x2Part loads a Float64x2 from the slice s. |
| // If s has fewer than 2 elements, the remaining elements of the vector are filled with zeroes. |
| // If s has 2 or more elements, the function is equivalent to LoadFloat64x2. |
| func LoadFloat64x2Part(s []float64) (Float64x2, int) { |
| l := len(s) |
| if l >= 2 { |
| return LoadFloat64x2(s), 2 |
| } |
| var x Float64x2 |
| if l == 0 { |
| return x, l |
| } |
| // l == 1 |
| x = x.SetElem(0, s[0]) |
| return x, l |
| } |
| |
| // StorePart stores the elements of x into the slice s. |
| // It stores as many elements as will fit in s. |
| // If s has 2 or more elements, the method is equivalent to x.Store. |
| func (x Float64x2) StorePart(s []float64) int { |
| l := len(s) |
| if l >= 2 { |
| x.Store(s) |
| return 2 |
| } |
| if l == 0 { |
| return 0 |
| } |
| // l == 1 |
| s[0] = x.GetElem(0) |
| return l |
| } |