| // Code generated by 'simdgen -o godefs -goroot $GOROOT -arch arm64 -arm64Path $ARM64_ISA_PATH go_arm64.yaml types.yaml categories.yaml'; DO NOT EDIT. |
| |
| //go:build goexperiment.simd |
| |
| package archsimd |
| |
| /* bitSelect */ |
| |
| // bitSelect builds its result bit by bit: where a bit of mask is 1 the bit is taken from y, and where it is 0 the bit of x is kept. |
| // |
| // Asm: VBIT, CPU Feature: NEON |
| func (x Int8x16) bitSelect(y Int8x16, mask Int8x16) Int8x16 |
| |
| /* bitSelectNot */ |
| |
| // bitSelectNot builds its result bit by bit: where a bit of mask is 0 the bit is taken from y, and where it is 1 the bit of x is kept. |
| // |
| // Asm: VBIF, CPU Feature: NEON |
| func (x Int8x16) bitSelectNot(y Int8x16, mask Int8x16) Int8x16 |
| |
| /* broadcast1To2 */ |
| |
| // broadcast1To2 replicates the lowest element of x into both elements of |
| // the returned Float64x2. |
| // |
| // Asm: VDUP, CPU Feature: NEON |
| func (x Float64x2) broadcast1To2() Float64x2 |
| |
| // broadcast1To2 replicates the lowest element of x into both elements of |
| // the returned Int64x2. |
| // |
| // Asm: VDUP, CPU Feature: NEON |
| func (x Int64x2) broadcast1To2() Int64x2 |
| |
| // broadcast1To2 replicates the lowest element of x into both elements of |
| // the returned Uint64x2. |
| // |
| // Asm: VDUP, CPU Feature: NEON |
| func (x Uint64x2) broadcast1To2() Uint64x2 |
| |
| /* broadcast1To4 */ |
| |
| // broadcast1To4 replicates the lowest element of x into each of the 4 elements of |
| // the returned Float32x4. |
| // |
| // Asm: VDUP, CPU Feature: NEON |
| func (x Float32x4) broadcast1To4() Float32x4 |
| |
| // broadcast1To4 replicates the lowest element of x into each of the 4 elements of |
| // the returned Int32x4. |
| // |
| // Asm: VDUP, CPU Feature: NEON |
| func (x Int32x4) broadcast1To4() Int32x4 |
| |
| // broadcast1To4 replicates the lowest element of x into each of the 4 elements of |
| // the returned Uint32x4. |
| // |
| // Asm: VDUP, CPU Feature: NEON |
| func (x Uint32x4) broadcast1To4() Uint32x4 |
| |
| /* broadcast1To8 */ |
| |
| // broadcast1To8 replicates the lowest element of x into each of the 8 elements of |
| // the returned Int16x8. |
| // |
| // Asm: VDUP, CPU Feature: NEON |
| func (x Int16x8) broadcast1To8() Int16x8 |
| |
| // broadcast1To8 replicates the lowest element of x into each of the 8 elements of |
| // the returned Uint16x8. |
| // |
| // Asm: VDUP, CPU Feature: NEON |
| func (x Uint16x8) broadcast1To8() Uint16x8 |
| |
| /* broadcast1To16 */ |
| |
| // broadcast1To16 replicates the lowest element of x into each of the 16 elements of |
| // the returned Int8x16. |
| // |
| // Asm: VDUP, CPU Feature: NEON |
| func (x Int8x16) broadcast1To16() Int8x16 |
| |
| // broadcast1To16 replicates the lowest element of x into each of the 16 elements of |
| // the returned Uint8x16. |
| // |
| // Asm: VDUP, CPU Feature: NEON |
| func (x Uint8x16) broadcast1To16() Uint8x16 |
| |
| /* carrylessMultiplyWidenLo */ |
| |
| // carrylessMultiplyWidenLo returns the carryless (polynomial) product of the low halves of x and y. |
| // |
| // A carryless multiplication uses bitwise XOR instead of |
| // add-with-carry, for example (in base two): |
| // |
| //	11 * 11 = 11 * (10 ^ 1) = (11 * 10) ^ (11 * 1) = 110 ^ 11 = 101 |
| // |
| // This also models multiplication of polynomials with coefficients |
| // from GF(2) -- 11 * 11 models (x+1)*(x+1) = x**2 + (1^1)x + 1 = |
| // x**2 + 0x + 1 = x**2 + 1 modeled by 101. (Note that "+" adds |
| // polynomial terms, but coefficients "add" with XOR; x here is the polynomial variable, not the receiver.) |
| // |
| // For the high-indexed elements, use HiToLo: |
| // |
| //	x.HiToLo().carrylessMultiplyWidenLo(y.HiToLo()) |
| // |
| // Asm: VPMULL, CPU Feature: NEON |
| func (x Uint64x2) carrylessMultiplyWidenLo(y Uint64x2) Uint64x2 |
| |
| /* reduceMax */ |
| |
| // reduceMax finds the largest of the 4 elements of x. The returned vector carries that |
| // maximum in element 0, and all of its remaining elements are zero. |
| // |
| // Asm: VFMAXV, CPU Feature: NEON |
| func (x Float32x4) reduceMax() Float32x4 |
| |
| // reduceMax finds the largest of the 16 elements of x. The returned vector carries that |
| // maximum in element 0, and all of its remaining elements are zero. |
| // |
| // Asm: VSMAXV, CPU Feature: NEON |
| func (x Int8x16) reduceMax() Int8x16 |
| |
| // reduceMax finds the largest of the 8 elements of x. The returned vector carries that |
| // maximum in element 0, and all of its remaining elements are zero. |
| // |
| // Asm: VSMAXV, CPU Feature: NEON |
| func (x Int16x8) reduceMax() Int16x8 |
| |
| // reduceMax finds the largest of the 4 elements of x. The returned vector carries that |
| // maximum in element 0, and all of its remaining elements are zero. |
| // |
| // Asm: VSMAXV, CPU Feature: NEON |
| func (x Int32x4) reduceMax() Int32x4 |
| |
| // reduceMax finds the largest of the 16 elements of x. The returned vector carries that |
| // maximum in element 0, and all of its remaining elements are zero. |
| // |
| // Asm: VUMAXV, CPU Feature: NEON |
| func (x Uint8x16) reduceMax() Uint8x16 |
| |
| // reduceMax finds the largest of the 8 elements of x. The returned vector carries that |
| // maximum in element 0, and all of its remaining elements are zero. |
| // |
| // Asm: VUMAXV, CPU Feature: NEON |
| func (x Uint16x8) reduceMax() Uint16x8 |
| |
| // reduceMax finds the largest of the 4 elements of x. The returned vector carries that |
| // maximum in element 0, and all of its remaining elements are zero. |
| // |
| // Asm: VUMAXV, CPU Feature: NEON |
| func (x Uint32x4) reduceMax() Uint32x4 |
| |
| /* reduceMin */ |
| |
| // reduceMin finds the smallest of the 4 elements of x. The returned vector carries that |
| // minimum in element 0, and all of its remaining elements are zero. |
| // |
| // Asm: VFMINV, CPU Feature: NEON |
| func (x Float32x4) reduceMin() Float32x4 |
| |
| // reduceMin finds the smallest of the 16 elements of x. The returned vector carries that |
| // minimum in element 0, and all of its remaining elements are zero. |
| // |
| // Asm: VSMINV, CPU Feature: NEON |
| func (x Int8x16) reduceMin() Int8x16 |
| |
| // reduceMin finds the smallest of the 8 elements of x. The returned vector carries that |
| // minimum in element 0, and all of its remaining elements are zero. |
| // |
| // Asm: VSMINV, CPU Feature: NEON |
| func (x Int16x8) reduceMin() Int16x8 |
| |
| // reduceMin finds the smallest of the 4 elements of x. The returned vector carries that |
| // minimum in element 0, and all of its remaining elements are zero. |
| // |
| // Asm: VSMINV, CPU Feature: NEON |
| func (x Int32x4) reduceMin() Int32x4 |
| |
| // reduceMin finds the smallest of the 16 elements of x. The returned vector carries that |
| // minimum in element 0, and all of its remaining elements are zero. |
| // |
| // Asm: VUMINV, CPU Feature: NEON |
| func (x Uint8x16) reduceMin() Uint8x16 |
| |
| // reduceMin finds the smallest of the 8 elements of x. The returned vector carries that |
| // minimum in element 0, and all of its remaining elements are zero. |
| // |
| // Asm: VUMINV, CPU Feature: NEON |
| func (x Uint16x8) reduceMin() Uint16x8 |
| |
| // reduceMin finds the smallest of the 4 elements of x. The returned vector carries that |
| // minimum in element 0, and all of its remaining elements are zero. |
| // |
| // Asm: VUMINV, CPU Feature: NEON |
| func (x Uint32x4) reduceMin() Uint32x4 |
| |
| /* reduceSum */ |
| |
| // reduceSum adds together all 16 elements of x. The returned vector carries that |
| // sum in element 0, and all of its remaining elements are zero. |
| // |
| // Asm: VADDV, CPU Feature: NEON |
| func (x Int8x16) reduceSum() Int8x16 |
| |
| // reduceSum adds together all 8 elements of x. The returned vector carries that |
| // sum in element 0, and all of its remaining elements are zero. |
| // |
| // Asm: VADDV, CPU Feature: NEON |
| func (x Int16x8) reduceSum() Int16x8 |
| |
| // reduceSum adds together all 4 elements of x. The returned vector carries that |
| // sum in element 0, and all of its remaining elements are zero. |
| // |
| // Asm: VADDV, CPU Feature: NEON |
| func (x Int32x4) reduceSum() Int32x4 |
| |
| // reduceSum adds together all 16 elements of x. The returned vector carries that |
| // sum in element 0, and all of its remaining elements are zero. |
| // |
| // Asm: VADDV, CPU Feature: NEON |
| func (x Uint8x16) reduceSum() Uint8x16 |
| |
| // reduceSum adds together all 8 elements of x. The returned vector carries that |
| // sum in element 0, and all of its remaining elements are zero. |
| // |
| // Asm: VADDV, CPU Feature: NEON |
| func (x Uint16x8) reduceSum() Uint16x8 |
| |
| // reduceSum adds together all 4 elements of x. The returned vector carries that |
| // sum in element 0, and all of its remaining elements are zero. |
| // |
| // Asm: VADDV, CPU Feature: NEON |
| func (x Uint32x4) reduceSum() Uint32x4 |