// src/simd/archsimd/ops_internal_arm64.go - go - Git at Google

 // Code generated by 'simdgen -o godefs -goroot $GOROOT -arch arm64 -arm64Path $ARM64_ISA_PATH go_arm64.yaml types.yaml categories.yaml'; DO NOT EDIT.

 //go:build goexperiment.simd

 package archsimd

 /* bitSelect */

 // bitSelect performs a bitwise select between x and y under control of mask:
 // each result bit is taken from y where the corresponding mask bit is 1, and
 // from x where it is 0. Equivalently, the result is (x &^ mask) | (y & mask).
 //
 // The selection is made per bit, not per element, so the bits of mask within
 // one element do not have to be all ones or all zeros.
 //
 // Asm: VBIT, CPU Feature: NEON
 func (x Int8x16) bitSelect(y Int8x16, mask Int8x16) Int8x16

 /* bitSelectNot */

 // bitSelectNot performs a bitwise select between x and y under control of
 // mask, with the opposite mask polarity to bitSelect: each result bit is taken
 // from y where the corresponding mask bit is 0, and from x where it is 1.
 // Equivalently, the result is (x & mask) | (y &^ mask).
 //
 // Asm: VBIF, CPU Feature: NEON
 func (x Int8x16) bitSelectNot(y Int8x16, mask Int8x16) Int8x16

 /* broadcast1To2 */

 // broadcast1To2 copies the lowest element of its input to all 2 elements of
 // the output vector. Both elements of the result are therefore equal to the
 // lowest element of x; the other element of x is not used.
 //
 // Asm: VDUP, CPU Feature: NEON
 func (x Float64x2) broadcast1To2() Float64x2

 // broadcast1To2 copies the lowest element of its input to all 2 elements of
 // the output vector. Both elements of the result are therefore equal to the
 // lowest element of x; the other element of x is not used.
 //
 // Asm: VDUP, CPU Feature: NEON
 func (x Int64x2) broadcast1To2() Int64x2

 // broadcast1To2 copies the lowest element of its input to all 2 elements of
 // the output vector. Both elements of the result are therefore equal to the
 // lowest element of x; the other element of x is not used.
 //
 // Asm: VDUP, CPU Feature: NEON
 func (x Uint64x2) broadcast1To2() Uint64x2

 /* broadcast1To4 */

 // broadcast1To4 copies the lowest element of its input to all 4 elements of
 // the output vector. Every element of the result is therefore equal to the
 // lowest element of x; the remaining elements of x are not used.
 //
 // Asm: VDUP, CPU Feature: NEON
 func (x Float32x4) broadcast1To4() Float32x4

 // broadcast1To4 copies the lowest element of its input to all 4 elements of
 // the output vector. Every element of the result is therefore equal to the
 // lowest element of x; the remaining elements of x are not used.
 //
 // Asm: VDUP, CPU Feature: NEON
 func (x Int32x4) broadcast1To4() Int32x4

 // broadcast1To4 copies the lowest element of its input to all 4 elements of
 // the output vector. Every element of the result is therefore equal to the
 // lowest element of x; the remaining elements of x are not used.
 //
 // Asm: VDUP, CPU Feature: NEON
 func (x Uint32x4) broadcast1To4() Uint32x4

 /* broadcast1To8 */

 // broadcast1To8 copies the lowest element of its input to all 8 elements of
 // the output vector. Every element of the result is therefore equal to the
 // lowest element of x; the remaining elements of x are not used.
 //
 // Asm: VDUP, CPU Feature: NEON
 func (x Int16x8) broadcast1To8() Int16x8

 // broadcast1To8 copies the lowest element of its input to all 8 elements of
 // the output vector. Every element of the result is therefore equal to the
 // lowest element of x; the remaining elements of x are not used.
 //
 // Asm: VDUP, CPU Feature: NEON
 func (x Uint16x8) broadcast1To8() Uint16x8

 /* broadcast1To16 */

 // broadcast1To16 copies the lowest element of its input to all 16 elements of
 // the output vector. Every element of the result is therefore equal to the
 // lowest element of x; the remaining elements of x are not used.
 //
 // Asm: VDUP, CPU Feature: NEON
 func (x Int8x16) broadcast1To16() Int8x16

 // broadcast1To16 copies the lowest element of its input to all 16 elements of
 // the output vector. Every element of the result is therefore equal to the
 // lowest element of x; the remaining elements of x are not used.
 //
 // Asm: VDUP, CPU Feature: NEON
 func (x Uint8x16) broadcast1To16() Uint8x16

 /* carrylessMultiplyWidenLo */

 // carrylessMultiplyWidenLo returns the carryless (polynomial) product of the low halves
 // of x and y.
 //
 // A carryless multiplication uses bitwise XOR instead of
 // add-with-carry, for example (in base two):
 //
 //	11 * 11 = 11 * (10 ^ 1) = (11 * 10) ^ (11 * 1) = 110 ^ 11 = 101
 //
 // This also models multiplication of polynomials with coefficients
 // from GF(2) -- 11 * 11 models (x+1)*(x+1) = x**2 + (1^1)x + 1 =
 // x**2 + 0x + 1 = x**2 + 1 modeled by 101.  (Note that "+" adds
 // polynomial terms, but coefficients "add" with XOR.)
 //
 // Only the low halves of the operands take part in the product. To multiply
 // the high-indexed elements instead, apply HiToLo to both operands first:
 //
 //	x.HiToLo().carrylessMultiplyWidenLo(y.HiToLo())
 //
 // NOTE(review): for Uint64x2 the "low half" is presumably element 0 of each
 // operand, with the widened product occupying the whole result vector --
 // confirm against the VPMULL definition.
 //
 // Asm: VPMULL, CPU Feature: NEON
 func (x Uint64x2) carrylessMultiplyWidenLo(y Uint64x2) Uint64x2

 /* reduceMax */

 // reduceMax reduces x by taking the maximum of all 4 elements. The result is
 // still a vector: the maximum is placed in element 0 and all other elements
 // are zeroed.
 //
 // NOTE(review): the treatment of NaN elements is not described here;
 // presumably it follows VFMAXV -- confirm.
 //
 // Asm: VFMAXV, CPU Feature: NEON
 func (x Float32x4) reduceMax() Float32x4

 // reduceMax reduces x by taking the maximum of all 16 elements. The result is
 // still a vector: the maximum is placed in element 0 and all other elements
 // are zeroed.
 //
 // Asm: VSMAXV, CPU Feature: NEON
 func (x Int8x16) reduceMax() Int8x16

 // reduceMax reduces x by taking the maximum of all 8 elements. The result is
 // still a vector: the maximum is placed in element 0 and all other elements
 // are zeroed.
 //
 // Asm: VSMAXV, CPU Feature: NEON
 func (x Int16x8) reduceMax() Int16x8

 // reduceMax reduces x by taking the maximum of all 4 elements. The result is
 // still a vector: the maximum is placed in element 0 and all other elements
 // are zeroed.
 //
 // Asm: VSMAXV, CPU Feature: NEON
 func (x Int32x4) reduceMax() Int32x4

 // reduceMax reduces x by taking the maximum of all 16 elements. The result is
 // still a vector: the maximum is placed in element 0 and all other elements
 // are zeroed.
 //
 // Asm: VUMAXV, CPU Feature: NEON
 func (x Uint8x16) reduceMax() Uint8x16

 // reduceMax reduces x by taking the maximum of all 8 elements. The result is
 // still a vector: the maximum is placed in element 0 and all other elements
 // are zeroed.
 //
 // Asm: VUMAXV, CPU Feature: NEON
 func (x Uint16x8) reduceMax() Uint16x8

 // reduceMax reduces x by taking the maximum of all 4 elements. The result is
 // still a vector: the maximum is placed in element 0 and all other elements
 // are zeroed.
 //
 // Asm: VUMAXV, CPU Feature: NEON
 func (x Uint32x4) reduceMax() Uint32x4

 /* reduceMin */

 // reduceMin reduces x by taking the minimum of all 4 elements. The result is
 // still a vector: the minimum is placed in element 0 and all other elements
 // are zeroed.
 //
 // NOTE(review): the treatment of NaN elements is not described here;
 // presumably it follows VFMINV -- confirm.
 //
 // Asm: VFMINV, CPU Feature: NEON
 func (x Float32x4) reduceMin() Float32x4

 // reduceMin reduces x by taking the minimum of all 16 elements. The result is
 // still a vector: the minimum is placed in element 0 and all other elements
 // are zeroed.
 //
 // Asm: VSMINV, CPU Feature: NEON
 func (x Int8x16) reduceMin() Int8x16

 // reduceMin reduces x by taking the minimum of all 8 elements. The result is
 // still a vector: the minimum is placed in element 0 and all other elements
 // are zeroed.
 //
 // Asm: VSMINV, CPU Feature: NEON
 func (x Int16x8) reduceMin() Int16x8

 // reduceMin reduces x by taking the minimum of all 4 elements. The result is
 // still a vector: the minimum is placed in element 0 and all other elements
 // are zeroed.
 //
 // Asm: VSMINV, CPU Feature: NEON
 func (x Int32x4) reduceMin() Int32x4

 // reduceMin reduces x by taking the minimum of all 16 elements. The result is
 // still a vector: the minimum is placed in element 0 and all other elements
 // are zeroed.
 //
 // Asm: VUMINV, CPU Feature: NEON
 func (x Uint8x16) reduceMin() Uint8x16

 // reduceMin reduces x by taking the minimum of all 8 elements. The result is
 // still a vector: the minimum is placed in element 0 and all other elements
 // are zeroed.
 //
 // Asm: VUMINV, CPU Feature: NEON
 func (x Uint16x8) reduceMin() Uint16x8

 // reduceMin reduces x by taking the minimum of all 4 elements. The result is
 // still a vector: the minimum is placed in element 0 and all other elements
 // are zeroed.
 //
 // Asm: VUMINV, CPU Feature: NEON
 func (x Uint32x4) reduceMin() Uint32x4

 /* reduceSum */

 // reduceSum reduces x by summing all 16 elements. The result is still a
 // vector: the sum is placed in element 0 and all other elements are zeroed.
 //
 // NOTE(review): the sum is stored in an element of the same type as the
 // inputs, so it presumably wraps around on overflow -- confirm.
 //
 // Asm: VADDV, CPU Feature: NEON
 func (x Int8x16) reduceSum() Int8x16

 // reduceSum reduces x by summing all 8 elements. The result is still a
 // vector: the sum is placed in element 0 and all other elements are zeroed.
 //
 // NOTE(review): the sum is stored in an element of the same type as the
 // inputs, so it presumably wraps around on overflow -- confirm.
 //
 // Asm: VADDV, CPU Feature: NEON
 func (x Int16x8) reduceSum() Int16x8

 // reduceSum reduces x by summing all 4 elements. The result is still a
 // vector: the sum is placed in element 0 and all other elements are zeroed.
 //
 // NOTE(review): the sum is stored in an element of the same type as the
 // inputs, so it presumably wraps around on overflow -- confirm.
 //
 // Asm: VADDV, CPU Feature: NEON
 func (x Int32x4) reduceSum() Int32x4

 // reduceSum reduces x by summing all 16 elements. The result is still a
 // vector: the sum is placed in element 0 and all other elements are zeroed.
 //
 // NOTE(review): the sum is stored in an element of the same type as the
 // inputs, so it presumably wraps around on overflow -- confirm.
 //
 // Asm: VADDV, CPU Feature: NEON
 func (x Uint8x16) reduceSum() Uint8x16

 // reduceSum reduces x by summing all 8 elements. The result is still a
 // vector: the sum is placed in element 0 and all other elements are zeroed.
 //
 // NOTE(review): the sum is stored in an element of the same type as the
 // inputs, so it presumably wraps around on overflow -- confirm.
 //
 // Asm: VADDV, CPU Feature: NEON
 func (x Uint16x8) reduceSum() Uint16x8

 // reduceSum reduces x by summing all 4 elements. The result is still a
 // vector: the sum is placed in element 0 and all other elements are zeroed.
 //
 // NOTE(review): the sum is stored in an element of the same type as the
 // inputs, so it presumably wraps around on overflow -- confirm.
 //
 // Asm: VADDV, CPU Feature: NEON
 func (x Uint32x4) reduceSum() Uint32x4
	// Code generated by 'simdgen -o godefs -goroot $GOROOT -arch arm64 -arm64Path $ARM64_ISA_PATH go_arm64.yaml types.yaml categories.yaml'; DO NOT EDIT.

	//go:build goexperiment.simd

	package archsimd

	/* bitSelect */

	// bitSelect performs a bitwise select between x and y under control of mask:
	// each result bit is taken from y where the corresponding mask bit is 1, and
	// from x where it is 0. Equivalently, the result is (x &^ mask) | (y & mask).
	//
	// The selection is made per bit, not per element, so the bits of mask within
	// one element do not have to be all ones or all zeros.
	//
	// Asm: VBIT, CPU Feature: NEON
	func (x Int8x16) bitSelect(y Int8x16, mask Int8x16) Int8x16

	/* bitSelectNot */

	// bitSelectNot performs a bitwise select between x and y under control of
	// mask, with the opposite mask polarity to bitSelect: each result bit is taken
	// from y where the corresponding mask bit is 0, and from x where it is 1.
	// Equivalently, the result is (x & mask) | (y &^ mask).
	//
	// Asm: VBIF, CPU Feature: NEON
	func (x Int8x16) bitSelectNot(y Int8x16, mask Int8x16) Int8x16

	/* broadcast1To2 */

	// broadcast1To2 copies the lowest element of its input to all 2 elements of
	// the output vector. Both elements of the result are therefore equal to the
	// lowest element of x; the other element of x is not used.
	//
	// Asm: VDUP, CPU Feature: NEON
	func (x Float64x2) broadcast1To2() Float64x2

	// broadcast1To2 copies the lowest element of its input to all 2 elements of
	// the output vector. Both elements of the result are therefore equal to the
	// lowest element of x; the other element of x is not used.
	//
	// Asm: VDUP, CPU Feature: NEON
	func (x Int64x2) broadcast1To2() Int64x2

	// broadcast1To2 copies the lowest element of its input to all 2 elements of
	// the output vector. Both elements of the result are therefore equal to the
	// lowest element of x; the other element of x is not used.
	//
	// Asm: VDUP, CPU Feature: NEON
	func (x Uint64x2) broadcast1To2() Uint64x2

	/* broadcast1To4 */

	// broadcast1To4 copies the lowest element of its input to all 4 elements of
	// the output vector. Every element of the result is therefore equal to the
	// lowest element of x; the remaining elements of x are not used.
	//
	// Asm: VDUP, CPU Feature: NEON
	func (x Float32x4) broadcast1To4() Float32x4

	// broadcast1To4 copies the lowest element of its input to all 4 elements of
	// the output vector. Every element of the result is therefore equal to the
	// lowest element of x; the remaining elements of x are not used.
	//
	// Asm: VDUP, CPU Feature: NEON
	func (x Int32x4) broadcast1To4() Int32x4

	// broadcast1To4 copies the lowest element of its input to all 4 elements of
	// the output vector. Every element of the result is therefore equal to the
	// lowest element of x; the remaining elements of x are not used.
	//
	// Asm: VDUP, CPU Feature: NEON
	func (x Uint32x4) broadcast1To4() Uint32x4

	/* broadcast1To8 */

	// broadcast1To8 copies the lowest element of its input to all 8 elements of
	// the output vector. Every element of the result is therefore equal to the
	// lowest element of x; the remaining elements of x are not used.
	//
	// Asm: VDUP, CPU Feature: NEON
	func (x Int16x8) broadcast1To8() Int16x8

	// broadcast1To8 copies the lowest element of its input to all 8 elements of
	// the output vector. Every element of the result is therefore equal to the
	// lowest element of x; the remaining elements of x are not used.
	//
	// Asm: VDUP, CPU Feature: NEON
	func (x Uint16x8) broadcast1To8() Uint16x8

	/* broadcast1To16 */

	// broadcast1To16 copies the lowest element of its input to all 16 elements of
	// the output vector. Every element of the result is therefore equal to the
	// lowest element of x; the remaining elements of x are not used.
	//
	// Asm: VDUP, CPU Feature: NEON
	func (x Int8x16) broadcast1To16() Int8x16

	// broadcast1To16 copies the lowest element of its input to all 16 elements of
	// the output vector. Every element of the result is therefore equal to the
	// lowest element of x; the remaining elements of x are not used.
	//
	// Asm: VDUP, CPU Feature: NEON
	func (x Uint8x16) broadcast1To16() Uint8x16

	/* carrylessMultiplyWidenLo */

	// carrylessMultiplyWidenLo returns the carryless (polynomial) product of the low halves
	// of x and y.
	//
	// A carryless multiplication uses bitwise XOR instead of
	// add-with-carry, for example (in base two):
	//
	//	11 * 11 = 11 * (10 ^ 1) = (11 * 10) ^ (11 * 1) = 110 ^ 11 = 101
	//
	// This also models multiplication of polynomials with coefficients
	// from GF(2) -- 11 * 11 models (x+1)*(x+1) = x**2 + (1^1)x + 1 =
	// x**2 + 0x + 1 = x**2 + 1 modeled by 101.  (Note that "+" adds
	// polynomial terms, but coefficients "add" with XOR.)
	//
	// Only the low halves of the operands take part in the product. To multiply
	// the high-indexed elements instead, apply HiToLo to both operands first:
	//
	//	x.HiToLo().carrylessMultiplyWidenLo(y.HiToLo())
	//
	// NOTE(review): for Uint64x2 the "low half" is presumably element 0 of each
	// operand, with the widened product occupying the whole result vector --
	// confirm against the VPMULL definition.
	//
	// Asm: VPMULL, CPU Feature: NEON
	func (x Uint64x2) carrylessMultiplyWidenLo(y Uint64x2) Uint64x2

	/* reduceMax */

	// reduceMax reduces x by taking the maximum of all 4 elements. The result is
	// still a vector: the maximum is placed in element 0 and all other elements
	// are zeroed.
	//
	// NOTE(review): the treatment of NaN elements is not described here;
	// presumably it follows VFMAXV -- confirm.
	//
	// Asm: VFMAXV, CPU Feature: NEON
	func (x Float32x4) reduceMax() Float32x4

	// reduceMax reduces x by taking the maximum of all 16 elements. The result is
	// still a vector: the maximum is placed in element 0 and all other elements
	// are zeroed.
	//
	// Asm: VSMAXV, CPU Feature: NEON
	func (x Int8x16) reduceMax() Int8x16

	// reduceMax reduces x by taking the maximum of all 8 elements. The result is
	// still a vector: the maximum is placed in element 0 and all other elements
	// are zeroed.
	//
	// Asm: VSMAXV, CPU Feature: NEON
	func (x Int16x8) reduceMax() Int16x8

	// reduceMax reduces x by taking the maximum of all 4 elements. The result is
	// still a vector: the maximum is placed in element 0 and all other elements
	// are zeroed.
	//
	// Asm: VSMAXV, CPU Feature: NEON
	func (x Int32x4) reduceMax() Int32x4

	// reduceMax reduces x by taking the maximum of all 16 elements. The result is
	// still a vector: the maximum is placed in element 0 and all other elements
	// are zeroed.
	//
	// Asm: VUMAXV, CPU Feature: NEON
	func (x Uint8x16) reduceMax() Uint8x16

	// reduceMax reduces x by taking the maximum of all 8 elements. The result is
	// still a vector: the maximum is placed in element 0 and all other elements
	// are zeroed.
	//
	// Asm: VUMAXV, CPU Feature: NEON
	func (x Uint16x8) reduceMax() Uint16x8

	// reduceMax reduces x by taking the maximum of all 4 elements. The result is
	// still a vector: the maximum is placed in element 0 and all other elements
	// are zeroed.
	//
	// Asm: VUMAXV, CPU Feature: NEON
	func (x Uint32x4) reduceMax() Uint32x4

	/* reduceMin */

	// reduceMin reduces x by taking the minimum of all 4 elements. The result is
	// still a vector: the minimum is placed in element 0 and all other elements
	// are zeroed.
	//
	// NOTE(review): the treatment of NaN elements is not described here;
	// presumably it follows VFMINV -- confirm.
	//
	// Asm: VFMINV, CPU Feature: NEON
	func (x Float32x4) reduceMin() Float32x4

	// reduceMin reduces x by taking the minimum of all 16 elements. The result is
	// still a vector: the minimum is placed in element 0 and all other elements
	// are zeroed.
	//
	// Asm: VSMINV, CPU Feature: NEON
	func (x Int8x16) reduceMin() Int8x16

	// reduceMin reduces x by taking the minimum of all 8 elements. The result is
	// still a vector: the minimum is placed in element 0 and all other elements
	// are zeroed.
	//
	// Asm: VSMINV, CPU Feature: NEON
	func (x Int16x8) reduceMin() Int16x8

	// reduceMin reduces x by taking the minimum of all 4 elements. The result is
	// still a vector: the minimum is placed in element 0 and all other elements
	// are zeroed.
	//
	// Asm: VSMINV, CPU Feature: NEON
	func (x Int32x4) reduceMin() Int32x4

	// reduceMin reduces x by taking the minimum of all 16 elements. The result is
	// still a vector: the minimum is placed in element 0 and all other elements
	// are zeroed.
	//
	// Asm: VUMINV, CPU Feature: NEON
	func (x Uint8x16) reduceMin() Uint8x16

	// reduceMin reduces x by taking the minimum of all 8 elements. The result is
	// still a vector: the minimum is placed in element 0 and all other elements
	// are zeroed.
	//
	// Asm: VUMINV, CPU Feature: NEON
	func (x Uint16x8) reduceMin() Uint16x8

	// reduceMin reduces x by taking the minimum of all 4 elements. The result is
	// still a vector: the minimum is placed in element 0 and all other elements
	// are zeroed.
	//
	// Asm: VUMINV, CPU Feature: NEON
	func (x Uint32x4) reduceMin() Uint32x4

	/* reduceSum */

	// reduceSum reduces x by summing all 16 elements. The result is still a
	// vector: the sum is placed in element 0 and all other elements are zeroed.
	//
	// NOTE(review): the sum is stored in an element of the same type as the
	// inputs, so it presumably wraps around on overflow -- confirm.
	//
	// Asm: VADDV, CPU Feature: NEON
	func (x Int8x16) reduceSum() Int8x16

	// reduceSum reduces x by summing all 8 elements. The result is still a
	// vector: the sum is placed in element 0 and all other elements are zeroed.
	//
	// NOTE(review): the sum is stored in an element of the same type as the
	// inputs, so it presumably wraps around on overflow -- confirm.
	//
	// Asm: VADDV, CPU Feature: NEON
	func (x Int16x8) reduceSum() Int16x8

	// reduceSum reduces x by summing all 4 elements. The result is still a
	// vector: the sum is placed in element 0 and all other elements are zeroed.
	//
	// NOTE(review): the sum is stored in an element of the same type as the
	// inputs, so it presumably wraps around on overflow -- confirm.
	//
	// Asm: VADDV, CPU Feature: NEON
	func (x Int32x4) reduceSum() Int32x4

	// reduceSum reduces x by summing all 16 elements. The result is still a
	// vector: the sum is placed in element 0 and all other elements are zeroed.
	//
	// NOTE(review): the sum is stored in an element of the same type as the
	// inputs, so it presumably wraps around on overflow -- confirm.
	//
	// Asm: VADDV, CPU Feature: NEON
	func (x Uint8x16) reduceSum() Uint8x16

	// reduceSum reduces x by summing all 8 elements. The result is still a
	// vector: the sum is placed in element 0 and all other elements are zeroed.
	//
	// NOTE(review): the sum is stored in an element of the same type as the
	// inputs, so it presumably wraps around on overflow -- confirm.
	//
	// Asm: VADDV, CPU Feature: NEON
	func (x Uint16x8) reduceSum() Uint16x8

	// reduceSum reduces x by summing all 4 elements. The result is still a
	// vector: the sum is placed in element 0 and all other elements are zeroed.
	//
	// NOTE(review): the sum is stored in an element of the same type as the
	// inputs, so it presumably wraps around on overflow -- confirm.
	//
	// Asm: VADDV, CPU Feature: NEON
	func (x Uint32x4) reduceSum() Uint32x4