internal/simdgen: use "NAME" for method names in doc strings
This will let us merge operations that differ only in their method
names. In particular, this will give us a robust way to insert
computed method names.
The YAML changes were done using the following Gemini CLI prompt:
In all of the files named "categories.yaml", for each operation in
the YAML list, find the Go method name from the "go" field, and
replace anywhere that operation name appears as a word in the
"documentation" field with the literal string "NAME". Please do this
using Go with the yaml.v3 library. The yaml.v3 library is already
imported in this module.
No effect on generated godefs.
Change-Id: Ifdac95c5d62475937fc33a8013d0b0c5c5dca312
Reviewed-on: https://go-review.googlesource.com/c/arch/+/691340
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Auto-Submit: Austin Clements <austin@google.com>
Reviewed-by: Junyang Shao <shaojunyang@google.com>
diff --git a/internal/simdgen/categories.yaml b/internal/simdgen/categories.yaml
index f839f69..5a7e711 100644
--- a/internal/simdgen/categories.yaml
+++ b/internal/simdgen/categories.yaml
@@ -3,114 +3,115 @@
commutative: true
extension: "AVX.*"
documentation: !string |-
- // Add adds corresponding elements of two vectors.
+ // NAME adds corresponding elements of two vectors.
- go: SaturatedAdd
commutative: true
extension: "AVX.*"
documentation: !string |-
- // SaturatedAdd adds corresponding elements of two vectors with saturation.
+ // NAME adds corresponding elements of two vectors with saturation.
- go: AddMasked
masked: true
commutative: true
extension: "AVX.*"
documentation: !string |-
- // AddMasked adds corresponding elements of two vectors.
+ // NAME adds corresponding elements of two vectors.
- go: SaturatedAddMasked
masked: true
commutative: true
extension: "AVX.*"
documentation: !string |-
- // SaturatedAddMasked adds corresponding elements of two vectors with saturation.
+ // NAME adds corresponding elements of two vectors with saturation.
- go: Sub
commutative: false
extension: "AVX.*"
documentation: !string |-
- // Sub subtracts corresponding elements of two vectors.
+ // NAME subtracts corresponding elements of two vectors.
- go: SaturatedSub
commutative: false
extension: "AVX.*"
documentation: !string |-
- // SaturatedSub subtracts corresponding elements of two vectors with saturation.
+ // NAME subtracts corresponding elements of two vectors with saturation.
- go: SubMasked
masked: true
commutative: false
extension: "AVX.*"
documentation: !string |-
- // SubMasked subtracts corresponding elements of two vectors.
+ // NAME subtracts corresponding elements of two vectors.
- go: SaturatedSubMasked
masked: true
commutative: false
extension: "AVX.*"
documentation: !string |-
- // SaturatedSubMasked subtracts corresponding elements of two vectors with saturation.
+ // NAME subtracts corresponding elements of two vectors with saturation.
- go: PairwiseAdd
commutative: false
extension: "AVX.*"
documentation: !string |-
- // PairwiseAdd horizontally adds adjacent pairs of elements.
+ // NAME horizontally adds adjacent pairs of elements.
// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
- go: PairwiseSub
commutative: false
extension: "AVX.*"
documentation: !string |-
- // PairwiseSub horizontally subtracts adjacent pairs of elements.
+ // NAME horizontally subtracts adjacent pairs of elements.
// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
- go: SaturatedPairwiseAdd
commutative: false
extension: "AVX.*"
documentation: !string |-
- // SaturatedPairwiseAdd horizontally adds adjacent pairs of elements with saturation.
+ // NAME horizontally adds adjacent pairs of elements with saturation.
// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
- go: SaturatedPairwiseSub
commutative: false
extension: "AVX.*"
documentation: !string |-
- // SaturatedPairwiseSub horizontally subtracts adjacent pairs of elements with saturation.
+ // NAME horizontally subtracts adjacent pairs of elements with saturation.
// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
- go: And
commutative: true
extension: "AVX.*"
documentation: !string |-
- // And performs a bitwise AND operation between two vectors.
+ // NAME performs a bitwise AND operation between two vectors.
- go: AndMasked
masked: true
commutative: true
extension: "AVX.*"
documentation: !string |-
- // AndMasked performs a bitwise AND operation between two vectors.
+ // NAME performs a bitwise AND operation between two vectors.
- go: Or
commutative: true
extension: "AVX.*"
documentation: !string |-
- // Or performs a bitwise OR operation between two vectors.
+ // NAME performs a bitwise OR operation between two vectors.
- go: OrMasked
masked: true
commutative: true
extension: "AVX.*"
documentation: !string |-
- // OrMasked performs a bitwise OR operation between two vectors.
+ // NAME performs a bitwise OR operation between two vectors.
- go: AndNot
commutative: false
extension: "AVX.*"
documentation: !string |-
- // AndNot performs a bitwise x &^ y.
+ // NAME performs a bitwise x &^ y.
- go: AndNotMasked
masked: true
commutative: false
extension: "AVX.*"
documentation: !string |-
- // AndNotMasked performs a bitwise x &^ y.
+ // NAME performs a bitwise x &^ y.
- go: Xor
commutative: true
extension: "AVX.*"
documentation: !string |-
- // Xor performs a bitwise XOR operation between two vectors.
+ // NAME performs a bitwise XOR operation between two vectors.
- go: XorMasked
masked: true
commutative: true
extension: "AVX.*"
documentation: !string |-
- // XorMasked performs a bitwise XOR operation between two vectors.
+ // NAME performs a bitwise XOR operation between two vectors.
+
# We also have PTEST and VPTERNLOG, those should be hidden from the users
# and only appear in rewrite rules.
# const imm predicate(holds for both float and int|uint):
@@ -125,239 +126,233 @@
commutative: true
extension: "AVX.*"
documentation: !string |-
- // Equal compares for equality.
+ // NAME compares for equality.
- go: Less
constImm: 1
commutative: false
extension: "AVX.*"
documentation: !string |-
- // Less compares for less than.
+ // NAME compares for less than.
- go: LessEqual
constImm: 2
commutative: false
extension: "AVX.*"
documentation: !string |-
- // LessEqual compares for less than or equal.
+ // NAME compares for less than or equal.
- go: IsNan # For float only.
constImm: 3
commutative: true
extension: "AVX.*"
documentation: !string |-
- // IsNan checks if elements are NaN. Use as x.IsNan(x).
+ // NAME checks if elements are NaN. Use as x.IsNan(x).
- go: NotEqual
constImm: 4
commutative: true
extension: "AVX.*"
documentation: !string |-
- // NotEqual compares for inequality.
+ // NAME compares for inequality.
- go: GreaterEqual
constImm: 13
commutative: false
extension: "AVX.*"
documentation: !string |-
- // GreaterEqual compares for greater than or equal.
+ // NAME compares for greater than or equal.
- go: Greater
constImm: 14
commutative: false
extension: "AVX.*"
documentation: !string |-
- // Greater compares for greater than.
-
+ // NAME compares for greater than.
- go: EqualMasked
constImm: 0
masked: true
commutative: true
extension: "AVX.*"
documentation: !string |-
- // EqualMasked compares for equality.
+ // NAME compares for equality.
- go: LessMasked
constImm: 1
masked: true
commutative: false
extension: "AVX.*"
documentation: !string |-
- // LessMasked compares for less than.
+ // NAME compares for less than.
- go: LessEqualMasked
constImm: 2
masked: true
commutative: false
extension: "AVX.*"
documentation: !string |-
- // LessEqualMasked compares for less than or equal.
+ // NAME compares for less than or equal.
- go: IsNanMasked # For float only.
constImm: 3
masked: true
commutative: true
extension: "AVX.*"
documentation: !string |-
- // IsNanMasked checks if elements are NaN. Use as x.IsNan(x).
+ // NAME checks if elements are NaN. Use as x.IsNan(x).
- go: NotEqualMasked
constImm: 4
masked: true
commutative: true
extension: "AVX.*"
documentation: !string |-
- // NotEqualMasked compares for inequality.
+ // NAME compares for inequality.
- go: GreaterEqualMasked
constImm: 13
masked: true
commutative: false
extension: "AVX.*"
documentation: !string |-
- // GreaterEqualMasked compares for greater than or equal.
+ // NAME compares for greater than or equal.
- go: GreaterMasked
constImm: 14
masked: true
commutative: false
extension: "AVX.*"
documentation: !string |-
- // GreaterMasked compares for greater than.
+ // NAME compares for greater than.
- go: Div
commutative: false
extension: "AVX.*"
documentation: !string |-
- // Div divides elements of two vectors.
+ // NAME divides elements of two vectors.
- go: DivMasked
commutative: false
masked: true
extension: "AVX.*"
documentation: !string |-
- // DivMasked divides elements of two vectors.
+ // NAME divides elements of two vectors.
- go: Sqrt
commutative: false
extension: "AVX.*"
documentation: !string |-
- // Sqrt computes the square root of each element.
+ // NAME computes the square root of each element.
- go: SqrtMasked
commutative: false
masked: true
extension: "AVX.*"
documentation: !string |-
- // SqrtMasked computes the square root of each element.
+ // NAME computes the square root of each element.
- go: ApproximateReciprocal
commutative: false
extension: "AVX.*"
documentation: !string |-
- // ApproximateReciprocal computes an approximate reciprocal of each element.
+ // NAME computes an approximate reciprocal of each element.
- go: ApproximateReciprocalMasked
commutative: false
masked: true
extension: "AVX.*"
documentation: !string |-
- // ApproximateReciprocalMasked computes an approximate reciprocal of each element.
+ // NAME computes an approximate reciprocal of each element.
- go: ApproximateReciprocalOfSqrt
commutative: false
extension: "AVX.*"
documentation: !string |-
- // ApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element.
+ // NAME computes an approximate reciprocal of the square root of each element.
- go: ApproximateReciprocalOfSqrtMasked
commutative: false
masked: true
extension: "AVX.*"
documentation: !string |-
- // ApproximateReciprocalOfSqrtMasked computes an approximate reciprocal of the square root of each element.
+ // NAME computes an approximate reciprocal of the square root of each element.
- go: MulByPowOf2Masked # This operation is all after AVX512, the unmasked version will be generated.
commutative: false
masked: true
extension: "AVX.*"
documentation: !string |-
- // MulByPowOf2Masked multiplies elements by a power of 2.
-
+ // NAME multiplies elements by a power of 2.
- go: Round
commutative: false
extension: "AVX.*"
constImm: 0
documentation: !string |-
- // Round rounds elements to the nearest integer.
+ // NAME rounds elements to the nearest integer.
- go: RoundWithPrecisionMasked
commutative: false
extension: "AVX.*"
constImm: 0
masked: true
documentation: !string |-
- // RoundWithPrecisionMasked rounds elements with specified precision.
+ // NAME rounds elements with specified precision.
- go: DiffWithRoundWithPrecisionMasked
commutative: false
extension: "AVX.*"
constImm: 0
masked: true
documentation: !string |-
- // DiffWithRoundWithPrecisionMasked computes the difference after rounding with specified precision.
-
+ // NAME computes the difference after rounding with specified precision.
- go: Floor
commutative: false
extension: "AVX.*"
constImm: 1
documentation: !string |-
- // Floor rounds elements down to the nearest integer.
+ // NAME rounds elements down to the nearest integer.
- go: FloorWithPrecisionMasked
commutative: false
extension: "AVX.*"
constImm: 1
masked: true
documentation: !string |-
- // FloorWithPrecisionMasked rounds elements down with specified precision.
+ // NAME rounds elements down with specified precision.
- go: DiffWithFloorWithPrecisionMasked
commutative: false
extension: "AVX.*"
constImm: 1
masked: true
documentation: !string |-
- // DiffWithFloorWithPrecisionMasked computes the difference after flooring with specified precision.
-
+ // NAME computes the difference after flooring with specified precision.
- go: Ceil
commutative: false
extension: "AVX.*"
constImm: 2
documentation: !string |-
- // Ceil rounds elements up to the nearest integer.
+ // NAME rounds elements up to the nearest integer.
- go: CeilWithPrecisionMasked
commutative: false
extension: "AVX.*"
constImm: 2
masked: true
documentation: !string |-
- // CeilWithPrecisionMasked rounds elements up with specified precision.
+ // NAME rounds elements up with specified precision.
- go: DiffWithCeilWithPrecisionMasked
commutative: false
extension: "AVX.*"
constImm: 2
masked: true
documentation: !string |-
- // DiffWithCeilWithPrecisionMasked computes the difference after ceiling with specified precision.
-
+ // NAME computes the difference after ceiling with specified precision.
- go: Trunc
commutative: false
extension: "AVX.*"
constImm: 3
documentation: !string |-
- // Trunc truncates elements towards zero.
+ // NAME truncates elements towards zero.
- go: TruncWithPrecisionMasked
commutative: false
extension: "AVX.*"
constImm: 3
masked: true
documentation: !string |-
- // TruncWithPrecisionMasked truncates elements with specified precision.
+ // NAME truncates elements with specified precision.
- go: DiffWithTruncWithPrecisionMasked
commutative: false
extension: "AVX.*"
constImm: 3
masked: true
documentation: !string |-
- // DiffWithTruncWithPrecisionMasked computes the difference after truncating with specified precision.
-
+ // NAME computes the difference after truncating with specified precision.
- go: AddSub
commutative: false
extension: "AVX.*"
documentation: !string |-
- // AddSub subtracts even elements and adds odd elements of two vectors.
+ // NAME subtracts even elements and adds odd elements of two vectors.
- go: GaloisFieldAffineTransformMasked
masked: true
commutative: false
extension: "AVX.*"
documentation: !string |-
- // GaloisFieldAffineTransformMasked computes an affine transformation in GF(2^8):
+ // NAME computes an affine transformation in GF(2^8):
// x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes;
// b is an 8-bit vector. The affine transformation is y * x + b, with each element of y
// corresponding to a group of 8 elements in x.
@@ -366,7 +361,7 @@
commutative: false
extension: "AVX.*"
documentation: !string |-
- // GaloisFieldAffineTransformInverseMasked computes an affine transformation in GF(2^8),
+ // NAME computes an affine transformation in GF(2^8),
// with x inverted with respect to reduction polynomial x^8 + x^4 + x^3 + x + 1:
// x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes;
// b is an 8-bit vector. The affine transformation is y * x + b, with each element of y
@@ -376,288 +371,279 @@
commutative: false
extension: "AVX.*"
documentation: !string |-
- // GaloisFieldMulMasked computes element-wise GF(2^8) multiplication with
+ // NAME computes element-wise GF(2^8) multiplication with
// reduction polynomial x^8 + x^4 + x^3 + x + 1.
- go: Average
commutative: true
extension: "AVX.*" # VPAVGB/W are available across various AVX versions
documentation: !string |-
- // Average computes the rounded average of corresponding elements.
+ // NAME computes the rounded average of corresponding elements.
- go: AverageMasked
commutative: true
masked: true
extension: "AVX512.*" # Masked operations are typically AVX512
documentation: !string |-
- // AverageMasked computes the rounded average of corresponding elements.
-
+ // NAME computes the rounded average of corresponding elements.
- go: Absolute
commutative: false
# Unary operation, not commutative
extension: "AVX.*" # VPABSB/W/D are AVX, VPABSQ is AVX512
documentation: !string |-
- // Absolute computes the absolute value of each element.
+ // NAME computes the absolute value of each element.
- go: AbsoluteMasked
commutative: false
masked: true
extension: "AVX512.*"
documentation: !string |-
- // AbsoluteMasked computes the absolute value of each element.
-
+ // NAME computes the absolute value of each element.
- go: Sign
# Applies sign of second operand to first: sign(val, sign_src)
commutative: false
extension: "AVX.*"
documentation: !string |-
- // Sign returns the product of the first operand with -1, 0, or 1,
+ // NAME returns the product of the first operand with -1, 0, or 1,
// whichever constant is nearest to the value of the second operand.
# Sign does not have masked version
-
- go: PopCountMasked
commutative: false
masked: true
extension: "AVX512.*" # VPOPCNT instructions are AVX512 (BITALG or VPOPCNTDQ)
documentation: !string |-
- // PopCountMasked counts the number of set bits in each element.
+ // NAME counts the number of set bits in each element.
- go: PairDotProd
commutative: false
extension: "AVX.*"
documentation: !string |-
- // PairDotProd multiplies the elements and add the pairs together,
+ // NAME multiplies the elements and adds the pairs together,
// yielding a vector of half as many elements with twice the input element size.
- go: PairDotProdMasked
masked: true
commutative: false
extension: "AVX.*"
documentation: !string |-
- // PairDotProdMasked multiplies the elements and add the pairs together,
+ // NAME multiplies the elements and adds the pairs together,
// yielding a vector of half as many elements with twice the input element size.
# TODO: maybe simplify this name within the receiver-type + method-naming scheme we use.
- go: SaturatedUnsignedSignedPairDotProd
commutative: false
extension: "AVX.*"
documentation: !string |-
- // SaturatedUnsignedSignedPairDotProd multiplies the elements and add the pairs together with saturation,
+ // NAME multiplies the elements and adds the pairs together with saturation,
// yielding a vector of half as many elements with twice the input element size.
- go: SaturatedUnsignedSignedPairDotProdMasked
masked: true
commutative: false
extension: "AVX.*"
documentation: !string |-
- // SaturatedUnsignedSignedPairDotProdMasked multiplies the elements and add the pairs together with saturation,
+ // NAME multiplies the elements and adds the pairs together with saturation,
// yielding a vector of half as many elements with twice the input element size.
-
# QuadDotProd, i.e. VPDPBUSD(S) are operations with src/dst on the same register, we are not supporting this as of now.
- go: DotProdBroadcast
commutative: true
extension: "AVX.*"
documentation: !string |-
- // DotProdBroadcast multiplies all elements and broadcasts the sum.
+ // NAME multiplies all elements and broadcasts the sum.
- go: UnsignedSignedQuadDotProdAccumulate
commutative: false
extension: "AVX.*"
documentation: !string |-
- // UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of x and y and then adds z.
+ // NAME performs dot products on groups of 4 elements of x and y and then adds z.
- go: UnsignedSignedQuadDotProdAccumulateMasked
masked: true
commutative: false
extension: "AVX.*"
documentation: !string |-
- // UnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of x and y and then adds z.
+ // NAME performs dot products on groups of 4 elements of x and y and then adds z.
- go: SaturatedUnsignedSignedQuadDotProdAccumulate
commutative: false
extension: "AVX.*"
documentation: !string |-
- // SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of x and y and then adds z.
+ // NAME performs dot products on groups of 4 elements of x and y and then adds z.
- go: SaturatedUnsignedSignedQuadDotProdAccumulateMasked
masked: true
commutative: false
extension: "AVX.*"
documentation: !string |-
- // SaturatedUnsignedSignedQuadDotProdAccumulateMasked multiplies performs dot products on groups of 4 elements of x and y and then adds z.
+ // NAME performs dot products on groups of 4 elements of x and y and then adds z.
- go: PairDotProdAccumulate
commutative: false
extension: "AVX.*"
documentation: !string |-
- // PairDotProdAccumulate performs dot products on pairs of elements of x and y and then adds z.
+ // NAME performs dot products on pairs of elements of x and y and then adds z.
- go: PairDotProdAccumulateMasked
masked: true
commutative: false
extension: "AVX.*"
documentation: !string |-
- // PairDotProdAccumulateMasked performs dot products on pairs of elements of x and y and then adds z.
+ // NAME performs dot products on pairs of elements of x and y and then adds z.
- go: SaturatedPairDotProdAccumulate
commutative: false
extension: "AVX.*"
documentation: !string |-
- // SaturatedPairDotProdAccumulate performs dot products on pairs of elements of x and y and then adds z.
+ // NAME performs dot products on pairs of elements of x and y and then adds z.
- go: SaturatedPairDotProdAccumulateMasked
masked: true
commutative: false
extension: "AVX.*"
documentation: !string |-
- // SaturatedPairDotProdAccumulateMasked performs dot products on pairs of elements of x and y and then adds z.
+ // NAME performs dot products on pairs of elements of x and y and then adds z.
- go: FusedMultiplyAddMasked
masked: true
commutative: false
extension: "AVX.*"
documentation: !string |-
- // FusedMultiplyAddMasked performs (x * y) + z.
+ // NAME performs (x * y) + z.
- go: FusedMultiplyAddSubMasked
masked: true
commutative: false
extension: "AVX.*"
documentation: !string |-
- // FusedMultiplyAddSubMasked performs (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
+ // NAME performs (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
- go: FusedMultiplySubAddMasked
masked: true
commutative: false
extension: "AVX.*"
documentation: !string |-
- // FusedMultiplySubAddMasked performs (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
+ // NAME performs (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
- go: Max
commutative: true
extension: "AVX.*"
documentation: !string |-
- // Max computes the maximum of corresponding elements.
+ // NAME computes the maximum of corresponding elements.
- go: MaxMasked
commutative: true
masked: true
extension: "AVX.*"
documentation: !string |-
- // MaxMasked computes the maximum of corresponding elements.
+ // NAME computes the maximum of corresponding elements.
- go: Min
commutative: true
extension: "AVX.*"
documentation: !string |-
- // Min computes the minimum of corresponding elements.
+ // NAME computes the minimum of corresponding elements.
- go: MinMasked
commutative: true
masked: true
extension: "AVX.*"
documentation: !string |-
- // MinMasked computes the minimum of corresponding elements.
+ // NAME computes the minimum of corresponding elements.
- go: SetElem
commutative: false
extension: "AVX.*"
documentation: !string |-
- // SetElem sets a single constant-indexed element's value.
+ // NAME sets a single constant-indexed element's value.
- go: GetElem
commutative: false
extension: "AVX.*"
documentation: !string |-
- // GetElem retrieves a single constant-indexed element's value.
+ // NAME retrieves a single constant-indexed element's value.
- go: Set128
commutative: false
extension: "AVX.*"
documentation: !string |-
- // Set128 combines a 128-bit vector with a 256-bit vector, where the constant operand specifies whether the low (0) or high (1) half is receives the smaller vector.
+ // NAME combines a 128-bit vector with a 256-bit vector, where the constant operand specifies whether the low (0) or high (1) half receives the smaller vector.
- go: Get128
commutative: false
extension: "AVX.*"
documentation: !string |-
- // Get128 retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand.
-
-
+ // NAME retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand.
- go: Permute
commutative: false
extension: "AVX.*"
documentation: !string |-
- // Permute performs a full permutation of vector x using indices:
+ // NAME performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the needed bits to represent x's index are used in indices' elements.
-
- go: PermuteMasked
commutative: false
masked: true
extension: "AVX.*"
documentation: !string |-
- // PermuteMasked performs a full permutation of vector y using indices:
+ // NAME performs a full permutation of vector y using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the needed bits to represent x's index are used in indices' elements.
-
- go: Permute2Masked # Permute2Masked is only available on or after AVX512
commutative: false
masked: true
extension: "AVX.*"
documentation: !string |-
- // Permute2Masked performs a full permutation of vector x, y using indices:
+ // NAME performs a full permutation of vector x, y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// where xy is x appending y.
// Only the needed bits to represent xy's index are used in indices' elements.
-
- go: Compress
commutative: false
# The mask in Compress is a control mask rather than a write mask, so we don't mark it "masked"
extension: "AVX.*"
documentation: !string |-
- // Compress performs a compression on vector x using mask by
+ // NAME performs a compression on vector x using mask by
// selecting elements as indicated by mask, and pack them to lower indexed elements.
- go: Mul
commutative: true
extension: "AVX.*"
documentation: !string |-
- // Mul multiplies corresponding elements of two vectors.
+ // NAME multiplies corresponding elements of two vectors.
- go: MulEvenWiden
commutative: true
extension: "AVX.*"
documentation: !string |-
- // MulEvenWiden multiplies even-indexed elements, widening the result.
+ // NAME multiplies even-indexed elements, widening the result.
// Result[i] = v1.Even[i] * v2.Even[i].
- go: MulHigh
commutative: true
extension: "AVX.*"
documentation: !string |-
- // MulHigh multiplies elements and stores the high part of the result.
+ // NAME multiplies elements and stores the high part of the result.
- go: MulLow
commutative: true
extension: "AVX.*"
documentation: !string |-
- // MulLow multiplies elements and stores the low part of the result.
+ // NAME multiplies elements and stores the low part of the result.
- go: MulMasked
masked: true
commutative: true
extension: "AVX.*"
documentation: !string |-
- // MulMasked multiplies corresponding elements of two vectors.
+ // NAME multiplies corresponding elements of two vectors.
- go: MulEvenWidenMasked
masked: true
commutative: true
extension: "AVX.*"
documentation: !string |-
- // MulEvenWidenMasked multiplies even-indexed elements, widening the result.
+ // NAME multiplies even-indexed elements, widening the result.
// Result[i] = v1.Even[i] * v2.Even[i].
- go: MulHighMasked
masked: true
commutative: true
extension: "AVX.*"
documentation: !string |-
- // MulHighMasked multiplies elements and stores the high part of the result.
+ // NAME multiplies elements and stores the high part of the result.
- go: MulLowMasked
masked: true
commutative: true
extension: "AVX.*"
documentation: !string |-
- // MulLowMasked multiplies elements and stores the low part of the result.
+ // NAME multiplies elements and stores the low part of the result.
- go: ShiftAllLeft
nameAndSizeCheck: true
commutative: false
extension: "AVX.*"
documentation: !string |-
- // ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
+ // NAME shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
- go: ShiftAllLeftMasked
nameAndSizeCheck: true
masked: true
commutative: false
extension: "AVX.*"
documentation: !string |-
- // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
+ // NAME shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
- go: ShiftAllRight
signed: false
nameAndSizeCheck: true
commutative: false
extension: "AVX.*"
documentation: !string |-
- // ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
+ // NAME shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
- go: ShiftAllRightMasked
signed: false
nameAndSizeCheck: true
@@ -665,14 +651,14 @@
commutative: false
extension: "AVX.*"
documentation: !string |-
- // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
+ // NAME shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
- go: ShiftAllRight
signed: true
nameAndSizeCheck: true
commutative: false
extension: "AVX.*"
documentation: !string |-
- // ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
+ // NAME shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
- go: ShiftAllRightMasked
signed: true
nameAndSizeCheck: true
@@ -680,28 +666,27 @@
commutative: false
extension: "AVX.*"
documentation: !string |-
- // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
-
+ // NAME shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
- go: ShiftLeft
nameAndSizeCheck: true
commutative: false
extension: "AVX.*"
documentation: !string |-
- // ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
+ // NAME shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
- go: ShiftLeftMasked
nameAndSizeCheck: true
masked: true
commutative: false
extension: "AVX.*"
documentation: !string |-
- // ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
+ // NAME shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
- go: ShiftRight
signed: false
nameAndSizeCheck: true
commutative: false
extension: "AVX.*"
documentation: !string |-
- // ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+ // NAME shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
- go: ShiftRightMasked
signed: false
nameAndSizeCheck: true
@@ -709,14 +694,14 @@
commutative: false
extension: "AVX.*"
documentation: !string |-
- // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+ // NAME shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
- go: ShiftRight
signed: true
nameAndSizeCheck: true
commutative: false
extension: "AVX.*"
documentation: !string |-
- // ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
+ // NAME shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
- go: ShiftRightMasked
signed: true
nameAndSizeCheck: true
@@ -724,44 +709,42 @@
commutative: false
extension: "AVX.*"
documentation: !string |-
- // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-
+ // NAME shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
- go: RotateAllLeftMasked
nameAndSizeCheck: true
masked: true
commutative: false
extension: "AVX.*"
documentation: !string |-
- // RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate.
+ // NAME rotates each element to the left by the number of bits specified by the immediate.
- go: RotateLeftMasked
nameAndSizeCheck: true
masked: true
commutative: false
extension: "AVX.*"
documentation: !string |-
- // RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements.
+ // NAME rotates each element in x to the left by the number of bits specified by y's corresponding elements.
- go: RotateAllRightMasked
nameAndSizeCheck: true
masked: true
commutative: false
extension: "AVX.*"
documentation: !string |-
- // RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate.
+ // NAME rotates each element to the right by the number of bits specified by the immediate.
- go: RotateRightMasked
nameAndSizeCheck: true
masked: true
commutative: false
extension: "AVX.*"
documentation: !string |-
- // RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements.
-
+ // NAME rotates each element in x to the right by the number of bits specified by y's corresponding elements.
- go: ShiftAllLeftAndFillUpperFromMasked
nameAndSizeCheck: true
masked: true
commutative: false
extension: "AVX.*"
documentation: !string |-
- // ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
+ // NAME shifts each element of x to the left by the number of bits specified by the
// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
- go: ShiftAllRightAndFillUpperFromMasked
nameAndSizeCheck: true
@@ -769,7 +752,7 @@
commutative: false
extension: "AVX.*"
documentation: !string |-
- // ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
+ // NAME shifts each element of x to the right by the number of bits specified by the
// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
- go: ShiftLeftAndFillUpperFromMasked
nameAndSizeCheck: true
@@ -777,7 +760,7 @@
commutative: false
extension: "AVX.*"
documentation: !string |-
- // ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
+ // NAME shifts each element of x to the left by the number of bits specified by the
// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
- go: ShiftRightAndFillUpperFromMasked
nameAndSizeCheck: true
@@ -785,5 +768,5 @@
commutative: false
extension: "AVX.*"
documentation: !string |-
- // ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
+ // NAME shifts each element of x to the right by the number of bits specified by the
// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go
index ea4d56a..be0a945 100644
--- a/internal/simdgen/gen_utility.go
+++ b/internal/simdgen/gen_utility.go
@@ -569,8 +569,8 @@
}
maskedOpName := op2.Go
op2.Go = strings.TrimSuffix(op2.Go, "Masked")
- op2Doc := strings.ReplaceAll(*op2.Documentation, maskedOpName, op2.Go)
- op2.Documentation = &op2Doc
+ op2Doc := strings.ReplaceAll(op2.Documentation, maskedOpName, op2.Go)
+ op2.Documentation = op2Doc
op2.Masked = nil // It's no longer masked.
splited = append(splited, op2)
} else {
@@ -583,9 +583,7 @@
func insertMaskDescToDoc(ops []Operation) {
for i, _ := range ops {
if ops[i].Masked != nil && *ops[i].Masked {
- if ops[i].Documentation != nil {
- *ops[i].Documentation += "\n//\n// This operation is applied selectively under a write mask."
- }
+ ops[i].Documentation += "\n//\n// This operation is applied selectively under a write mask."
}
}
}
diff --git a/internal/simdgen/godefs.go b/internal/simdgen/godefs.go
index 32467de..b45c249 100644
--- a/internal/simdgen/godefs.go
+++ b/internal/simdgen/godefs.go
@@ -7,6 +7,7 @@
import (
"fmt"
"log"
+ "regexp"
"slices"
"strconv"
"strings"
@@ -16,6 +17,16 @@
type Operation struct {
rawOperation
+
+ // Documentation is the doc string for this API.
+ //
+ // It is computed from the raw documentation:
+ //
+ // - "NAME" is replaced by the Go method name.
+ //
+ // - For masked operations, the method name is updated and a sentence about
+ // masking is added.
+ Documentation string
}
// rawOperation is the unifier representation of an [Operation]. It is
@@ -49,6 +60,15 @@
if err := v.Decode(&o.rawOperation); err != nil {
return err
}
+
+ // Compute doc string.
+ if o.rawOperation.Documentation != nil {
+ o.Documentation = *o.rawOperation.Documentation
+ } else {
+ o.Documentation = "// UNDOCUMENTED"
+ }
+ o.Documentation = regexp.MustCompile(`\bNAME\b`).ReplaceAllString(o.Documentation, o.Go)
+
return nil
}
diff --git a/internal/simdgen/ops/AddSub/categories.yaml b/internal/simdgen/ops/AddSub/categories.yaml
index 9bae42e..667508b 100644
--- a/internal/simdgen/ops/AddSub/categories.yaml
+++ b/internal/simdgen/ops/AddSub/categories.yaml
@@ -3,67 +3,67 @@
commutative: true
extension: "AVX.*"
documentation: !string |-
- // Add adds corresponding elements of two vectors.
+ // NAME adds corresponding elements of two vectors.
- go: SaturatedAdd
commutative: true
extension: "AVX.*"
documentation: !string |-
- // SaturatedAdd adds corresponding elements of two vectors with saturation.
+ // NAME adds corresponding elements of two vectors with saturation.
- go: AddMasked
masked: true
commutative: true
extension: "AVX.*"
documentation: !string |-
- // AddMasked adds corresponding elements of two vectors.
+ // NAME adds corresponding elements of two vectors.
- go: SaturatedAddMasked
masked: true
commutative: true
extension: "AVX.*"
documentation: !string |-
- // SaturatedAddMasked adds corresponding elements of two vectors with saturation.
+ // NAME adds corresponding elements of two vectors with saturation.
- go: Sub
commutative: false
extension: "AVX.*"
documentation: !string |-
- // Sub subtracts corresponding elements of two vectors.
+ // NAME subtracts corresponding elements of two vectors.
- go: SaturatedSub
commutative: false
extension: "AVX.*"
documentation: !string |-
- // SaturatedSub subtracts corresponding elements of two vectors with saturation.
+ // NAME subtracts corresponding elements of two vectors with saturation.
- go: SubMasked
masked: true
commutative: false
extension: "AVX.*"
documentation: !string |-
- // SubMasked subtracts corresponding elements of two vectors.
+ // NAME subtracts corresponding elements of two vectors.
- go: SaturatedSubMasked
masked: true
commutative: false
extension: "AVX.*"
documentation: !string |-
- // SaturatedSubMasked subtracts corresponding elements of two vectors with saturation.
+ // NAME subtracts corresponding elements of two vectors with saturation.
- go: PairwiseAdd
commutative: false
extension: "AVX.*"
documentation: !string |-
- // PairwiseAdd horizontally adds adjacent pairs of elements.
+ // NAME horizontally adds adjacent pairs of elements.
// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
- go: PairwiseSub
commutative: false
extension: "AVX.*"
documentation: !string |-
- // PairwiseSub horizontally subtracts adjacent pairs of elements.
+ // NAME horizontally subtracts adjacent pairs of elements.
// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
- go: SaturatedPairwiseAdd
commutative: false
extension: "AVX.*"
documentation: !string |-
- // SaturatedPairwiseAdd horizontally adds adjacent pairs of elements with saturation.
+ // NAME horizontally adds adjacent pairs of elements with saturation.
// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
- go: SaturatedPairwiseSub
commutative: false
extension: "AVX.*"
documentation: !string |-
- // SaturatedPairwiseSub horizontally subtracts adjacent pairs of elements with saturation.
+ // NAME horizontally subtracts adjacent pairs of elements with saturation.
// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
diff --git a/internal/simdgen/ops/BitwiseLogic/categories.yaml b/internal/simdgen/ops/BitwiseLogic/categories.yaml
index c6a00cc..3d2eda7 100644
--- a/internal/simdgen/ops/BitwiseLogic/categories.yaml
+++ b/internal/simdgen/ops/BitwiseLogic/categories.yaml
@@ -3,45 +3,46 @@
commutative: true
extension: "AVX.*"
documentation: !string |-
- // And performs a bitwise AND operation between two vectors.
+ // NAME performs a bitwise AND operation between two vectors.
- go: AndMasked
masked: true
commutative: true
extension: "AVX.*"
documentation: !string |-
- // AndMasked performs a bitwise AND operation between two vectors.
+ // NAME performs a bitwise AND operation between two vectors.
- go: Or
commutative: true
extension: "AVX.*"
documentation: !string |-
- // Or performs a bitwise OR operation between two vectors.
+ // NAME performs a bitwise OR operation between two vectors.
- go: OrMasked
masked: true
commutative: true
extension: "AVX.*"
documentation: !string |-
- // OrMasked performs a bitwise OR operation between two vectors.
+ // NAME performs a bitwise OR operation between two vectors.
- go: AndNot
commutative: false
extension: "AVX.*"
documentation: !string |-
- // AndNot performs a bitwise x &^ y.
+ // NAME performs a bitwise x &^ y.
- go: AndNotMasked
masked: true
commutative: false
extension: "AVX.*"
documentation: !string |-
- // AndNotMasked performs a bitwise x &^ y.
+ // NAME performs a bitwise x &^ y.
- go: Xor
commutative: true
extension: "AVX.*"
documentation: !string |-
- // Xor performs a bitwise XOR operation between two vectors.
+ // NAME performs a bitwise XOR operation between two vectors.
- go: XorMasked
masked: true
commutative: true
extension: "AVX.*"
documentation: !string |-
- // XorMasked performs a bitwise XOR operation between two vectors.
+ // NAME performs a bitwise XOR operation between two vectors.
+
# We also have PTEST and VPTERNLOG, those should be hidden from the users
# and only appear in rewrite rules.
diff --git a/internal/simdgen/ops/Compares/categories.yaml b/internal/simdgen/ops/Compares/categories.yaml
index d108051..e17e45d 100644
--- a/internal/simdgen/ops/Compares/categories.yaml
+++ b/internal/simdgen/ops/Compares/categories.yaml
@@ -11,90 +11,89 @@
commutative: true
extension: "AVX.*"
documentation: !string |-
- // Equal compares for equality.
+ // NAME compares for equality.
- go: Less
constImm: 1
commutative: false
extension: "AVX.*"
documentation: !string |-
- // Less compares for less than.
+ // NAME compares for less than.
- go: LessEqual
constImm: 2
commutative: false
extension: "AVX.*"
documentation: !string |-
- // LessEqual compares for less than or equal.
+ // NAME compares for less than or equal.
- go: IsNan # For float only.
constImm: 3
commutative: true
extension: "AVX.*"
documentation: !string |-
- // IsNan checks if elements are NaN. Use as x.IsNan(x).
+ // NAME checks if elements are NaN. Use as x.IsNan(x).
- go: NotEqual
constImm: 4
commutative: true
extension: "AVX.*"
documentation: !string |-
- // NotEqual compares for inequality.
+ // NAME compares for inequality.
- go: GreaterEqual
constImm: 13
commutative: false
extension: "AVX.*"
documentation: !string |-
- // GreaterEqual compares for greater than or equal.
+ // NAME compares for greater than or equal.
- go: Greater
constImm: 14
commutative: false
extension: "AVX.*"
documentation: !string |-
- // Greater compares for greater than.
-
+ // NAME compares for greater than.
- go: EqualMasked
constImm: 0
masked: true
commutative: true
extension: "AVX.*"
documentation: !string |-
- // EqualMasked compares for equality.
+ // NAME compares for equality.
- go: LessMasked
constImm: 1
masked: true
commutative: false
extension: "AVX.*"
documentation: !string |-
- // LessMasked compares for less than.
+ // NAME compares for less than.
- go: LessEqualMasked
constImm: 2
masked: true
commutative: false
extension: "AVX.*"
documentation: !string |-
- // LessEqualMasked compares for less than or equal.
+ // NAME compares for less than or equal.
- go: IsNanMasked # For float only.
constImm: 3
masked: true
commutative: true
extension: "AVX.*"
documentation: !string |-
- // IsNanMasked checks if elements are NaN. Use as x.IsNan(x).
+ // NAME checks if elements are NaN. Use as x.IsNan(x).
- go: NotEqualMasked
constImm: 4
masked: true
commutative: true
extension: "AVX.*"
documentation: !string |-
- // NotEqualMasked compares for inequality.
+ // NAME compares for inequality.
- go: GreaterEqualMasked
constImm: 13
masked: true
commutative: false
extension: "AVX.*"
documentation: !string |-
- // GreaterEqualMasked compares for greater than or equal.
+ // NAME compares for greater than or equal.
- go: GreaterMasked
constImm: 14
masked: true
commutative: false
extension: "AVX.*"
documentation: !string |-
- // GreaterMasked compares for greater than.
+ // NAME compares for greater than.
diff --git a/internal/simdgen/ops/FPonlyArith/categories.yaml b/internal/simdgen/ops/FPonlyArith/categories.yaml
index 1347b53..5329204 100644
--- a/internal/simdgen/ops/FPonlyArith/categories.yaml
+++ b/internal/simdgen/ops/FPonlyArith/categories.yaml
@@ -3,139 +3,134 @@
commutative: false
extension: "AVX.*"
documentation: !string |-
- // Div divides elements of two vectors.
+ // NAME divides elements of two vectors.
- go: DivMasked
commutative: false
masked: true
extension: "AVX.*"
documentation: !string |-
- // DivMasked divides elements of two vectors.
+ // NAME divides elements of two vectors.
- go: Sqrt
commutative: false
extension: "AVX.*"
documentation: !string |-
- // Sqrt computes the square root of each element.
+ // NAME computes the square root of each element.
- go: SqrtMasked
commutative: false
masked: true
extension: "AVX.*"
documentation: !string |-
- // SqrtMasked computes the square root of each element.
+ // NAME computes the square root of each element.
- go: ApproximateReciprocal
commutative: false
extension: "AVX.*"
documentation: !string |-
- // ApproximateReciprocal computes an approximate reciprocal of each element.
+ // NAME computes an approximate reciprocal of each element.
- go: ApproximateReciprocalMasked
commutative: false
masked: true
extension: "AVX.*"
documentation: !string |-
- // ApproximateReciprocalMasked computes an approximate reciprocal of each element.
+ // NAME computes an approximate reciprocal of each element.
- go: ApproximateReciprocalOfSqrt
commutative: false
extension: "AVX.*"
documentation: !string |-
- // ApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element.
+ // NAME computes an approximate reciprocal of the square root of each element.
- go: ApproximateReciprocalOfSqrtMasked
commutative: false
masked: true
extension: "AVX.*"
documentation: !string |-
- // ApproximateReciprocalOfSqrtMasked computes an approximate reciprocal of the square root of each element.
+ // NAME computes an approximate reciprocal of the square root of each element.
- go: MulByPowOf2Masked # This operation is all after AVX512, the unmasked version will be generated.
commutative: false
masked: true
extension: "AVX.*"
documentation: !string |-
- // MulByPowOf2Masked multiplies elements by a power of 2.
-
+ // NAME multiplies elements by a power of 2.
- go: Round
commutative: false
extension: "AVX.*"
constImm: 0
documentation: !string |-
- // Round rounds elements to the nearest integer.
+ // NAME rounds elements to the nearest integer.
- go: RoundWithPrecisionMasked
commutative: false
extension: "AVX.*"
constImm: 0
masked: true
documentation: !string |-
- // RoundWithPrecisionMasked rounds elements with specified precision.
+ // NAME rounds elements with specified precision.
- go: DiffWithRoundWithPrecisionMasked
commutative: false
extension: "AVX.*"
constImm: 0
masked: true
documentation: !string |-
- // DiffWithRoundWithPrecisionMasked computes the difference after rounding with specified precision.
-
+ // NAME computes the difference after rounding with specified precision.
- go: Floor
commutative: false
extension: "AVX.*"
constImm: 1
documentation: !string |-
- // Floor rounds elements down to the nearest integer.
+ // NAME rounds elements down to the nearest integer.
- go: FloorWithPrecisionMasked
commutative: false
extension: "AVX.*"
constImm: 1
masked: true
documentation: !string |-
- // FloorWithPrecisionMasked rounds elements down with specified precision.
+ // NAME rounds elements down with specified precision.
- go: DiffWithFloorWithPrecisionMasked
commutative: false
extension: "AVX.*"
constImm: 1
masked: true
documentation: !string |-
- // DiffWithFloorWithPrecisionMasked computes the difference after flooring with specified precision.
-
+ // NAME computes the difference after flooring with specified precision.
- go: Ceil
commutative: false
extension: "AVX.*"
constImm: 2
documentation: !string |-
- // Ceil rounds elements up to the nearest integer.
+ // NAME rounds elements up to the nearest integer.
- go: CeilWithPrecisionMasked
commutative: false
extension: "AVX.*"
constImm: 2
masked: true
documentation: !string |-
- // CeilWithPrecisionMasked rounds elements up with specified precision.
+ // NAME rounds elements up with specified precision.
- go: DiffWithCeilWithPrecisionMasked
commutative: false
extension: "AVX.*"
constImm: 2
masked: true
documentation: !string |-
- // DiffWithCeilWithPrecisionMasked computes the difference after ceiling with specified precision.
-
+ // NAME computes the difference after ceiling with specified precision.
- go: Trunc
commutative: false
extension: "AVX.*"
constImm: 3
documentation: !string |-
- // Trunc truncates elements towards zero.
+ // NAME truncates elements towards zero.
- go: TruncWithPrecisionMasked
commutative: false
extension: "AVX.*"
constImm: 3
masked: true
documentation: !string |-
- // TruncWithPrecisionMasked truncates elements with specified precision.
+ // NAME truncates elements with specified precision.
- go: DiffWithTruncWithPrecisionMasked
commutative: false
extension: "AVX.*"
constImm: 3
masked: true
documentation: !string |-
- // DiffWithTruncWithPrecisionMasked computes the difference after truncating with specified precision.
-
+ // NAME computes the difference after truncating with specified precision.
- go: AddSub
commutative: false
extension: "AVX.*"
documentation: !string |-
- // AddSub subtracts even elements and adds odd elements of two vectors.
\ No newline at end of file
+ // NAME subtracts even elements and adds odd elements of two vectors.
diff --git a/internal/simdgen/ops/GaloisField/categories.yaml b/internal/simdgen/ops/GaloisField/categories.yaml
index 4184c5e..62d8709 100644
--- a/internal/simdgen/ops/GaloisField/categories.yaml
+++ b/internal/simdgen/ops/GaloisField/categories.yaml
@@ -4,7 +4,7 @@
commutative: false
extension: "AVX.*"
documentation: !string |-
- // GaloisFieldAffineTransformMasked computes an affine transformation in GF(2^8):
+ // NAME computes an affine transformation in GF(2^8):
// x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes;
// b is an 8-bit vector. The affine transformation is y * x + b, with each element of y
// corresponding to a group of 8 elements in x.
@@ -13,7 +13,7 @@
commutative: false
extension: "AVX.*"
documentation: !string |-
- // GaloisFieldAffineTransformInverseMasked computes an affine transformation in GF(2^8),
+ // NAME computes an affine transformation in GF(2^8),
// with x inverted with respect to reduction polynomial x^8 + x^4 + x^3 + x + 1:
// x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes;
// b is an 8-bit vector. The affine transformation is y * x + b, with each element of y
@@ -23,5 +23,5 @@
commutative: false
extension: "AVX.*"
documentation: !string |-
- // GaloisFieldMulMasked computes element-wise GF(2^8) multiplication with
- // reduction polynomial x^8 + x^4 + x^3 + x + 1.
\ No newline at end of file
+ // NAME computes element-wise GF(2^8) multiplication with
+ // reduction polynomial x^8 + x^4 + x^3 + x + 1.
diff --git a/internal/simdgen/ops/IntOnlyArith/categories.yaml b/internal/simdgen/ops/IntOnlyArith/categories.yaml
index fc277f8..76ab14b 100644
--- a/internal/simdgen/ops/IntOnlyArith/categories.yaml
+++ b/internal/simdgen/ops/IntOnlyArith/categories.yaml
@@ -3,39 +3,36 @@
commutative: true
extension: "AVX.*" # VPAVGB/W are available across various AVX versions
documentation: !string |-
- // Average computes the rounded average of corresponding elements.
+ // NAME computes the rounded average of corresponding elements.
- go: AverageMasked
commutative: true
masked: true
extension: "AVX512.*" # Masked operations are typically AVX512
documentation: !string |-
- // AverageMasked computes the rounded average of corresponding elements.
-
+ // NAME computes the rounded average of corresponding elements.
- go: Absolute
commutative: false
# Unary operation, not commutative
extension: "AVX.*" # VPABSB/W/D are AVX, VPABSQ is AVX512
documentation: !string |-
- // Absolute computes the absolute value of each element.
+ // NAME computes the absolute value of each element.
- go: AbsoluteMasked
commutative: false
masked: true
extension: "AVX512.*"
documentation: !string |-
- // AbsoluteMasked computes the absolute value of each element.
-
+ // NAME computes the absolute value of each element.
- go: Sign
# Applies sign of second operand to first: sign(val, sign_src)
commutative: false
extension: "AVX.*"
documentation: !string |-
- // Sign returns the product of the first operand with -1, 0, or 1,
+ // NAME returns the product of the first operand with -1, 0, or 1,
// whichever constant is nearest to the value of the second operand.
# Sign does not have masked version
-
- go: PopCountMasked
commutative: false
masked: true
extension: "AVX512.*" # VPOPCNT instructions are AVX512 (BITALG or VPOPCNTDQ)
documentation: !string |-
- // PopCountMasked counts the number of set bits in each element.
\ No newline at end of file
+ // NAME counts the number of set bits in each element.
diff --git a/internal/simdgen/ops/MLOps/categories.yaml b/internal/simdgen/ops/MLOps/categories.yaml
index d26b846..65f7462 100644
--- a/internal/simdgen/ops/MLOps/categories.yaml
+++ b/internal/simdgen/ops/MLOps/categories.yaml
@@ -3,95 +3,94 @@
commutative: false
extension: "AVX.*"
documentation: !string |-
- // PairDotProd multiplies the elements and add the pairs together,
+ // NAME multiplies the elements and adds the pairs together,
// yielding a vector of half as many elements with twice the input element size.
- go: PairDotProdMasked
masked: true
commutative: false
extension: "AVX.*"
documentation: !string |-
- // PairDotProdMasked multiplies the elements and add the pairs together,
+ // NAME multiplies the elements and adds the pairs together,
// yielding a vector of half as many elements with twice the input element size.
# TODO: maybe simplify this name within the receiver-type + method-naming scheme we use.
- go: SaturatedUnsignedSignedPairDotProd
commutative: false
extension: "AVX.*"
documentation: !string |-
- // SaturatedUnsignedSignedPairDotProd multiplies the elements and add the pairs together with saturation,
+ // NAME multiplies the elements and adds the pairs together with saturation,
// yielding a vector of half as many elements with twice the input element size.
- go: SaturatedUnsignedSignedPairDotProdMasked
masked: true
commutative: false
extension: "AVX.*"
documentation: !string |-
- // SaturatedUnsignedSignedPairDotProdMasked multiplies the elements and add the pairs together with saturation,
+ // NAME multiplies the elements and adds the pairs together with saturation,
// yielding a vector of half as many elements with twice the input element size.
-
# QuadDotProd, i.e. VPDPBUSD(S) are operations with src/dst on the same register, we are not supporting this as of now.
- go: DotProdBroadcast
commutative: true
extension: "AVX.*"
documentation: !string |-
- // DotProdBroadcast multiplies all elements and broadcasts the sum.
+ // NAME multiplies all elements and broadcasts the sum.
- go: UnsignedSignedQuadDotProdAccumulate
commutative: false
extension: "AVX.*"
documentation: !string |-
- // UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of x and y and then adds z.
+ // NAME performs dot products on groups of 4 elements of x and y and then adds z.
- go: UnsignedSignedQuadDotProdAccumulateMasked
masked: true
commutative: false
extension: "AVX.*"
documentation: !string |-
- // UnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of x and y and then adds z.
+ // NAME performs dot products on groups of 4 elements of x and y and then adds z.
- go: SaturatedUnsignedSignedQuadDotProdAccumulate
commutative: false
extension: "AVX.*"
documentation: !string |-
- // SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of x and y and then adds z.
+ // NAME performs dot products on groups of 4 elements of x and y and then adds z.
- go: SaturatedUnsignedSignedQuadDotProdAccumulateMasked
masked: true
commutative: false
extension: "AVX.*"
documentation: !string |-
- // SaturatedUnsignedSignedQuadDotProdAccumulateMasked multiplies performs dot products on groups of 4 elements of x and y and then adds z.
+ // NAME performs dot products on groups of 4 elements of x and y and then adds z.
- go: PairDotProdAccumulate
commutative: false
extension: "AVX.*"
documentation: !string |-
- // PairDotProdAccumulate performs dot products on pairs of elements of x and y and then adds z.
+ // NAME performs dot products on pairs of elements of x and y and then adds z.
- go: PairDotProdAccumulateMasked
masked: true
commutative: false
extension: "AVX.*"
documentation: !string |-
- // PairDotProdAccumulateMasked performs dot products on pairs of elements of x and y and then adds z.
+ // NAME performs dot products on pairs of elements of x and y and then adds z.
- go: SaturatedPairDotProdAccumulate
commutative: false
extension: "AVX.*"
documentation: !string |-
- // SaturatedPairDotProdAccumulate performs dot products on pairs of elements of x and y and then adds z.
+ // NAME performs dot products on pairs of elements of x and y and then adds z.
- go: SaturatedPairDotProdAccumulateMasked
masked: true
commutative: false
extension: "AVX.*"
documentation: !string |-
- // SaturatedPairDotProdAccumulateMasked performs dot products on pairs of elements of x and y and then adds z.
+ // NAME performs dot products on pairs of elements of x and y and then adds z.
- go: FusedMultiplyAddMasked
masked: true
commutative: false
extension: "AVX.*"
documentation: !string |-
- // FusedMultiplyAddMasked performs (x * y) + z.
+ // NAME performs (x * y) + z.
- go: FusedMultiplyAddSubMasked
masked: true
commutative: false
extension: "AVX.*"
documentation: !string |-
- // FusedMultiplyAddSubMasked performs (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
+ // NAME performs (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
- go: FusedMultiplySubAddMasked
masked: true
commutative: false
extension: "AVX.*"
documentation: !string |-
- // FusedMultiplySubAddMasked performs (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
+ // NAME performs (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
diff --git a/internal/simdgen/ops/MinMax/categories.yaml b/internal/simdgen/ops/MinMax/categories.yaml
index 929bfad..ce87994 100644
--- a/internal/simdgen/ops/MinMax/categories.yaml
+++ b/internal/simdgen/ops/MinMax/categories.yaml
@@ -3,21 +3,21 @@
commutative: true
extension: "AVX.*"
documentation: !string |-
- // Max computes the maximum of corresponding elements.
+ // NAME computes the maximum of corresponding elements.
- go: MaxMasked
commutative: true
masked: true
extension: "AVX.*"
documentation: !string |-
- // MaxMasked computes the maximum of corresponding elements.
+ // NAME computes the maximum of corresponding elements.
- go: Min
commutative: true
extension: "AVX.*"
documentation: !string |-
- // Min computes the minimum of corresponding elements.
+ // NAME computes the minimum of corresponding elements.
- go: MinMasked
commutative: true
masked: true
extension: "AVX.*"
documentation: !string |-
- // MinMasked computes the minimum of corresponding elements.
+ // NAME computes the minimum of corresponding elements.
diff --git a/internal/simdgen/ops/Moves/categories.yaml b/internal/simdgen/ops/Moves/categories.yaml
index d6c4d5d..dd30ca8 100644
--- a/internal/simdgen/ops/Moves/categories.yaml
+++ b/internal/simdgen/ops/Moves/categories.yaml
@@ -3,55 +3,50 @@
commutative: false
extension: "AVX.*"
documentation: !string |-
- // SetElem sets a single constant-indexed element's value.
+ // NAME sets a single constant-indexed element's value.
- go: GetElem
commutative: false
extension: "AVX.*"
documentation: !string |-
- // GetElem retrieves a single constant-indexed element's value.
+ // NAME retrieves a single constant-indexed element's value.
- go: Set128
commutative: false
extension: "AVX.*"
documentation: !string |-
- // Set128 combines a 128-bit vector with a 256-bit vector, where the constant operand specifies whether the low (0) or high (1) half is receives the smaller vector.
+ // NAME combines a 128-bit vector with a 256-bit vector, where the constant operand specifies whether the low (0) or high (1) half receives the smaller vector.
- go: Get128
commutative: false
extension: "AVX.*"
documentation: !string |-
- // Get128 retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand.
-
-
+ // NAME retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand.
- go: Permute
commutative: false
extension: "AVX.*"
documentation: !string |-
- // Permute performs a full permutation of vector x using indices:
+ // NAME performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the needed bits to represent x's index are used in indices' elements.
-
- go: PermuteMasked
commutative: false
masked: true
extension: "AVX.*"
documentation: !string |-
- // PermuteMasked performs a full permutation of vector y using indices:
+ // NAME performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the needed bits to represent x's index are used in indices' elements.
-
- go: Permute2Masked # Permute2Masked is only available on or after AVX512
commutative: false
masked: true
extension: "AVX.*"
documentation: !string |-
- // Permute2Masked performs a full permutation of vector x, y using indices:
+ // NAME performs a full permutation of vector x, y using indices:
// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
// where xy is x appending y.
// Only the needed bits to represent xy's index are used in indices' elements.
-
- go: Compress
commutative: false
# The mask in Compress is a control mask rather than a write mask, so we don't mark it "masked"
extension: "AVX.*"
documentation: !string |-
- // Compress performs a compression on vector x using mask by
+ // NAME performs a compression on vector x using mask by
// selecting elements as indicated by mask, and pack them to lower indexed elements.
diff --git a/internal/simdgen/ops/Mul/categories.yaml b/internal/simdgen/ops/Mul/categories.yaml
index 1884d66..8dc51f4 100644
--- a/internal/simdgen/ops/Mul/categories.yaml
+++ b/internal/simdgen/ops/Mul/categories.yaml
@@ -3,45 +3,45 @@
commutative: true
extension: "AVX.*"
documentation: !string |-
- // Mul multiplies corresponding elements of two vectors.
+ // NAME multiplies corresponding elements of two vectors.
- go: MulEvenWiden
commutative: true
extension: "AVX.*"
documentation: !string |-
- // MulEvenWiden multiplies even-indexed elements, widening the result.
+ // NAME multiplies even-indexed elements, widening the result.
// Result[i] = v1.Even[i] * v2.Even[i].
- go: MulHigh
commutative: true
extension: "AVX.*"
documentation: !string |-
- // MulHigh multiplies elements and stores the high part of the result.
+ // NAME multiplies elements and stores the high part of the result.
- go: MulLow
commutative: true
extension: "AVX.*"
documentation: !string |-
- // MulLow multiplies elements and stores the low part of the result.
+ // NAME multiplies elements and stores the low part of the result.
- go: MulMasked
masked: true
commutative: true
extension: "AVX.*"
documentation: !string |-
- // MulMasked multiplies corresponding elements of two vectors.
+ // NAME multiplies corresponding elements of two vectors.
- go: MulEvenWidenMasked
masked: true
commutative: true
extension: "AVX.*"
documentation: !string |-
- // MulEvenWidenMasked multiplies even-indexed elements, widening the result.
+ // NAME multiplies even-indexed elements, widening the result.
// Result[i] = v1.Even[i] * v2.Even[i].
- go: MulHighMasked
masked: true
commutative: true
extension: "AVX.*"
documentation: !string |-
- // MulHighMasked multiplies elements and stores the high part of the result.
+ // NAME multiplies elements and stores the high part of the result.
- go: MulLowMasked
masked: true
commutative: true
extension: "AVX.*"
documentation: !string |-
- // MulLowMasked multiplies elements and stores the low part of the result.
+ // NAME multiplies elements and stores the low part of the result.
diff --git a/internal/simdgen/ops/ShiftRotate/categories.yaml b/internal/simdgen/ops/ShiftRotate/categories.yaml
index b9e2a63..71e7825 100644
--- a/internal/simdgen/ops/ShiftRotate/categories.yaml
+++ b/internal/simdgen/ops/ShiftRotate/categories.yaml
@@ -4,21 +4,21 @@
commutative: false
extension: "AVX.*"
documentation: !string |-
- // ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
+ // NAME shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
- go: ShiftAllLeftMasked
nameAndSizeCheck: true
masked: true
commutative: false
extension: "AVX.*"
documentation: !string |-
- // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
+ // NAME shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
- go: ShiftAllRight
signed: false
nameAndSizeCheck: true
commutative: false
extension: "AVX.*"
documentation: !string |-
- // ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
+ // NAME shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
- go: ShiftAllRightMasked
signed: false
nameAndSizeCheck: true
@@ -26,14 +26,14 @@
commutative: false
extension: "AVX.*"
documentation: !string |-
- // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
+ // NAME shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
- go: ShiftAllRight
signed: true
nameAndSizeCheck: true
commutative: false
extension: "AVX.*"
documentation: !string |-
- // ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
+ // NAME shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
- go: ShiftAllRightMasked
signed: true
nameAndSizeCheck: true
@@ -41,28 +41,27 @@
commutative: false
extension: "AVX.*"
documentation: !string |-
- // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
-
+ // NAME shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
- go: ShiftLeft
nameAndSizeCheck: true
commutative: false
extension: "AVX.*"
documentation: !string |-
- // ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
+ // NAME shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
- go: ShiftLeftMasked
nameAndSizeCheck: true
masked: true
commutative: false
extension: "AVX.*"
documentation: !string |-
- // ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
+ // NAME shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
- go: ShiftRight
signed: false
nameAndSizeCheck: true
commutative: false
extension: "AVX.*"
documentation: !string |-
- // ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+ // NAME shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
- go: ShiftRightMasked
signed: false
nameAndSizeCheck: true
@@ -70,14 +69,14 @@
commutative: false
extension: "AVX.*"
documentation: !string |-
- // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+ // NAME shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
- go: ShiftRight
signed: true
nameAndSizeCheck: true
commutative: false
extension: "AVX.*"
documentation: !string |-
- // ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
+ // NAME shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
- go: ShiftRightMasked
signed: true
nameAndSizeCheck: true
@@ -85,44 +84,42 @@
commutative: false
extension: "AVX.*"
documentation: !string |-
- // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-
+ // NAME shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
- go: RotateAllLeftMasked
nameAndSizeCheck: true
masked: true
commutative: false
extension: "AVX.*"
documentation: !string |-
- // RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate.
+ // NAME rotates each element to the left by the number of bits specified by the immediate.
- go: RotateLeftMasked
nameAndSizeCheck: true
masked: true
commutative: false
extension: "AVX.*"
documentation: !string |-
- // RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements.
+ // NAME rotates each element in x to the left by the number of bits specified by y's corresponding elements.
- go: RotateAllRightMasked
nameAndSizeCheck: true
masked: true
commutative: false
extension: "AVX.*"
documentation: !string |-
- // RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate.
+ // NAME rotates each element to the right by the number of bits specified by the immediate.
- go: RotateRightMasked
nameAndSizeCheck: true
masked: true
commutative: false
extension: "AVX.*"
documentation: !string |-
- // RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements.
-
+ // NAME rotates each element in x to the right by the number of bits specified by y's corresponding elements.
- go: ShiftAllLeftAndFillUpperFromMasked
nameAndSizeCheck: true
masked: true
commutative: false
extension: "AVX.*"
documentation: !string |-
- // ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
+ // NAME shifts each element of x to the left by the number of bits specified by the
// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
- go: ShiftAllRightAndFillUpperFromMasked
nameAndSizeCheck: true
@@ -130,7 +127,7 @@
commutative: false
extension: "AVX.*"
documentation: !string |-
- // ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
+ // NAME shifts each element of x to the right by the number of bits specified by the
// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
- go: ShiftLeftAndFillUpperFromMasked
nameAndSizeCheck: true
@@ -138,7 +135,7 @@
commutative: false
extension: "AVX.*"
documentation: !string |-
- // ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
+ // NAME shifts each element of x to the left by the number of bits specified by the
// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
- go: ShiftRightAndFillUpperFromMasked
nameAndSizeCheck: true
@@ -146,5 +143,5 @@
commutative: false
extension: "AVX.*"
documentation: !string |-
- // ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
+ // NAME shifts each element of x to the right by the number of bits specified by the
// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.