internal/simdgen: use "NAME" for method names in doc strings

This will let us merge operations that differ only in their method
names. In particular, this will give us a robust way to insert
computed method names.

The YAML changes were done using the following Gemini CLI prompt:

  In all of the files named "categories.yaml", for each operation in
  the YAML list, find the Go method name from the "go" field, and
  replace anywhere that operation name appears as a word in the
  "documentation" field with the literal string "NAME". Please do this
  using Go with the yaml.v3 library. The yaml.v3 library is already
  imported in this module.

No effect on generated godefs.

Change-Id: Ifdac95c5d62475937fc33a8013d0b0c5c5dca312
Reviewed-on: https://go-review.googlesource.com/c/arch/+/691340
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Auto-Submit: Austin Clements <austin@google.com>
Reviewed-by: Junyang Shao <shaojunyang@google.com>
diff --git a/internal/simdgen/categories.yaml b/internal/simdgen/categories.yaml
index f839f69..5a7e711 100644
--- a/internal/simdgen/categories.yaml
+++ b/internal/simdgen/categories.yaml
@@ -3,114 +3,115 @@
   commutative: true
   extension: "AVX.*"
   documentation: !string |-
-    // Add adds corresponding elements of two vectors.
+    // NAME adds corresponding elements of two vectors.
 - go: SaturatedAdd
   commutative: true
   extension: "AVX.*"
   documentation: !string |-
-    // SaturatedAdd adds corresponding elements of two vectors with saturation.
+    // NAME adds corresponding elements of two vectors with saturation.
 - go: AddMasked
   masked: true
   commutative: true
   extension: "AVX.*"
   documentation: !string |-
-    // AddMasked adds corresponding elements of two vectors.
+    // NAME adds corresponding elements of two vectors.
 - go: SaturatedAddMasked
   masked: true
   commutative: true
   extension: "AVX.*"
   documentation: !string |-
-    // SaturatedAddMasked adds corresponding elements of two vectors with saturation.
+    // NAME adds corresponding elements of two vectors with saturation.
 - go: Sub
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // Sub subtracts corresponding elements of two vectors.
+    // NAME subtracts corresponding elements of two vectors.
 - go: SaturatedSub
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // SaturatedSub subtracts corresponding elements of two vectors with saturation.
+    // NAME subtracts corresponding elements of two vectors with saturation.
 - go: SubMasked
   masked: true
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // SubMasked subtracts corresponding elements of two vectors.
+    // NAME subtracts corresponding elements of two vectors.
 - go: SaturatedSubMasked
   masked: true
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // SaturatedSubMasked subtracts corresponding elements of two vectors with saturation.
+    // NAME subtracts corresponding elements of two vectors with saturation.
 - go: PairwiseAdd
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // PairwiseAdd horizontally adds adjacent pairs of elements.
+    // NAME horizontally adds adjacent pairs of elements.
     // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
 - go: PairwiseSub
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // PairwiseSub horizontally subtracts adjacent pairs of elements.
+    // NAME horizontally subtracts adjacent pairs of elements.
     // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
 - go: SaturatedPairwiseAdd
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // SaturatedPairwiseAdd horizontally adds adjacent pairs of elements with saturation.
+    // NAME horizontally adds adjacent pairs of elements with saturation.
     // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
 - go: SaturatedPairwiseSub
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // SaturatedPairwiseSub horizontally subtracts adjacent pairs of elements with saturation.
+    // NAME horizontally subtracts adjacent pairs of elements with saturation.
     // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
 - go: And
   commutative: true
   extension: "AVX.*"
   documentation: !string |-
-    // And performs a bitwise AND operation between two vectors.
+    // NAME performs a bitwise AND operation between two vectors.
 - go: AndMasked
   masked: true
   commutative: true
   extension: "AVX.*"
   documentation: !string |-
-    // AndMasked performs a bitwise AND operation between two vectors.
+    // NAME performs a bitwise AND operation between two vectors.
 - go: Or
   commutative: true
   extension: "AVX.*"
   documentation: !string |-
-    // Or performs a bitwise OR operation between two vectors.
+    // NAME performs a bitwise OR operation between two vectors.
 - go: OrMasked
   masked: true
   commutative: true
   extension: "AVX.*"
   documentation: !string |-
-    // OrMasked performs a bitwise OR operation between two vectors.
+    // NAME performs a bitwise OR operation between two vectors.
 - go: AndNot
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // AndNot performs a bitwise x &^ y.
+    // NAME performs a bitwise x &^ y.
 - go: AndNotMasked
   masked: true
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // AndNotMasked performs a bitwise x &^ y.
+    // NAME performs a bitwise x &^ y.
 - go: Xor
   commutative: true
   extension: "AVX.*"
   documentation: !string |-
-    // Xor performs a bitwise XOR operation between two vectors.
+    // NAME performs a bitwise XOR operation between two vectors.
 - go: XorMasked
   masked: true
   commutative: true
   extension: "AVX.*"
   documentation: !string |-
-    // XorMasked performs a bitwise XOR operation between two vectors.
+    // NAME performs a bitwise XOR operation between two vectors.
+
 # We also have PTEST and VPTERNLOG, those should be hidden from the users
 # and only appear in rewrite rules.
 # const imm predicate(holds for both float and int|uint):
@@ -125,239 +126,233 @@
   commutative: true
   extension: "AVX.*"
   documentation: !string |-
-    // Equal compares for equality.
+    // NAME compares for equality.
 - go: Less
   constImm: 1
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // Less compares for less than.
+    // NAME compares for less than.
 - go: LessEqual
   constImm: 2
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // LessEqual compares for less than or equal.
+    // NAME compares for less than or equal.
 - go: IsNan # For float only.
   constImm: 3
   commutative: true
   extension: "AVX.*"
   documentation: !string |-
-    // IsNan checks if elements are NaN. Use as x.IsNan(x).
+    // NAME checks if elements are NaN. Use as x.IsNan(x).
 - go: NotEqual
   constImm: 4
   commutative: true
   extension: "AVX.*"
   documentation: !string |-
-    // NotEqual compares for inequality.
+    // NAME compares for inequality.
 - go: GreaterEqual
   constImm: 13
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // GreaterEqual compares for greater than or equal.
+    // NAME compares for greater than or equal.
 - go: Greater
   constImm: 14
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // Greater compares for greater than.
-
+    // NAME compares for greater than.
 - go: EqualMasked
   constImm: 0
   masked: true
   commutative: true
   extension: "AVX.*"
   documentation: !string |-
-    // EqualMasked compares for equality.
+    // NAME compares for equality.
 - go: LessMasked
   constImm: 1
   masked: true
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // LessMasked compares for less than.
+    // NAME compares for less than.
 - go: LessEqualMasked
   constImm: 2
   masked: true
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // LessEqualMasked compares for less than or equal.
+    // NAME compares for less than or equal.
 - go: IsNanMasked # For float only.
   constImm: 3
   masked: true
   commutative: true
   extension: "AVX.*"
   documentation: !string |-
-    // IsNanMasked checks if elements are NaN. Use as x.IsNan(x).
+    // NAME checks if elements are NaN. Use as x.IsNan(x).
 - go: NotEqualMasked
   constImm: 4
   masked: true
   commutative: true
   extension: "AVX.*"
   documentation: !string |-
-    // NotEqualMasked compares for inequality.
+    // NAME compares for inequality.
 - go: GreaterEqualMasked
   constImm: 13
   masked: true
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // GreaterEqualMasked compares for greater than or equal.
+    // NAME compares for greater than or equal.
 - go: GreaterMasked
   constImm: 14
   masked: true
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // GreaterMasked compares for greater than.
+    // NAME compares for greater than.
 - go: Div
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // Div divides elements of two vectors.
+    // NAME divides elements of two vectors.
 - go: DivMasked
   commutative: false
   masked: true
   extension: "AVX.*"
   documentation: !string |-
-    // DivMasked divides elements of two vectors.
+    // NAME divides elements of two vectors.
 - go: Sqrt
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // Sqrt computes the square root of each element.
+    // NAME computes the square root of each element.
 - go: SqrtMasked
   commutative: false
   masked: true
   extension: "AVX.*"
   documentation: !string |-
-    // SqrtMasked computes the square root of each element.
+    // NAME computes the square root of each element.
 - go: ApproximateReciprocal
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // ApproximateReciprocal computes an approximate reciprocal of each element.
+    // NAME computes an approximate reciprocal of each element.
 - go: ApproximateReciprocalMasked
   commutative: false
   masked: true
   extension: "AVX.*"
   documentation: !string |-
-    // ApproximateReciprocalMasked computes an approximate reciprocal of each element.
+    // NAME computes an approximate reciprocal of each element.
 - go: ApproximateReciprocalOfSqrt
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // ApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element.
+    // NAME computes an approximate reciprocal of the square root of each element.
 - go: ApproximateReciprocalOfSqrtMasked
   commutative: false
   masked: true
   extension: "AVX.*"
   documentation: !string |-
-    // ApproximateReciprocalOfSqrtMasked computes an approximate reciprocal of the square root of each element.
+    // NAME computes an approximate reciprocal of the square root of each element.
 - go: MulByPowOf2Masked # This operation is all after AVX512, the unmasked version will be generated.
   commutative: false
   masked: true
   extension: "AVX.*"
   documentation: !string |-
-    // MulByPowOf2Masked multiplies elements by a power of 2.
-
+    // NAME multiplies elements by a power of 2.
 - go: Round
   commutative: false
   extension: "AVX.*"
   constImm: 0
   documentation: !string |-
-    // Round rounds elements to the nearest integer.
+    // NAME rounds elements to the nearest integer.
 - go: RoundWithPrecisionMasked
   commutative: false
   extension: "AVX.*"
   constImm: 0
   masked: true
   documentation: !string |-
-    // RoundWithPrecisionMasked rounds elements with specified precision.
+    // NAME rounds elements with specified precision.
 - go: DiffWithRoundWithPrecisionMasked
   commutative: false
   extension: "AVX.*"
   constImm: 0
   masked: true
   documentation: !string |-
-    // DiffWithRoundWithPrecisionMasked computes the difference after rounding with specified precision.
-
+    // NAME computes the difference after rounding with specified precision.
 - go: Floor
   commutative: false
   extension: "AVX.*"
   constImm: 1
   documentation: !string |-
-    // Floor rounds elements down to the nearest integer.
+    // NAME rounds elements down to the nearest integer.
 - go: FloorWithPrecisionMasked
   commutative: false
   extension: "AVX.*"
   constImm: 1
   masked: true
   documentation: !string |-
-    // FloorWithPrecisionMasked rounds elements down with specified precision.
+    // NAME rounds elements down with specified precision.
 - go: DiffWithFloorWithPrecisionMasked
   commutative: false
   extension: "AVX.*"
   constImm: 1
   masked: true
   documentation: !string |-
-    // DiffWithFloorWithPrecisionMasked computes the difference after flooring with specified precision.
-
+    // NAME computes the difference after flooring with specified precision.
 - go: Ceil
   commutative: false
   extension: "AVX.*"
   constImm: 2
   documentation: !string |-
-    // Ceil rounds elements up to the nearest integer.
+    // NAME rounds elements up to the nearest integer.
 - go: CeilWithPrecisionMasked
   commutative: false
   extension: "AVX.*"
   constImm: 2
   masked: true
   documentation: !string |-
-    // CeilWithPrecisionMasked rounds elements up with specified precision.
+    // NAME rounds elements up with specified precision.
 - go: DiffWithCeilWithPrecisionMasked
   commutative: false
   extension: "AVX.*"
   constImm: 2
   masked: true
   documentation: !string |-
-    // DiffWithCeilWithPrecisionMasked computes the difference after ceiling with specified precision.
-
+    // NAME computes the difference after ceiling with specified precision.
 - go: Trunc
   commutative: false
   extension: "AVX.*"
   constImm: 3
   documentation: !string |-
-    // Trunc truncates elements towards zero.
+    // NAME truncates elements towards zero.
 - go: TruncWithPrecisionMasked
   commutative: false
   extension: "AVX.*"
   constImm: 3
   masked: true
   documentation: !string |-
-    // TruncWithPrecisionMasked truncates elements with specified precision.
+    // NAME truncates elements with specified precision.
 - go: DiffWithTruncWithPrecisionMasked
   commutative: false
   extension: "AVX.*"
   constImm: 3
   masked: true
   documentation: !string |-
-    // DiffWithTruncWithPrecisionMasked computes the difference after truncating with specified precision.
-
+    // NAME computes the difference after truncating with specified precision.
 - go: AddSub
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // AddSub subtracts even elements and adds odd elements of two vectors.
+    // NAME subtracts even elements and adds odd elements of two vectors.
 - go: GaloisFieldAffineTransformMasked
   masked: true
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // GaloisFieldAffineTransformMasked computes an affine transformation in GF(2^8):
+    // NAME computes an affine transformation in GF(2^8):
     // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes;
     // b is an 8-bit vector. The affine transformation is y * x + b, with each element of y
     // corresponding to a group of 8 elements in x.
@@ -366,7 +361,7 @@
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // GaloisFieldAffineTransformInverseMasked computes an affine transformation in GF(2^8),
+    // NAME computes an affine transformation in GF(2^8),
     // with x inverted with respect to reduction polynomial x^8 + x^4 + x^3 + x + 1:
     // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes;
     // b is an 8-bit vector. The affine transformation is y * x + b, with each element of y
@@ -376,288 +371,279 @@
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // GaloisFieldMulMasked computes element-wise GF(2^8) multiplication with
+    // NAME computes element-wise GF(2^8) multiplication with
     // reduction polynomial x^8 + x^4 + x^3 + x + 1.
 - go: Average
   commutative: true
   extension: "AVX.*" # VPAVGB/W are available across various AVX versions
   documentation: !string |-
-    // Average computes the rounded average of corresponding elements.
+    // NAME computes the rounded average of corresponding elements.
 - go: AverageMasked
   commutative: true
   masked: true
   extension: "AVX512.*" # Masked operations are typically AVX512
   documentation: !string |-
-    // AverageMasked computes the rounded average of corresponding elements.
-
+    // NAME computes the rounded average of corresponding elements.
 - go: Absolute
   commutative: false
   # Unary operation, not commutative
   extension: "AVX.*" # VPABSB/W/D are AVX, VPABSQ is AVX512
   documentation: !string |-
-    // Absolute computes the absolute value of each element.
+    // NAME computes the absolute value of each element.
 - go: AbsoluteMasked
   commutative: false
   masked: true
   extension: "AVX512.*"
   documentation: !string |-
-    // AbsoluteMasked computes the absolute value of each element.
-
+    // NAME computes the absolute value of each element.
 - go: Sign
   # Applies sign of second operand to first: sign(val, sign_src)
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // Sign returns the product of the first operand with -1, 0, or 1,
+    // NAME returns the product of the first operand with -1, 0, or 1,
     // whichever constant is nearest to the value of the second operand.
   # Sign does not have masked version
-
 - go: PopCountMasked
   commutative: false
   masked: true
   extension: "AVX512.*" # VPOPCNT instructions are AVX512 (BITALG or VPOPCNTDQ)
   documentation: !string |-
-    // PopCountMasked counts the number of set bits in each element.
+    // NAME counts the number of set bits in each element.
 - go: PairDotProd
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // PairDotProd multiplies the elements and add the pairs together,
+    // NAME multiplies the elements and adds the pairs together,
     // yielding a vector of half as many elements with twice the input element size.
 - go: PairDotProdMasked
   masked: true
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // PairDotProdMasked multiplies the elements and add the pairs together,
+    // NAME multiplies the elements and adds the pairs together,
     // yielding a vector of half as many elements with twice the input element size.
 # TODO: maybe simplify this name within the receiver-type + method-naming scheme we use.
 - go: SaturatedUnsignedSignedPairDotProd
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // SaturatedUnsignedSignedPairDotProd multiplies the elements and add the pairs together with saturation,
+    // NAME multiplies the elements and adds the pairs together with saturation,
     // yielding a vector of half as many elements with twice the input element size.
 - go: SaturatedUnsignedSignedPairDotProdMasked
   masked: true
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // SaturatedUnsignedSignedPairDotProdMasked multiplies the elements and add the pairs together with saturation,
+    // NAME multiplies the elements and adds the pairs together with saturation,
     // yielding a vector of half as many elements with twice the input element size.
-
 # QuadDotProd, i.e. VPDPBUSD(S) are operations with src/dst on the same register, we are not supporting this as of now.
 - go: DotProdBroadcast
   commutative: true
   extension: "AVX.*"
   documentation: !string |-
-    // DotProdBroadcast multiplies all elements and broadcasts the sum.
+    // NAME multiplies all elements and broadcasts the sum.
 - go: UnsignedSignedQuadDotProdAccumulate
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of x and y and then adds z.
+    // NAME performs dot products on groups of 4 elements of x and y and then adds z.
 - go: UnsignedSignedQuadDotProdAccumulateMasked
   masked: true
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // UnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of x and y and then adds z.
+    // NAME performs dot products on groups of 4 elements of x and y and then adds z.
 - go: SaturatedUnsignedSignedQuadDotProdAccumulate
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of x and y and then adds z.
+    // NAME performs dot products on groups of 4 elements of x and y and then adds z.
 - go: SaturatedUnsignedSignedQuadDotProdAccumulateMasked
   masked: true
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // SaturatedUnsignedSignedQuadDotProdAccumulateMasked multiplies performs dot products on groups of 4 elements of x and y and then adds z.
+    // NAME performs dot products on groups of 4 elements of x and y and then adds z.
 - go: PairDotProdAccumulate
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // PairDotProdAccumulate performs dot products on pairs of elements of x and y and then adds z.
+    // NAME performs dot products on pairs of elements of x and y and then adds z.
 - go: PairDotProdAccumulateMasked
   masked: true
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // PairDotProdAccumulateMasked performs dot products on pairs of elements of x and y and then adds z.
+    // NAME performs dot products on pairs of elements of x and y and then adds z.
 - go: SaturatedPairDotProdAccumulate
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // SaturatedPairDotProdAccumulate performs dot products on pairs of elements of x and y and then adds z.
+    // NAME performs dot products on pairs of elements of x and y and then adds z.
 - go: SaturatedPairDotProdAccumulateMasked
   masked: true
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // SaturatedPairDotProdAccumulateMasked performs dot products on pairs of elements of x and y and then adds z.
+    // NAME performs dot products on pairs of elements of x and y and then adds z.
 - go: FusedMultiplyAddMasked
   masked: true
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // FusedMultiplyAddMasked performs (x * y) + z.
+    // NAME performs (x * y) + z.
 - go: FusedMultiplyAddSubMasked
   masked: true
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // FusedMultiplyAddSubMasked performs (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
+    // NAME performs (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
 - go: FusedMultiplySubAddMasked
   masked: true
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // FusedMultiplySubAddMasked performs (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
+    // NAME performs (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
 - go: Max
   commutative: true
   extension: "AVX.*"
   documentation: !string |-
-    // Max computes the maximum of corresponding elements.
+    // NAME computes the maximum of corresponding elements.
 - go: MaxMasked
   commutative: true
   masked: true
   extension: "AVX.*"
   documentation: !string |-
-    // MaxMasked computes the maximum of corresponding elements.
+    // NAME computes the maximum of corresponding elements.
 - go: Min
   commutative: true
   extension: "AVX.*"
   documentation: !string |-
-    // Min computes the minimum of corresponding elements.
+    // NAME computes the minimum of corresponding elements.
 - go: MinMasked
   commutative: true
   masked: true
   extension: "AVX.*"
   documentation: !string |-
-    // MinMasked computes the minimum of corresponding elements.
+    // NAME computes the minimum of corresponding elements.
 - go: SetElem
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // SetElem sets a single constant-indexed element's value.
+    // NAME sets a single constant-indexed element's value.
 - go: GetElem
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // GetElem retrieves a single constant-indexed element's value.
+    // NAME retrieves a single constant-indexed element's value.
 - go: Set128
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // Set128 combines a 128-bit vector with a 256-bit vector, where the constant operand specifies whether the low (0) or high (1) half is receives the smaller vector.
+    // NAME combines a 128-bit vector with a 256-bit vector, where the constant operand specifies whether the low (0) or high (1) half receives the smaller vector.
 - go: Get128
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // Get128 retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand.
-
-
+    // NAME retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand.
 - go: Permute
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // Permute performs a full permutation of vector x using indices:
+    // NAME performs a full permutation of vector x using indices:
     // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
     // Only the needed bits to represent x's index are used in indices' elements.
-
 - go: PermuteMasked
   commutative: false
   masked: true
   extension: "AVX.*"
   documentation: !string |-
-    // PermuteMasked performs a full permutation of vector y using indices:
+    // NAME performs a full permutation of vector y using indices:
     // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
     // Only the needed bits to represent x's index are used in indices' elements.
-
 - go: Permute2Masked # Permute2Masked is only available on or after AVX512
   commutative: false
   masked: true
   extension: "AVX.*"
   documentation: !string |-
-    // Permute2Masked performs a full permutation of vector x, y using indices:
+    // NAME performs a full permutation of vector x, y using indices:
     // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
     // where xy is x appending y.
     // Only the needed bits to represent xy's index are used in indices' elements.
-
 - go: Compress
   commutative: false
   # The mask in Compress is a control mask rather than a write mask, so we don't mark it "masked"
   extension: "AVX.*"
   documentation: !string |-
-    // Compress performs a compression on vector x using mask by
+    // NAME performs a compression on vector x using mask by
     // selecting elements as indicated by mask, and pack them to lower indexed elements.
 - go: Mul
   commutative: true
   extension: "AVX.*"
   documentation: !string |-
-    // Mul multiplies corresponding elements of two vectors.
+    // NAME multiplies corresponding elements of two vectors.
 - go: MulEvenWiden
   commutative: true
   extension: "AVX.*"
   documentation: !string |-
-    // MulEvenWiden multiplies even-indexed elements, widening the result.
+    // NAME multiplies even-indexed elements, widening the result.
     // Result[i] = v1.Even[i] * v2.Even[i].
 - go: MulHigh
   commutative: true
   extension: "AVX.*"
   documentation: !string |-
-    // MulHigh multiplies elements and stores the high part of the result.
+    // NAME multiplies elements and stores the high part of the result.
 - go: MulLow
   commutative: true
   extension: "AVX.*"
   documentation: !string |-
-    // MulLow multiplies elements and stores the low part of the result.
+    // NAME multiplies elements and stores the low part of the result.
 - go: MulMasked
   masked: true
   commutative: true
   extension: "AVX.*"
   documentation: !string |-
-    // MulMasked multiplies corresponding elements of two vectors.
+    // NAME multiplies corresponding elements of two vectors.
 - go: MulEvenWidenMasked
   masked: true
   commutative: true
   extension: "AVX.*"
   documentation: !string |-
-    // MulEvenWidenMasked multiplies even-indexed elements, widening the result.
+    // NAME multiplies even-indexed elements, widening the result.
     // Result[i] = v1.Even[i] * v2.Even[i].
 - go: MulHighMasked
   masked: true
   commutative: true
   extension: "AVX.*"
   documentation: !string |-
-    // MulHighMasked multiplies elements and stores the high part of the result.
+    // NAME multiplies elements and stores the high part of the result.
 - go: MulLowMasked
   masked: true
   commutative: true
   extension: "AVX.*"
   documentation: !string |-
-    // MulLowMasked multiplies elements and stores the low part of the result.
+    // NAME multiplies elements and stores the low part of the result.
 - go: ShiftAllLeft
   nameAndSizeCheck: true
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
+    // NAME shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
 - go: ShiftAllLeftMasked
   nameAndSizeCheck: true
   masked: true
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
+    // NAME shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
 - go: ShiftAllRight
   signed: false
   nameAndSizeCheck: true
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
+    // NAME shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
 - go: ShiftAllRightMasked
   signed: false
   nameAndSizeCheck: true
@@ -665,14 +651,14 @@
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
+    // NAME shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
 - go: ShiftAllRight
   signed: true
   nameAndSizeCheck: true
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
+    // NAME shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
 - go: ShiftAllRightMasked
   signed: true
   nameAndSizeCheck: true
@@ -680,28 +666,27 @@
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
-
+    // NAME shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
 - go: ShiftLeft
   nameAndSizeCheck: true
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
+    // NAME shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
 - go: ShiftLeftMasked
   nameAndSizeCheck: true
   masked: true
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
+    // NAME shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
 - go: ShiftRight
   signed: false
   nameAndSizeCheck: true
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+    // NAME shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
 - go: ShiftRightMasked
   signed: false
   nameAndSizeCheck: true
@@ -709,14 +694,14 @@
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+    // NAME shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
 - go: ShiftRight
   signed: true
   nameAndSizeCheck: true
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
+    // NAME shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
 - go: ShiftRightMasked
   signed: true
   nameAndSizeCheck: true
@@ -724,44 +709,42 @@
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-
+    // NAME shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
 - go: RotateAllLeftMasked
   nameAndSizeCheck: true
   masked: true
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate.
+    // NAME rotates each element to the left by the number of bits specified by the immediate.
 - go: RotateLeftMasked
   nameAndSizeCheck: true
   masked: true
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements.
+    // NAME rotates each element in x to the left by the number of bits specified by y's corresponding elements.
 - go: RotateAllRightMasked
   nameAndSizeCheck: true
   masked: true
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate.
+    // NAME rotates each element to the right by the number of bits specified by the immediate.
 - go: RotateRightMasked
   nameAndSizeCheck: true
   masked: true
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements.
-
+    // NAME rotates each element in x to the right by the number of bits specified by y's corresponding elements.
 - go: ShiftAllLeftAndFillUpperFromMasked
   nameAndSizeCheck: true
   masked: true
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
+    // NAME shifts each element of x to the left by the number of bits specified by the
     // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
 - go: ShiftAllRightAndFillUpperFromMasked
   nameAndSizeCheck: true
@@ -769,7 +752,7 @@
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
+    // NAME shifts each element of x to the right by the number of bits specified by the
     // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
 - go: ShiftLeftAndFillUpperFromMasked
   nameAndSizeCheck: true
@@ -777,7 +760,7 @@
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
+    // NAME shifts each element of x to the left by the number of bits specified by the
     // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
 - go: ShiftRightAndFillUpperFromMasked
   nameAndSizeCheck: true
@@ -785,5 +768,5 @@
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
+    // NAME shifts each element of x to the right by the number of bits specified by the
     // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go
index ea4d56a..be0a945 100644
--- a/internal/simdgen/gen_utility.go
+++ b/internal/simdgen/gen_utility.go
@@ -569,8 +569,8 @@
 			}
 			maskedOpName := op2.Go
 			op2.Go = strings.TrimSuffix(op2.Go, "Masked")
-			op2Doc := strings.ReplaceAll(*op2.Documentation, maskedOpName, op2.Go)
-			op2.Documentation = &op2Doc
+			op2Doc := strings.ReplaceAll(op2.Documentation, maskedOpName, op2.Go)
+			op2.Documentation = op2Doc
 			op2.Masked = nil // It's no longer masked.
 			splited = append(splited, op2)
 		} else {
@@ -583,9 +583,7 @@
 func insertMaskDescToDoc(ops []Operation) {
 	for i, _ := range ops {
 		if ops[i].Masked != nil && *ops[i].Masked {
-			if ops[i].Documentation != nil {
-				*ops[i].Documentation += "\n//\n// This operation is applied selectively under a write mask."
-			}
+			ops[i].Documentation += "\n//\n// This operation is applied selectively under a write mask."
 		}
 	}
 }
diff --git a/internal/simdgen/godefs.go b/internal/simdgen/godefs.go
index 32467de..b45c249 100644
--- a/internal/simdgen/godefs.go
+++ b/internal/simdgen/godefs.go
@@ -7,6 +7,7 @@
 import (
 	"fmt"
 	"log"
+	"regexp"
 	"slices"
 	"strconv"
 	"strings"
@@ -16,6 +17,16 @@
 
 type Operation struct {
 	rawOperation
+
+	// Documentation is the doc string for this API.
+	//
+	// It is computed from the raw documentation:
+	//
+	// - "NAME" is replaced by the Go method name.
+	//
+	// - For masked operation, the method name is updated and a sentence about
+	// masking is added.
+	Documentation string
 }
 
 // rawOperation is the unifier representation of an [Operation]. It is
@@ -49,6 +60,15 @@
 	if err := v.Decode(&o.rawOperation); err != nil {
 		return err
 	}
+
+	// Compute doc string.
+	if o.rawOperation.Documentation != nil {
+		o.Documentation = *o.rawOperation.Documentation
+	} else {
+		o.Documentation = "// UNDOCUMENTED"
+	}
+	o.Documentation = regexp.MustCompile(`\bNAME\b`).ReplaceAllString(o.Documentation, o.Go)
+
 	return nil
 }
 
diff --git a/internal/simdgen/ops/AddSub/categories.yaml b/internal/simdgen/ops/AddSub/categories.yaml
index 9bae42e..667508b 100644
--- a/internal/simdgen/ops/AddSub/categories.yaml
+++ b/internal/simdgen/ops/AddSub/categories.yaml
@@ -3,67 +3,67 @@
   commutative: true
   extension: "AVX.*"
   documentation: !string |-
-    // Add adds corresponding elements of two vectors.
+    // NAME adds corresponding elements of two vectors.
 - go: SaturatedAdd
   commutative: true
   extension: "AVX.*"
   documentation: !string |-
-    // SaturatedAdd adds corresponding elements of two vectors with saturation.
+    // NAME adds corresponding elements of two vectors with saturation.
 - go: AddMasked
   masked: true
   commutative: true
   extension: "AVX.*"
   documentation: !string |-
-    // AddMasked adds corresponding elements of two vectors.
+    // NAME adds corresponding elements of two vectors.
 - go: SaturatedAddMasked
   masked: true
   commutative: true
   extension: "AVX.*"
   documentation: !string |-
-    // SaturatedAddMasked adds corresponding elements of two vectors with saturation.
+    // NAME adds corresponding elements of two vectors with saturation.
 - go: Sub
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // Sub subtracts corresponding elements of two vectors.
+    // NAME subtracts corresponding elements of two vectors.
 - go: SaturatedSub
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // SaturatedSub subtracts corresponding elements of two vectors with saturation.
+    // NAME subtracts corresponding elements of two vectors with saturation.
 - go: SubMasked
   masked: true
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // SubMasked subtracts corresponding elements of two vectors.
+    // NAME subtracts corresponding elements of two vectors.
 - go: SaturatedSubMasked
   masked: true
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // SaturatedSubMasked subtracts corresponding elements of two vectors with saturation.
+    // NAME subtracts corresponding elements of two vectors with saturation.
 - go: PairwiseAdd
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // PairwiseAdd horizontally adds adjacent pairs of elements.
+    // NAME horizontally adds adjacent pairs of elements.
     // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
 - go: PairwiseSub
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // PairwiseSub horizontally subtracts adjacent pairs of elements.
+    // NAME horizontally subtracts adjacent pairs of elements.
     // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
 - go: SaturatedPairwiseAdd
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // SaturatedPairwiseAdd horizontally adds adjacent pairs of elements with saturation.
+    // NAME horizontally adds adjacent pairs of elements with saturation.
     // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
 - go: SaturatedPairwiseSub
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // SaturatedPairwiseSub horizontally subtracts adjacent pairs of elements with saturation.
+    // NAME horizontally subtracts adjacent pairs of elements with saturation.
     // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
diff --git a/internal/simdgen/ops/BitwiseLogic/categories.yaml b/internal/simdgen/ops/BitwiseLogic/categories.yaml
index c6a00cc..3d2eda7 100644
--- a/internal/simdgen/ops/BitwiseLogic/categories.yaml
+++ b/internal/simdgen/ops/BitwiseLogic/categories.yaml
@@ -3,45 +3,46 @@
   commutative: true
   extension: "AVX.*"
   documentation: !string |-
-    // And performs a bitwise AND operation between two vectors.
+    // NAME performs a bitwise AND operation between two vectors.
 - go: AndMasked
   masked: true
   commutative: true
   extension: "AVX.*"
   documentation: !string |-
-    // AndMasked performs a bitwise AND operation between two vectors.
+    // NAME performs a bitwise AND operation between two vectors.
 - go: Or
   commutative: true
   extension: "AVX.*"
   documentation: !string |-
-    // Or performs a bitwise OR operation between two vectors.
+    // NAME performs a bitwise OR operation between two vectors.
 - go: OrMasked
   masked: true
   commutative: true
   extension: "AVX.*"
   documentation: !string |-
-    // OrMasked performs a bitwise OR operation between two vectors.
+    // NAME performs a bitwise OR operation between two vectors.
 - go: AndNot
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // AndNot performs a bitwise x &^ y.
+    // NAME performs a bitwise x &^ y.
 - go: AndNotMasked
   masked: true
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // AndNotMasked performs a bitwise x &^ y.
+    // NAME performs a bitwise x &^ y.
 - go: Xor
   commutative: true
   extension: "AVX.*"
   documentation: !string |-
-    // Xor performs a bitwise XOR operation between two vectors.
+    // NAME performs a bitwise XOR operation between two vectors.
 - go: XorMasked
   masked: true
   commutative: true
   extension: "AVX.*"
   documentation: !string |-
-    // XorMasked performs a bitwise XOR operation between two vectors.
+    // NAME performs a bitwise XOR operation between two vectors.
+
 # We also have PTEST and VPTERNLOG, those should be hidden from the users
 # and only appear in rewrite rules.
diff --git a/internal/simdgen/ops/Compares/categories.yaml b/internal/simdgen/ops/Compares/categories.yaml
index d108051..e17e45d 100644
--- a/internal/simdgen/ops/Compares/categories.yaml
+++ b/internal/simdgen/ops/Compares/categories.yaml
@@ -11,90 +11,89 @@
   commutative: true
   extension: "AVX.*"
   documentation: !string |-
-    // Equal compares for equality.
+    // NAME compares for equality.
 - go: Less
   constImm: 1
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // Less compares for less than.
+    // NAME compares for less than.
 - go: LessEqual
   constImm: 2
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // LessEqual compares for less than or equal.
+    // NAME compares for less than or equal.
 - go: IsNan # For float only.
   constImm: 3
   commutative: true
   extension: "AVX.*"
   documentation: !string |-
-    // IsNan checks if elements are NaN. Use as x.IsNan(x).
+    // NAME checks if elements are NaN. Use as x.IsNan(x).
 - go: NotEqual
   constImm: 4
   commutative: true
   extension: "AVX.*"
   documentation: !string |-
-    // NotEqual compares for inequality.
+    // NAME compares for inequality.
 - go: GreaterEqual
   constImm: 13
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // GreaterEqual compares for greater than or equal.
+    // NAME compares for greater than or equal.
 - go: Greater
   constImm: 14
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // Greater compares for greater than.
-
+    // NAME compares for greater than.
 - go: EqualMasked
   constImm: 0
   masked: true
   commutative: true
   extension: "AVX.*"
   documentation: !string |-
-    // EqualMasked compares for equality.
+    // NAME compares for equality.
 - go: LessMasked
   constImm: 1
   masked: true
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // LessMasked compares for less than.
+    // NAME compares for less than.
 - go: LessEqualMasked
   constImm: 2
   masked: true
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // LessEqualMasked compares for less than or equal.
+    // NAME compares for less than or equal.
 - go: IsNanMasked # For float only.
   constImm: 3
   masked: true
   commutative: true
   extension: "AVX.*"
   documentation: !string |-
-    // IsNanMasked checks if elements are NaN. Use as x.IsNan(x).
+    // NAME checks if elements are NaN. Use as x.IsNan(x).
 - go: NotEqualMasked
   constImm: 4
   masked: true
   commutative: true
   extension: "AVX.*"
   documentation: !string |-
-    // NotEqualMasked compares for inequality.
+    // NAME compares for inequality.
 - go: GreaterEqualMasked
   constImm: 13
   masked: true
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // GreaterEqualMasked compares for greater than or equal.
+    // NAME compares for greater than or equal.
 - go: GreaterMasked
   constImm: 14
   masked: true
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // GreaterMasked compares for greater than.
+    // NAME compares for greater than.
diff --git a/internal/simdgen/ops/FPonlyArith/categories.yaml b/internal/simdgen/ops/FPonlyArith/categories.yaml
index 1347b53..5329204 100644
--- a/internal/simdgen/ops/FPonlyArith/categories.yaml
+++ b/internal/simdgen/ops/FPonlyArith/categories.yaml
@@ -3,139 +3,134 @@
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // Div divides elements of two vectors.
+    // NAME divides elements of two vectors.
 - go: DivMasked
   commutative: false
   masked: true
   extension: "AVX.*"
   documentation: !string |-
-    // DivMasked divides elements of two vectors.
+    // NAME divides elements of two vectors.
 - go: Sqrt
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // Sqrt computes the square root of each element.
+    // NAME computes the square root of each element.
 - go: SqrtMasked
   commutative: false
   masked: true
   extension: "AVX.*"
   documentation: !string |-
-    // SqrtMasked computes the square root of each element.
+    // NAME computes the square root of each element.
 - go: ApproximateReciprocal
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // ApproximateReciprocal computes an approximate reciprocal of each element.
+    // NAME computes an approximate reciprocal of each element.
 - go: ApproximateReciprocalMasked
   commutative: false
   masked: true
   extension: "AVX.*"
   documentation: !string |-
-    // ApproximateReciprocalMasked computes an approximate reciprocal of each element.
+    // NAME computes an approximate reciprocal of each element.
 - go: ApproximateReciprocalOfSqrt
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // ApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element.
+    // NAME computes an approximate reciprocal of the square root of each element.
 - go: ApproximateReciprocalOfSqrtMasked
   commutative: false
   masked: true
   extension: "AVX.*"
   documentation: !string |-
-    // ApproximateReciprocalOfSqrtMasked computes an approximate reciprocal of the square root of each element.
+    // NAME computes an approximate reciprocal of the square root of each element.
 - go: MulByPowOf2Masked # This operation is all after AVX512, the unmasked version will be generated.
   commutative: false
   masked: true
   extension: "AVX.*"
   documentation: !string |-
-    // MulByPowOf2Masked multiplies elements by a power of 2.
-
+    // NAME multiplies elements by a power of 2.
 - go: Round
   commutative: false
   extension: "AVX.*"
   constImm: 0
   documentation: !string |-
-    // Round rounds elements to the nearest integer.
+    // NAME rounds elements to the nearest integer.
 - go: RoundWithPrecisionMasked
   commutative: false
   extension: "AVX.*"
   constImm: 0
   masked: true
   documentation: !string |-
-    // RoundWithPrecisionMasked rounds elements with specified precision.
+    // NAME rounds elements with specified precision.
 - go: DiffWithRoundWithPrecisionMasked
   commutative: false
   extension: "AVX.*"
   constImm: 0
   masked: true
   documentation: !string |-
-    // DiffWithRoundWithPrecisionMasked computes the difference after rounding with specified precision.
-
+    // NAME computes the difference after rounding with specified precision.
 - go: Floor
   commutative: false
   extension: "AVX.*"
   constImm: 1
   documentation: !string |-
-    // Floor rounds elements down to the nearest integer.
+    // NAME rounds elements down to the nearest integer.
 - go: FloorWithPrecisionMasked
   commutative: false
   extension: "AVX.*"
   constImm: 1
   masked: true
   documentation: !string |-
-    // FloorWithPrecisionMasked rounds elements down with specified precision.
+    // NAME rounds elements down with specified precision.
 - go: DiffWithFloorWithPrecisionMasked
   commutative: false
   extension: "AVX.*"
   constImm: 1
   masked: true
   documentation: !string |-
-    // DiffWithFloorWithPrecisionMasked computes the difference after flooring with specified precision.
-
+    // NAME computes the difference after flooring with specified precision.
 - go: Ceil
   commutative: false
   extension: "AVX.*"
   constImm: 2
   documentation: !string |-
-    // Ceil rounds elements up to the nearest integer.
+    // NAME rounds elements up to the nearest integer.
 - go: CeilWithPrecisionMasked
   commutative: false
   extension: "AVX.*"
   constImm: 2
   masked: true
   documentation: !string |-
-    // CeilWithPrecisionMasked rounds elements up with specified precision.
+    // NAME rounds elements up with specified precision.
 - go: DiffWithCeilWithPrecisionMasked
   commutative: false
   extension: "AVX.*"
   constImm: 2
   masked: true
   documentation: !string |-
-    // DiffWithCeilWithPrecisionMasked computes the difference after ceiling with specified precision.
-
+    // NAME computes the difference after ceiling with specified precision.
 - go: Trunc
   commutative: false
   extension: "AVX.*"
   constImm: 3
   documentation: !string |-
-    // Trunc truncates elements towards zero.
+    // NAME truncates elements towards zero.
 - go: TruncWithPrecisionMasked
   commutative: false
   extension: "AVX.*"
   constImm: 3
   masked: true
   documentation: !string |-
-    // TruncWithPrecisionMasked truncates elements with specified precision.
+    // NAME truncates elements with specified precision.
 - go: DiffWithTruncWithPrecisionMasked
   commutative: false
   extension: "AVX.*"
   constImm: 3
   masked: true
   documentation: !string |-
-    // DiffWithTruncWithPrecisionMasked computes the difference after truncating with specified precision.
-
+    // NAME computes the difference after truncating with specified precision.
 - go: AddSub
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // AddSub subtracts even elements and adds odd elements of two vectors.
\ No newline at end of file
+    // NAME subtracts even elements and adds odd elements of two vectors.
diff --git a/internal/simdgen/ops/GaloisField/categories.yaml b/internal/simdgen/ops/GaloisField/categories.yaml
index 4184c5e..62d8709 100644
--- a/internal/simdgen/ops/GaloisField/categories.yaml
+++ b/internal/simdgen/ops/GaloisField/categories.yaml
@@ -4,7 +4,7 @@
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // GaloisFieldAffineTransformMasked computes an affine transformation in GF(2^8):
+    // NAME computes an affine transformation in GF(2^8):
     // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes;
     // b is an 8-bit vector. The affine transformation is y * x + b, with each element of y
     // corresponding to a group of 8 elements in x.
@@ -13,7 +13,7 @@
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // GaloisFieldAffineTransformInverseMasked computes an affine transformation in GF(2^8),
+    // NAME computes an affine transformation in GF(2^8),
     // with x inverted with respect to reduction polynomial x^8 + x^4 + x^3 + x + 1:
     // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes;
     // b is an 8-bit vector. The affine transformation is y * x + b, with each element of y
@@ -23,5 +23,5 @@
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // GaloisFieldMulMasked computes element-wise GF(2^8) multiplication with
-    // reduction polynomial x^8 + x^4 + x^3 + x + 1.
\ No newline at end of file
+    // NAME computes element-wise GF(2^8) multiplication with
+    // reduction polynomial x^8 + x^4 + x^3 + x + 1.
diff --git a/internal/simdgen/ops/IntOnlyArith/categories.yaml b/internal/simdgen/ops/IntOnlyArith/categories.yaml
index fc277f8..76ab14b 100644
--- a/internal/simdgen/ops/IntOnlyArith/categories.yaml
+++ b/internal/simdgen/ops/IntOnlyArith/categories.yaml
@@ -3,39 +3,36 @@
   commutative: true
   extension: "AVX.*" # VPAVGB/W are available across various AVX versions
   documentation: !string |-
-    // Average computes the rounded average of corresponding elements.
+    // NAME computes the rounded average of corresponding elements.
 - go: AverageMasked
   commutative: true
   masked: true
   extension: "AVX512.*" # Masked operations are typically AVX512
   documentation: !string |-
-    // AverageMasked computes the rounded average of corresponding elements.
-
+    // NAME computes the rounded average of corresponding elements.
 - go: Absolute
   commutative: false
   # Unary operation, not commutative
   extension: "AVX.*" # VPABSB/W/D are AVX, VPABSQ is AVX512
   documentation: !string |-
-    // Absolute computes the absolute value of each element.
+    // NAME computes the absolute value of each element.
 - go: AbsoluteMasked
   commutative: false
   masked: true
   extension: "AVX512.*"
   documentation: !string |-
-    // AbsoluteMasked computes the absolute value of each element.
-
+    // NAME computes the absolute value of each element.
 - go: Sign
   # Applies sign of second operand to first: sign(val, sign_src)
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // Sign returns the product of the first operand with -1, 0, or 1,
+    // NAME returns the product of the first operand with -1, 0, or 1,
     // whichever constant is nearest to the value of the second operand.
   # Sign does not have masked version
-
 - go: PopCountMasked
   commutative: false
   masked: true
   extension: "AVX512.*" # VPOPCNT instructions are AVX512 (BITALG or VPOPCNTDQ)
   documentation: !string |-
-    // PopCountMasked counts the number of set bits in each element.
\ No newline at end of file
+    // NAME counts the number of set bits in each element.
diff --git a/internal/simdgen/ops/MLOps/categories.yaml b/internal/simdgen/ops/MLOps/categories.yaml
index d26b846..65f7462 100644
--- a/internal/simdgen/ops/MLOps/categories.yaml
+++ b/internal/simdgen/ops/MLOps/categories.yaml
@@ -3,95 +3,94 @@
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // PairDotProd multiplies the elements and add the pairs together,
+    // NAME multiplies the elements and adds the pairs together,
     // yielding a vector of half as many elements with twice the input element size.
 - go: PairDotProdMasked
   masked: true
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // PairDotProdMasked multiplies the elements and add the pairs together,
+    // NAME multiplies the elements and adds the pairs together,
     // yielding a vector of half as many elements with twice the input element size.
 # TODO: maybe simplify this name within the receiver-type + method-naming scheme we use.
 - go: SaturatedUnsignedSignedPairDotProd
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // SaturatedUnsignedSignedPairDotProd multiplies the elements and add the pairs together with saturation,
+    // NAME multiplies the elements and adds the pairs together with saturation,
     // yielding a vector of half as many elements with twice the input element size.
 - go: SaturatedUnsignedSignedPairDotProdMasked
   masked: true
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // SaturatedUnsignedSignedPairDotProdMasked multiplies the elements and add the pairs together with saturation,
+    // NAME multiplies the elements and adds the pairs together with saturation,
     // yielding a vector of half as many elements with twice the input element size.
-
 # QuadDotProd, i.e. VPDPBUSD(S) are operations with src/dst on the same register, we are not supporting this as of now.
 - go: DotProdBroadcast
   commutative: true
   extension: "AVX.*"
   documentation: !string |-
-    // DotProdBroadcast multiplies all elements and broadcasts the sum.
+    // NAME multiplies all elements and broadcasts the sum.
 - go: UnsignedSignedQuadDotProdAccumulate
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of x and y and then adds z.
+    // NAME performs dot products on groups of 4 elements of x and y and then adds z.
 - go: UnsignedSignedQuadDotProdAccumulateMasked
   masked: true
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // UnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of x and y and then adds z.
+    // NAME performs dot products on groups of 4 elements of x and y and then adds z.
 - go: SaturatedUnsignedSignedQuadDotProdAccumulate
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of x and y and then adds z.
+    // NAME performs dot products on groups of 4 elements of x and y and then adds z.
 - go: SaturatedUnsignedSignedQuadDotProdAccumulateMasked
   masked: true
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // SaturatedUnsignedSignedQuadDotProdAccumulateMasked multiplies performs dot products on groups of 4 elements of x and y and then adds z.
+    // NAME performs dot products on groups of 4 elements of x and y and then adds z.
 - go: PairDotProdAccumulate
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // PairDotProdAccumulate performs dot products on pairs of elements of x and y and then adds z.
+    // NAME performs dot products on pairs of elements of x and y and then adds z.
 - go: PairDotProdAccumulateMasked
   masked: true
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // PairDotProdAccumulateMasked performs dot products on pairs of elements of x and y and then adds z.
+    // NAME performs dot products on pairs of elements of x and y and then adds z.
 - go: SaturatedPairDotProdAccumulate
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // SaturatedPairDotProdAccumulate performs dot products on pairs of elements of x and y and then adds z.
+    // NAME performs dot products on pairs of elements of x and y and then adds z.
 - go: SaturatedPairDotProdAccumulateMasked
   masked: true
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // SaturatedPairDotProdAccumulateMasked performs dot products on pairs of elements of x and y and then adds z.
+    // NAME performs dot products on pairs of elements of x and y and then adds z.
 - go: FusedMultiplyAddMasked
   masked: true
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // FusedMultiplyAddMasked performs (x * y) + z.
+    // NAME performs (x * y) + z.
 - go: FusedMultiplyAddSubMasked
   masked: true
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // FusedMultiplyAddSubMasked performs (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
+    // NAME performs (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
 - go: FusedMultiplySubAddMasked
   masked: true
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // FusedMultiplySubAddMasked performs (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
+    // NAME performs (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
diff --git a/internal/simdgen/ops/MinMax/categories.yaml b/internal/simdgen/ops/MinMax/categories.yaml
index 929bfad..ce87994 100644
--- a/internal/simdgen/ops/MinMax/categories.yaml
+++ b/internal/simdgen/ops/MinMax/categories.yaml
@@ -3,21 +3,21 @@
   commutative: true
   extension: "AVX.*"
   documentation: !string |-
-    // Max computes the maximum of corresponding elements.
+    // NAME computes the maximum of corresponding elements.
 - go: MaxMasked
   commutative: true
   masked: true
   extension: "AVX.*"
   documentation: !string |-
-    // MaxMasked computes the maximum of corresponding elements.
+    // NAME computes the maximum of corresponding elements.
 - go: Min
   commutative: true
   extension: "AVX.*"
   documentation: !string |-
-    // Min computes the minimum of corresponding elements.
+    // NAME computes the minimum of corresponding elements.
 - go: MinMasked
   commutative: true
   masked: true
   extension: "AVX.*"
   documentation: !string |-
-    // MinMasked computes the minimum of corresponding elements.
+    // NAME computes the minimum of corresponding elements.
diff --git a/internal/simdgen/ops/Moves/categories.yaml b/internal/simdgen/ops/Moves/categories.yaml
index d6c4d5d..dd30ca8 100644
--- a/internal/simdgen/ops/Moves/categories.yaml
+++ b/internal/simdgen/ops/Moves/categories.yaml
@@ -3,55 +3,50 @@
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // SetElem sets a single constant-indexed element's value.
+    // NAME sets a single constant-indexed element's value.
 - go: GetElem
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // GetElem retrieves a single constant-indexed element's value.
+    // NAME retrieves a single constant-indexed element's value.
 - go: Set128
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // Set128 combines a 128-bit vector with a 256-bit vector, where the constant operand specifies whether the low (0) or high (1) half is receives the smaller vector.
+    // NAME combines a 128-bit vector with a 256-bit vector, where the constant operand specifies whether the low (0) or high (1) half receives the smaller vector.
 - go: Get128
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // Get128 retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand.
-
-
+    // NAME retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand.
 - go: Permute
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // Permute performs a full permutation of vector x using indices:
+    // NAME performs a full permutation of vector x using indices:
     // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
     // Only the needed bits to represent x's index are used in indices' elements.
-
 - go: PermuteMasked
   commutative: false
   masked: true
   extension: "AVX.*"
   documentation: !string |-
-    // PermuteMasked performs a full permutation of vector y using indices:
+    // NAME performs a full permutation of vector x using indices:
     // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
     // Only the needed bits to represent x's index are used in indices' elements.
-
 - go: Permute2Masked # Permute2Masked is only available on or after AVX512
   commutative: false
   masked: true
   extension: "AVX.*"
   documentation: !string |-
-    // Permute2Masked performs a full permutation of vector x, y using indices:
+    // NAME performs a full permutation of vector x, y using indices:
     // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
     // where xy is x appending y.
     // Only the needed bits to represent xy's index are used in indices' elements.
-
 - go: Compress
   commutative: false
   # The mask in Compress is a control mask rather than a write mask, so we don't mark it "masked"
   extension: "AVX.*"
   documentation: !string |-
-    // Compress performs a compression on vector x using mask by
+    // NAME performs a compression on vector x using mask by
     // selecting elements as indicated by mask, and pack them to lower indexed elements.
diff --git a/internal/simdgen/ops/Mul/categories.yaml b/internal/simdgen/ops/Mul/categories.yaml
index 1884d66..8dc51f4 100644
--- a/internal/simdgen/ops/Mul/categories.yaml
+++ b/internal/simdgen/ops/Mul/categories.yaml
@@ -3,45 +3,45 @@
   commutative: true
   extension: "AVX.*"
   documentation: !string |-
-    // Mul multiplies corresponding elements of two vectors.
+    // NAME multiplies corresponding elements of two vectors.
 - go: MulEvenWiden
   commutative: true
   extension: "AVX.*"
   documentation: !string |-
-    // MulEvenWiden multiplies even-indexed elements, widening the result.
+    // NAME multiplies even-indexed elements, widening the result.
     // Result[i] = v1.Even[i] * v2.Even[i].
 - go: MulHigh
   commutative: true
   extension: "AVX.*"
   documentation: !string |-
-    // MulHigh multiplies elements and stores the high part of the result.
+    // NAME multiplies elements and stores the high part of the result.
 - go: MulLow
   commutative: true
   extension: "AVX.*"
   documentation: !string |-
-    // MulLow multiplies elements and stores the low part of the result.
+    // NAME multiplies elements and stores the low part of the result.
 - go: MulMasked
   masked: true
   commutative: true
   extension: "AVX.*"
   documentation: !string |-
-    // MulMasked multiplies corresponding elements of two vectors.
+    // NAME multiplies corresponding elements of two vectors.
 - go: MulEvenWidenMasked
   masked: true
   commutative: true
   extension: "AVX.*"
   documentation: !string |-
-    // MulEvenWidenMasked multiplies even-indexed elements, widening the result.
+    // NAME multiplies even-indexed elements, widening the result.
     // Result[i] = v1.Even[i] * v2.Even[i].
 - go: MulHighMasked
   masked: true
   commutative: true
   extension: "AVX.*"
   documentation: !string |-
-    // MulHighMasked multiplies elements and stores the high part of the result.
+    // NAME multiplies elements and stores the high part of the result.
 - go: MulLowMasked
   masked: true
   commutative: true
   extension: "AVX.*"
   documentation: !string |-
-    // MulLowMasked multiplies elements and stores the low part of the result.
+    // NAME multiplies elements and stores the low part of the result.
diff --git a/internal/simdgen/ops/ShiftRotate/categories.yaml b/internal/simdgen/ops/ShiftRotate/categories.yaml
index b9e2a63..71e7825 100644
--- a/internal/simdgen/ops/ShiftRotate/categories.yaml
+++ b/internal/simdgen/ops/ShiftRotate/categories.yaml
@@ -4,21 +4,21 @@
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
+    // NAME shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
 - go: ShiftAllLeftMasked
   nameAndSizeCheck: true
   masked: true
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
+    // NAME shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
 - go: ShiftAllRight
   signed: false
   nameAndSizeCheck: true
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
+    // NAME shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
 - go: ShiftAllRightMasked
   signed: false
   nameAndSizeCheck: true
@@ -26,14 +26,14 @@
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
+    // NAME shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
 - go: ShiftAllRight
   signed: true
   nameAndSizeCheck: true
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
+    // NAME shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
 - go: ShiftAllRightMasked
   signed: true
   nameAndSizeCheck: true
@@ -41,28 +41,27 @@
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
-
+    // NAME shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
 - go: ShiftLeft
   nameAndSizeCheck: true
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
+    // NAME shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
 - go: ShiftLeftMasked
   nameAndSizeCheck: true
   masked: true
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
+    // NAME shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
 - go: ShiftRight
   signed: false
   nameAndSizeCheck: true
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+    // NAME shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
 - go: ShiftRightMasked
   signed: false
   nameAndSizeCheck: true
@@ -70,14 +69,14 @@
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+    // NAME shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
 - go: ShiftRight
   signed: true
   nameAndSizeCheck: true
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
+    // NAME shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
 - go: ShiftRightMasked
   signed: true
   nameAndSizeCheck: true
@@ -85,44 +84,42 @@
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-
+    // NAME shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
 - go: RotateAllLeftMasked
   nameAndSizeCheck: true
   masked: true
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate.
+    // NAME rotates each element to the left by the number of bits specified by the immediate.
 - go: RotateLeftMasked
   nameAndSizeCheck: true
   masked: true
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements.
+    // NAME rotates each element in x to the left by the number of bits specified by y's corresponding elements.
 - go: RotateAllRightMasked
   nameAndSizeCheck: true
   masked: true
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate.
+    // NAME rotates each element to the right by the number of bits specified by the immediate.
 - go: RotateRightMasked
   nameAndSizeCheck: true
   masked: true
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements.
-
+    // NAME rotates each element in x to the right by the number of bits specified by y's corresponding elements.
 - go: ShiftAllLeftAndFillUpperFromMasked
   nameAndSizeCheck: true
   masked: true
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
+    // NAME shifts each element of x to the left by the number of bits specified by the
     // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
 - go: ShiftAllRightAndFillUpperFromMasked
   nameAndSizeCheck: true
@@ -130,7 +127,7 @@
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
+    // NAME shifts each element of x to the right by the number of bits specified by the
     // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
 - go: ShiftLeftAndFillUpperFromMasked
   nameAndSizeCheck: true
@@ -138,7 +135,7 @@
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the
+    // NAME shifts each element of x to the left by the number of bits specified by the
     // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
 - go: ShiftRightAndFillUpperFromMasked
   nameAndSizeCheck: true
@@ -146,5 +143,5 @@
   commutative: false
   extension: "AVX.*"
   documentation: !string |-
-    // ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the
+    // NAME shifts each element of x to the right by the number of bits specified by the
     // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.