internal/simdgen/go.yaml - arch - Git at Google

 !sum
 # TODO: remove the "Const Immediate" from the documentation field, it's there only for debug purposes.
 # Add
 # The asm value is a pattern with two alternatives: VPADD followed by one of
 # B/W/D/Q, or VADDP followed by S or D.
 # &any anchors an operand whose only constraint is the Go type variable $t, so
 # every operand written as *any in a rule must resolve to the same Go type.
 - go: Add
   asm: "VPADD[BWDQ]|VADDP[SD]"
   in:
   - &any
     go: $t
   - *any
   out:
   - *any
 # Masked variant: same pattern, with a mask-class operand as the first input.
 - go: MaskedAdd
   asm: "VPADD[BWDQ]|VADDP[SD]"
   in:
   - class: mask
   - *any
   - *any
   out:
   - *any
 # Saturated Add
 # Signed and unsigned saturation are separate instructions: the rules with
 # base: int select VPADDS*, the rules with base: uint select VPADDUS*.
 # NOTE(review): only byte and word forms of these instructions exist; the
 # [BWDQ] class is kept to match the neighbouring rules and presumably just
 # finds no D/Q candidates - confirm against the XED data.
 - go: SaturatedAdd
   asm: "VPADDS[BWDQ]"
   in:
   # &int: operand of Go type $t restricted to signed integer elements.
   - &int
     go: $t
     base: int
   - *int
   out:
   - *int
 - go: SaturatedAdd
   asm: "VPADDUS[BWDQ]"
   in:
   # &uint: operand of Go type $t restricted to unsigned integer elements.
   - &uint
     go: $t
     base: uint
   - *uint
   out:
   - *uint
 # Masked variants: a mask-class operand is the first input.
 - go: MaskedSaturatedAdd
   asm: "VPADDS[BWDQ]"
   in:
   - class: mask
   - *int
   - *int
   out:
   - *int
 - go: MaskedSaturatedAdd
   asm: "VPADDUS[BWDQ]"
   in:
   - class: mask
   - *uint
   - *uint
   out:
   - *uint

 # Sub
 # Same shape as Add: two inputs and one output sharing the Go type $t.
 # The input and output lists are anchored here as &2any (two *any operands)
 # and &1any (one *any operand) so that other rules can alias the whole list.
 - go: Sub
   asm: "VPSUB[BWDQ]|VSUBP[SD]"
   in: &2any
   - *any
   - *any
   out: &1any
   - *any
 # Masked variant: a mask-class operand is the first input.
 - go: MaskedSub
   asm: "VPSUB[BWDQ]|VSUBP[SD]"
   in:
   - class: mask
   - *any
   - *any
   out:
   - *any
 # Saturated Sub
 # Signed and unsigned saturation are separate instructions: the rules with
 # signed operands select VPSUBS*, the rules with unsigned operands select
 # VPSUBUS*.
 # NOTE(review): only byte and word forms of these instructions exist; the
 # [BWDQ] class is kept to match the neighbouring rules and presumably just
 # finds no D/Q candidates - confirm against the XED data.
 # &2int / &1int anchor the signed two-input and one-output lists for reuse.
 - go: SaturatedSub
   asm: "VPSUBS[BWDQ]"
   in: &2int
   - *int
   - *int
   out: &1int
   - *int
 - go: SaturatedSub
   asm: "VPSUBUS[BWDQ]"
   in:
   - *uint
   - *uint
   out:
   - *uint
 # Masked variants: a mask-class operand is the first input.
 - go: MaskedSaturatedSub
   asm: "VPSUBS[BWDQ]"
   in:
   - class: mask
   - *int
   - *int
   out:
   - *int
 - go: MaskedSaturatedSub
   asm: "VPSUBUS[BWDQ]"
   in:
   - class: mask
   - *uint
   - *uint
   out:
   - *uint
 # Pairwise (horizontal) add/sub.
 # These rules reuse whole operand lists through aliases: *2any / *1any are the
 # two-input / one-output lists of same-typed operands with no base restriction,
 # *2int / *1int are the corresponding lists restricted to base: int.
 # Integer and float forms are separate rules because the mnemonics differ
 # (VPHADD/VPHSUB vs. VHADDP/VHSUBP).
 - go: PairwiseAdd
   asm: "VPHADD[DW]"
   in: *2any
   out: *1any
 - go: PairwiseSub
   asm: "VPHSUB[DW]"
   in: *2any
   out: *1any
 - go: PairwiseAdd
   asm: "VHADDP[SD]" # floats
   in: *2any
   out: *1any
 - go: PairwiseSub
   asm: "VHSUBP[SD]"  # floats
   in: *2any
   out: *1any
 # Saturated forms are only declared for signed integer operands.
 - go: SaturatedPairwiseAdd
   asm: "VPHADDS[DW]"
   in: *2int
   out: *1int
 - go: SaturatedPairwiseSub
   asm: "VPHSUBS[DW]"
   in: *2int
   out: *1int
 # In the XED data, *all* floating point bitwise logic operations have their
 # operand type marked as uint. We are not trying to understand why Intel
 # decided that they want FP bit-wise logic operations, but this irregularity
 # has to be dealt with in separate rules with some overwrites.

 # Int/Uint operations.
 # Non-masked for 128/256-bit vectors
 # For binary operations, we constrain their two inputs and one output to the
 # same Go type using a variable. This will map to instructions before AVX512.
 - go: And
   asm: "VPAND"
   in:
   # &any is (re-)anchored here: an operand constrained only by the type variable $t.
   - &any
     go: $t
   - *any
   out:
   - *any
 # Masked
 # Looks like VPAND$xi works only for 2 shapes for integer:
 # Dword and Qword.
 # TODO: should we wildcard other smaller elemBits to VPANDQ or
 # VPANDD? Looks like elemBits doesn't really matter after all in bitwise operations.
 - go: MaskedAnd
   asm: "VPAND[DQ]"
   in:
   - class: mask
   - *any
   - *any
   out:
   - *any

 # AndNot / Or / Xor on integer vectors.
 # Each operation comes as a pair: the non-masked rule names the bare mnemonic,
 # the masked rule names the D/Q-suffixed mnemonics and takes a mask-class
 # operand as its first input. All data operands share the Go type $t (*any).
 - go: AndNot
   asm: "VPANDN"
   in:
   - *any
   - *any
   out:
   - *any
 - go: MaskedAndNot
   asm: "VPANDN[DQ]"
   in:
   - class: mask
   - *any
   - *any
   out:
   - *any

 - go: Or
   asm: "VPOR"
   in:
   - *any
   - *any
   out:
   - *any
 - go: MaskedOr
   asm: "VPOR[DQ]"
   in:
   - class: mask
   - *any
   - *any
   out:
   - *any

 - go: Xor
   asm: "VPXOR"
   in:
   - *any
   - *any
   out:
   - *any
 - go: MaskedXor
   asm: "VPXOR[DQ]"
   in:
   - class: mask
   - *any
   - *any
   out:
   - *any

 # FP operations.
 # Set the [base] to be "int" to not include duplicates(excluding "uint").
 # [base] is not used when [overwriteBase] is present.
 # &intToFloat is the operand shared by every rule in this group: it is
 # declared with base: int and carries overwriteBase: float.
 - go: And
   asm: "VANDP[SD]"
   in:
   - &intToFloat
     go: $t
     base: int
     overwriteBase: float
   - *intToFloat
   out:
   - *intToFloat
 # Masked variants in this group use the same mnemonic pattern as the
 # non-masked rule and add a mask-class operand as the first input.
 - go: MaskedAnd
   asm: "VANDP[SD]"
   in:
   - class: mask
   - *intToFloat
   - *intToFloat
   out:
   - *intToFloat

 - go: AndNot
   asm: "VANDNP[SD]"
   in:
   - *intToFloat
   - *intToFloat
   out:
   - *intToFloat
 - go: MaskedAndNot
   asm: "VANDNP[SD]"
   in:
   - class: mask
   - *intToFloat
   - *intToFloat
   out:
   - *intToFloat

 - go: Or
   asm: "VORP[SD]"
   in:
   - *intToFloat
   - *intToFloat
   out:
   - *intToFloat
 - go: MaskedOr
   asm: "VORP[SD]"
   in:
   - class: mask
   - *intToFloat
   - *intToFloat
   out:
   - *intToFloat

 - go: Xor
   asm: "VXORP[SD]"
   in:
   - *intToFloat
   - *intToFloat
   out:
   - *intToFloat
 - go: MaskedXor
   asm: "VXORP[SD]"
   in:
   - class: mask
   - *intToFloat
   - *intToFloat
   out:
   - *intToFloat
 # Ints
 # Equal/Greater map to the dedicated PCMPEQ/PCMPGT mnemonics (the V prefix is
 # optional in the pattern). &int2 anchors the two-signed-input list and
 # &anyvregToMask anchors the output operand, whose class is overwritten to mask.
 - go: Equal
   asm: "V?PCMPEQ[BWDQ]"
   in: &int2
   - &int
     go: $t
     base: int # Looks like PCMP is on signed integers - but for equals does it really matter?
   - *int
   out:
   - &anyvregToMask
     go: $t # We still need the output to be the same shape as inputs.
     overwriteBase: int
     overwriteClass: mask
 - go: Greater
   asm: "V?PCMPGT[BWDQ]"
   in: *int2
   out:
   - *anyvregToMask
 # Masked forms: mask-class first input, and the output is declared directly
 # as class: mask (no overwrite). &maskint2 anchors the input list for reuse.
 - go: MaskedEqual
   asm: "V?PCMPEQ[BWDQ]"
   in: &maskint2
   - class: mask
   - *int
   - *int
   out:
   - class: mask
 - go: MaskedGreater
   asm: "V?PCMPGT[BWDQ]"
   in: *maskint2
   out:
   - class: mask
 # The const imm predicated compares after AVX512, please see categories.yaml
 # for const imm specification.
 # The go field here is itself a pattern naming six operations; all of them
 # share one asm pattern and differ only in the immediate operand.
 - go: Masked(Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual)
   asm: "VPCMP[BWDQ]"
   in:
   - class: mask
   - &int
     go: $t
     base: int
   - *int
   - class: immediate
     const: 0 # Just a placeholder, will be overwritten by const imm porting.
   out:
   - class: mask
 # Unsigned counterpart (VPCMPU*), same structure with base: uint operands.
 - go: Masked(Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual)
   asm: "VPCMPU[BWDQ]"
   in:
   - class: mask
   - &uint
     go: $t
     base: uint
   - *uint
   - class: immediate
     const: 0
   out:
   - class: mask

 # Floats
 # All float comparisons (including IsNan) go through one asm pattern,
 # VCMPP[SD], with the predicate carried in an immediate operand. The const: 0
 # written here has the same role as the placeholder in the integer compare rules.
 - go: Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual|IsNan
   asm: "VCMPP[SD]"
   in:
   - &float
     go: $t
     base: float
   - *float
   - class: immediate
     const: 0
   out:
   - go: $t # We still need the output to be the same shape as inputs.
     overwriteBase: int
     overwriteClass: mask
 # Masked forms: mask-class first input; output declared directly as class: mask.
 - go: Masked(Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual|IsNan)
   asm: "VCMPP[SD]"
   in:
   - class: mask
   - *float
   - *float
   - class: immediate
     const: 0
   out:
   - class: mask
 # Floating-point arithmetic.
 # Anchors introduced by this group:
 #   &fp       - operand of Go type $t with base: float
 #   &2fp      - input list [fp, fp]
 #   &1fp      - list [fp]; used both as a one-operand input list and as the output list
 #   &1mask2fp - input list [mask, fp, fp]
 #   &1mask1fp - input list [mask, fp]
 - go: Div
   asm: "V?DIVP[SD]"
   in: &2fp
   - &fp
     go: $t
     base: float
   - *fp
   out: &1fp
   - *fp
 - go: MaskedDiv
   asm: "V?DIVP[SD]"
   in: &1mask2fp
   - class: mask
   - *fp
   - *fp
   out: *1fp
 - go: Sqrt
   asm: "V?SQRTP[SD]"
   in: *1fp
   out: *1fp
 - go: MaskedSqrt
   asm: "V?SQRTP[SD]"
   in: &1mask1fp
   - class: mask
   - *fp
   out: *1fp
 # Only a masked rule is declared for ApproximateReciprocal in this group.
 - go: MaskedApproximateReciprocal
   asm: "VRCP14P[SD]"
   in: *1mask1fp
   out: *1fp
 # The non-masked pattern names only the PS mnemonic (no [SD] class), while
 # the masked rule uses the separate VRSQRT14P[SD] pattern.
 - go: ApproximateReciprocalOfSqrt
   asm: "V?RSQRTPS"
   in: *1fp
   out: *1fp
 - go: MaskedApproximateReciprocalOfSqrt
   asm: "VRSQRT14P[SD]"
   in: *1mask1fp
   out: *1fp
 - go: MaskedMulByPowOf2
   asm: "VSCALEFP[SD]"
   in: *1mask2fp
   out: *1fp

 # Rounding family. The go field is a pattern naming several operations that
 # share one asm pattern; the immediate operand (const: 0 placeholder) is what
 # distinguishes them.
 - go: "Round|Ceil|Floor|Trunc"
   asm: "VROUNDP[SD]"
   in:
   - *fp
   - class: immediate
     const: 0 # place holder
   out: *1fp

 # The WithPrecision rules additionally declare immOffset: 4 on the immediate.
 - go: "Masked(Round|Ceil|Floor|Trunc)(SuppressException)?WithPrecision"
   asm: "VRNDSCALEP[SD]"
   in:
   - class: mask
   - *fp
   - class: immediate
     const: 0 # place holder
     immOffset: 4 # "M", round to numbers with M digits after dot(by means of binary number).
   out: *1fp
 - go: "MaskedDiffWith(Round|Ceil|Floor|Trunc)(SuppressException)?WithPrecision"
   asm: "VREDUCEP[SD]"
   in:
   - class: mask
   - *fp
   - class: immediate
     const: 0 # place holder
     immOffset: 4 # "M", round to numbers with M digits after dot(by means of binary number).
   out: *1fp

 # AddSub: two float inputs and one float output, all of the same Go type.
 - go: "AddSub"
   asm: "VADDSUBP[SD]"
   in:
   - *fp
   - *fp
   out:
   - *fp
 # Galois field operations (masked rules only).
 # &AffineArgs anchors the input list [mask, uint8, uint8x8, immediate].
 # &uint8 and &uint8x8 use different type variables ($t and $t2), so the two
 # data inputs are not forced to the same Go type; the output is *uint8.
 # NOTE(review): the anchors are named uint8 / uint8x8 but only declare
 # base: uint - the element width presumably comes from the matched
 # instruction; confirm.
 - go: MaskedGaloisFieldAffineTransform
   asm: VGF2P8AFFINEQB
   in: &AffineArgs
   - class: mask
   - &uint8
     go: $t
     base: uint
   - &uint8x8
     go: $t2
     base: uint
   # Immediate with immOffset: 0 and no const value.
   - &pureImmVar
     class: immediate
     immOffset: 0
   out:
   - *uint8

 - go: MaskedGaloisFieldAffineTransformInversed
   asm: VGF2P8AFFINEINVQB
   in: *AffineArgs
   out:
   - *uint8

 # Multiplication: both data inputs and the output are *uint8 (same Go type).
 - go: MaskedGaloisFieldMul
   asm: VGF2P8MULB
   in:
   - class: mask
   - *uint8
   - *uint8
   out:
   - *uint8
 # Average (unsigned byte, unsigned word)
 # Instructions: VPAVGB, VPAVGW
 - go: Average
   asm: "VPAVG[BW]" # Matches VPAVGB (byte) and VPAVGW (word)
   in:
   - &uint_t # $t will be Uint8xN for VPAVGB, Uint16xN for VPAVGW
     go: $t
     base: uint
   - *uint_t
   out:
   - *uint_t
 # Masked variant: mask-class first input, same data operands.
 - go: MaskedAverage
   asm: "VPAVG[BW]"
   in:
   - class: mask
   - *uint_t
   - *uint_t
   out:
   - *uint_t

 # Absolute Value (signed byte, word, dword, qword)
 # Instructions: VPABSB, VPABSW, VPABSD, VPABSQ
 - go: Absolute
   asm: "VPABS[BWDQ]" # Matches VPABSB, VPABSW, VPABSD, VPABSQ
   in:
   - &int_t # $t will be Int8xN, Int16xN, Int32xN, Int64xN
     go: $t
     base: int
   out:
   - *int_t # Output is magnitude, fits in the same signed type
 # Masked variant: mask-class first input, single data operand.
 - go: MaskedAbsolute
   asm: "VPABS[BWDQ]"
   in:
   - class: mask
   - *int_t
   out:
   - *int_t

 # Sign Operation (signed byte, word, dword)
 # Applies sign of second operand to the first.
 # Instructions: VPSIGNB, VPSIGNW, VPSIGND
 # No masked rule is declared for Sign.
 - go: Sign
   asm: "VPSIGN[BWD]" # Matches VPSIGNB, VPSIGNW, VPSIGND
   in:
   - *int_t # value to apply sign to
   - *int_t # value from which to take the sign
   out:
   - *int_t

 # Population Count (count set bits in each element)
 # Instructions: VPOPCNTB, VPOPCNTW (AVX512_BITALG)
 #               VPOPCNTD, VPOPCNTQ (AVX512_VPOPCNTDQ)
 # Only a masked rule is declared. &any is re-anchored here with the same
 # definition as before (an operand constrained only by $t).
 - go: MaskedPopCount
   asm: "VPOPCNT[BWDQ]"
   in:
   - class: mask
   - &any
     go: $t
   out:
   - *any
 # Dot products.
 # Inputs and outputs use different type variables ($t, $t2, $t3, ...) because
 # the output element width differs from the input element width.
 # Anchors (re-)defined here: &int ($t, int), &int2 ($t2, int), &uint ($t, uint),
 # &int3 ($t3, int).
 - go: PairDotProd
   asm: VPMADDWD
   in:
   - &int
     go: $t
     base: int
   - *int
   out:
   - &int2 # The elemBits are different
     go: $t2
     base: int
 - go: MaskedPairDotProd
   asm: VPMADDWD
   in:
   - class: mask
   - *int
   - *int
   out:
   - *int2
 # Mixed-sign form: first input unsigned ($t), second input signed ($t3),
 # output signed ($t2).
 - go: SaturatedUnsignedSignedPairDotProd
   asm: VPMADDUBSW
   in:
   - &uint
     go: $t
     base: uint
   - &int3
     go: $t3
     base: int
   out:
   - *int2
 # The masked form spells its inputs inline with overwriteElementBits: 8 and
 # uses *int3 (not *int2) as the output; every operand still has its own type
 # variable, so the different naming does not tie any operands together.
 - go: MaskedSaturatedUnsignedSignedPairDotProd
   asm: VPMADDUBSW
   in:
   - class: mask
   - go: $t1
     base: uint
     overwriteElementBits: 8
   - go: $t2
     base: int
     overwriteElementBits: 8
   out:
   - *int3
 # DotProdBroadcast: float operands with elemBits: 64 and a fixed immediate
 # (const: 127). The same operand definition is used for both inputs and the output.
 - go: DotProdBroadcast
   asm: VDPPD
   in:
   - &dpb_src
     go: $t
     base: float
     elemBits: 64
     bits: $bits
   - *dpb_src
   - class: immediate
     const: 127
   out:
   - *dpb_src
 # Dot product with accumulate.
 # Operand layout in every rule of this group: the accumulator is the first
 # input and is also the output; in the masked rules the mask-class operand
 # comes second, after the accumulator, followed by the two sources.
 # Quad (byte) forms: accumulator has elemBits: 32 and no base restriction;
 # sources are uint / int with overwriteElementBits: 8.
 - go: UnsignedSignedQuadDotProdAccumulate
   asm: "VPDPBUSD"
   in:
   - &qdpa_acc
     go: $t_acc
     elemBits: 32
   - &qdpa_src1
     go: $t_src1
     base: uint
     overwriteElementBits: 8
   - &qdpa_src2
     go: $t_src2
     base: int
     overwriteElementBits: 8
   out:
   - *qdpa_acc
 - go: MaskedUnsignedSignedQuadDotProdAccumulate
   asm: "VPDPBUSD"
   in:
   - *qdpa_acc
   - class: mask
   - *qdpa_src1
   - *qdpa_src2
   out:
   - *qdpa_acc
 - go: SaturatedUnsignedSignedQuadDotProdAccumulate
   asm: "VPDPBUSDS"
   in:
   - *qdpa_acc
   - *qdpa_src1
   - *qdpa_src2
   out:
   - *qdpa_acc
 - go: MaskedSaturatedUnsignedSignedQuadDotProdAccumulate
   asm: "VPDPBUSDS"
   in:
   - *qdpa_acc
   - class: mask
   - *qdpa_src1
   - *qdpa_src2
   out:
   - *qdpa_acc
 # Pair (word) forms: accumulator is base: int with elemBits: 32; both sources
 # are base: int with overwriteElementBits: 16.
 - go: PairDotProdAccumulate
   asm: "VPDPWSSD"
   in:
   - &pdpa_acc
     go: $t_acc
     base: int
     elemBits: 32
   - &pdpa_src1
     go: $t_src1
     base: int
     overwriteElementBits: 16
   - &pdpa_src2
     go: $t_src2
     base: int
     overwriteElementBits: 16
   out:
   - *pdpa_acc
 - go: MaskedPairDotProdAccumulate
   asm: "VPDPWSSD"
   in:
   - *pdpa_acc
   - class: mask
   - *pdpa_src1
   - *pdpa_src2
   out:
   - *pdpa_acc
 - go: SaturatedPairDotProdAccumulate
   asm: "VPDPWSSDS"
   in:
   - *pdpa_acc
   - *pdpa_src1
   - *pdpa_src2
   out:
   - *pdpa_acc
 - go: MaskedSaturatedPairDotProdAccumulate
   asm: "VPDPWSSDS"
   in:
   - *pdpa_acc
   - class: mask
   - *pdpa_src1
   - *pdpa_src2
   out:
   - *pdpa_acc
 # Fused multiply-add family (masked rules only, 213 operand-order forms).
 # Every rule takes three float operands of the same Go type plus a mask; the
 # mask-class operand is the second input, after the first data operand.
 # NOTE(review): the first input is presumably the operand that is also the
 # destination, as in the accumulate rules - confirm.
 - go: MaskedFusedMultiplyAdd
   asm: "VFMADD213PS|VFMADD213PD"
   in:
   # &fma_op: operand of Go type $t with base: float. The mapping body is
   # indented like every other anchored operand in this file.
   - &fma_op
     go: $t
     base: float
   - class: mask
   - *fma_op
   - *fma_op
   out:
   - *fma_op
 - go: MaskedFusedMultiplyAddSub
   asm: "VFMADDSUB213PS|VFMADDSUB213PD"
   in:
   - *fma_op
   - class: mask
   - *fma_op
   - *fma_op
   out:
   - *fma_op
 - go: MaskedFusedMultiplySubAdd
   asm: "VFMSUBADD213PS|VFMSUBADD213PD"
   in:
   - *fma_op
   - class: mask
   - *fma_op
   - *fma_op
   out:
   - *fma_op
 # Max / Min.
 # Signed, unsigned and float forms are separate rules because the mnemonics
 # differ (PMAXS/PMINS, PMAXU/PMINU, MAXP/MINP; the V prefix is optional in
 # the patterns). The Max rules (re-)anchor the operand lists and the Min
 # rules alias them:
 #   &2int / &1int / &1mask2int       - signed integer
 #   &2uint / &1uint / &1mask2uint    - unsigned integer
 #   &2float / &1float / &1mask2float - float
 - go: Max
   asm: "V?PMAXS[BWDQ]"
   in: &2int
   - &int
     go: $t
     base: int
   - *int
   out: &1int
   - *int
 - go: Max
   asm: "V?PMAXU[BWDQ]"
   in: &2uint
   - &uint
     go: $t
     base: uint
   - *uint
   out: &1uint
   - *uint
 - go: MaskedMax
   asm: "V?PMAXS[BWDQ]"
   in: &1mask2int
   - class: mask
   - *int
   - *int
   out: *1int
 - go: MaskedMax
   asm: "V?PMAXU[BWDQ]"
   in: &1mask2uint
   - class: mask
   - *uint
   - *uint
   out: *1uint

 - go: Min
   asm: "V?PMINS[BWDQ]"
   in: *2int
   out: *1int
 - go: Min
   asm: "V?PMINU[BWDQ]"
   in: *2uint
   out: *1uint
 - go: MaskedMin
   asm: "V?PMINS[BWDQ]"
   in: *1mask2int
   out: *1int
 - go: MaskedMin
   asm: "V?PMINU[BWDQ]"
   in: *1mask2uint
   out: *1uint

 # Float forms.
 - go: Max
   asm: "V?MAXP[SD]"
   in: &2float
   - &float
     go: $t
     base: float
   - *float
   out: &1float
   - *float
 - go: MaskedMax
   asm: "V?MAXP[SD]"
   in: &1mask2float
   - class: mask
   - *float
   - *float
   out: *1float
 - go: Min
   asm: "V?MINP[SD]"
   in: *2float
   out: *1float
 - go: MaskedMin
   asm: "V?MINP[SD]"
   in: *1mask2float
   out: *1float
 # SetElem: inputs are a vector register, a general-purpose register holding
 # the new element, and an immediate (immOffset: 0, no const value). The
 # output is the same vector operand definition as the first input.
 # The vector and the scalar share the base variable $b, so their base types
 # must agree.
 - go: SetElem
   asm: "VPINSR[BWDQ]"
   in:
   - &t
     class: vreg
     base: $b
   - class: greg
     base: $b
     lanes: 1 # Scalar, darn it!
   - class: immediate
     immOffset: 0
   out:
   - *t
 # "Normal" multiplication is only available for floats.
 # This only covers the single and double precision.
 # Float multiplication. &fp is re-anchored here with the definition used by
 # the other float rules (Go type $t, base: float).
 - go: Mul
   asm: "VMULP[SD]"
   in:
   - &fp
     go: $t
     base: float
   - *fp
   out:
   - *fp
 # Masked variant: mask-class first input.
 - go: MaskedMul
   asm: "VMULP[SD]"
   in:
   - class: mask
   - *fp
   - *fp
   out:
   - *fp

 # Integer multiplications.

 # MulEvenWiden
 # Dword only.
 # Inputs share type variable $t; the output uses a separate variable $t2, so
 # the result type is not tied to the input type.
 # Anchors (re-)defined here and reused by the MulHigh and MulLow rules:
 # &int / &int2 (signed, $t / $t2) and &uint / &uint2 (unsigned, $t / $t2).
 - go: MulEvenWiden
   asm: "VPMULDQ"
   in:
   - &int
     go: $t
     base: int
   - *int
   out:
   - &int2
     go: $t2
     base: int
 - go: MulEvenWiden
   asm: "VPMULUDQ"
   in:
   - &uint
     go: $t
     base: uint
   - *uint
   out:
   - &uint2
     go: $t2
     base: uint
 # Masked variants: mask-class first input.
 - go: MaskedMulEvenWiden
   asm: "VPMULDQ"
   in:
   - class: mask
   - *int
   - *int
   out:
   - *int2
 - go: MaskedMulEvenWiden
   asm: "VPMULUDQ"
   in:
   - class: mask
   - *uint
   - *uint
   out:
   - *uint2

 # MulHigh
 # Word only.
 # Non-masked and masked rules follow.
 # NOTE(review): the outputs use *int2 / *uint2 (type variable $t2), which
 # leaves the output type independent of the input type $t even though this
 # is not a widening operation - presumably the matched instruction fixes it;
 # confirm whether *int / *uint was intended.
 - go: MulHigh
   asm: "VPMULHW"
   in:
   - *int
   - *int
   out:
   - *int2
 - go: MulHigh
   asm: "VPMULHUW"
   in:
   - *uint
   - *uint
   out:
   - *uint2
 - go: MaskedMulHigh
   asm: "VPMULHW"
   in:
   - class: mask
   - *int
   - *int
   out:
   - *int2
 - go: MaskedMulHigh
   asm: "VPMULHUW"
   in:
   - class: mask
   - *uint
   - *uint
   out:
   - *uint2

 # MulLow
 # Signed int only.
 # Non-masked and masked rules follow.
 # NOTE(review): as with MulHigh, the output is *int2 (type variable $t2)
 # rather than *int - confirm that leaving the output type independent of the
 # inputs is intended.
 - go: MulLow
   asm: "VPMULL[WDQ]"
   in:
   - *int
   - *int
   out:
   - *int2
 - go: MaskedMulLow
   asm: "VPMULL[WDQ]"
   in:
   - class: mask
   - *int
   - *int
   out:
   - *int2
 # Integers
 # ShiftAll*
 # The second input of every rule in this group is &vecAsScalar64: an operand
 # declared with Go type Uint64x2 and treatLikeAScalarOfSize: 64.
 # &any is re-anchored here (operand constrained only by $t).
 - go: ShiftAllLeft
   asm: "VPSLL[WDQ]"
   in:
   - &any
     go: $t
   - &vecAsScalar64
     treatLikeAScalarOfSize: 64
     go: Uint64x2
   out:
   - *any
 - go: MaskedShiftAllLeft
   asm: "VPSLL[WDQ]"
   in:
   - class: mask
   - *any
   - *vecAsScalar64
   out:
   - *any
 - go: ShiftAllRight
   asm: "VPSRL[WDQ]"
   in:
   - *any
   - *vecAsScalar64
   out:
   - *any
 - go: MaskedShiftAllRight
   asm: "VPSRL[WDQ]"
   in:
   - class: mask
   - *any
   - *vecAsScalar64
   out:
   - *any
 # The sign-extending right shift restricts the shifted operand to base: int
 # (&int is re-anchored here).
 - go: ShiftAllRightSignExtended
   asm: "VPSRA[WDQ]"
   in:
   - &int
     go: $t
     base: int
   - *vecAsScalar64
   out:
   - *int
 - go: MaskedShiftAllRightSignExtended
   asm: "VPSRA[WDQ]"
   in:
   - class: mask
   - *int
   - *vecAsScalar64
   out:
   - *int

 # Shift* (variable)
 # Both inputs (value and shift counts) and the output share one operand
 # definition, so all three have the same Go type.
 # The W/D forms use *any directly; the Q forms of the left and logical right
 # shifts get their own rules because they need an element-size overwrite.
 - go: ShiftLeft
   asm: "VPSLLV[WD]"
   in:
   - *any
   - *any
   out:
   - *any
 - go: MaskedShiftLeft
   asm: "VPSLLV[WD]"
   in:
   - class: mask
   - *any
   - *any
   out:
   - *any
 # XED data of VPSLLVQ marks the element bits 32 which is off to the actual semantic, we need to overwrite
 # it to 64.
 - go: ShiftLeft
   asm: "VPSLLVQ"
   in:
   - &anyOverwriteElemBits
     go: $t
     overwriteElementBits: 64
   - *anyOverwriteElemBits
   out:
   - *anyOverwriteElemBits
 - go: MaskedShiftLeft
   asm: "VPSLLVQ"
   in:
   - class: mask
   - *anyOverwriteElemBits
   - *anyOverwriteElemBits
   out:
   - *anyOverwriteElemBits
 - go: ShiftRight
   asm: "VPSRLV[WD]"
   in:
   - *any
   - *any
   out:
   - *any
 - go: MaskedShiftRight
   asm: "VPSRLV[WD]"
   in:
   - class: mask
   - *any
   - *any
   out:
   - *any
 # XED data of VPSRLVQ needs the same overwrite as VPSLLVQ.
 - go: ShiftRight
   asm: "VPSRLVQ"
   in:
   - *anyOverwriteElemBits
   - *anyOverwriteElemBits
   out:
   - *anyOverwriteElemBits
 - go: MaskedShiftRight
   asm: "VPSRLVQ"
   in:
   - class: mask
   - *anyOverwriteElemBits
   - *anyOverwriteElemBits
   out:
   - *anyOverwriteElemBits
 # The sign-extending variable shift covers W, D and Q in a single pattern and
 # uses *any with no element-size overwrite and no base restriction.
 - go: ShiftRightSignExtended
   asm: "VPSRAV[WDQ]"
   in:
   - *any
   - *any
   out:
   - *any
 - go: MaskedShiftRightSignExtended
   asm: "VPSRAV[WDQ]"
   in:
   - class: mask
   - *any
   - *any
   out:
   - *any

 # Rotate
 # Only masked rules are declared, and only D/Q element sizes.
 # RotateAll* takes the rotate amount as an immediate (&pureImm: immOffset: 0,
 # no const value); Rotate* takes it as a second vector operand of the same
 # Go type as the value.
 - go: MaskedRotateAllLeft
   asm: "VPROL[DQ]"
   in:
   - class: mask
   - *any
   - &pureImm
     class: immediate
     immOffset: 0
   out:
   - *any
 - go: MaskedRotateAllRight
   asm: "VPROR[DQ]"
   in:
   - class: mask
   - *any
   - *pureImm
   out:
   - *any
 - go: MaskedRotateLeft
   asm: "VPROLV[DQ]"
   in:
   - class: mask
   - *any
   - *any
   out:
   - *any
 - go: MaskedRotateRight
   asm: "VPRORV[DQ]"
   in:
   - class: mask
   - *any
   - *any
   out:
   - *any

 # Bizarre shifts.
 # Only masked rules are declared.
 # The immediate forms (VPSHLD/VPSHRD) list the mask first, then two vector
 # operands and a *pureImm immediate.
 - go: MaskedShiftAllLeftAndFillUpperFrom
   asm: "VPSHLD[WDQ]"
   in:
   - class: mask
   - *any
   - *any
   - *pureImm
   out:
   - *any
 - go: MaskedShiftAllRightAndFillUpperFrom
   asm: "VPSHRD[WDQ]"
   in:
   - class: mask
   - *any
   - *any
   - *pureImm
   out:
   - *any
 # The variable forms (VPSHLDV/VPSHRDV) take three vector operands, and the
 # mask-class operand is the second input, after the first vector operand.
 # NOTE(review): the first operand is presumably also the destination, as in
 # the accumulate rules - confirm.
 - go: MaskedShiftLeftAndFillUpperFrom
   asm: "VPSHLDV[WDQ]"
   in:
   - *any
   - class: mask
   - *any
   - *any
   out:
   - *any
 - go: MaskedShiftRightAndFillUpperFrom
   asm: "VPSHRDV[WDQ]"
   in:
   - *any
   - class: mask
   - *any
   - *any
   out:
   - *any
	!sum
	# TODO: remove the "Const Immediate" from the documentation field, it's there only for debug purposes.
	# Add
	- go: Add
	asm: "VPADD[BWDQ]\|VADDP[SD]"
	in:
	- &any
	go: $t
	- *any
	out:
	- *any
	- go: MaskedAdd
	asm: "VPADD[BWDQ]\|VADDP[SD]"
	in:
	- class: mask
	- *any
	- *any
	out:
	- *any
	# Saturated Add
	- go: SaturatedAdd
	asm: "VPADDS[BWDQ]"
	in:
	- &int
	go: $t
	base: int
	- *int
	out:
	- *int
	- go: SaturatedAdd
	asm: "VPADDS[BWDQ]"
	in:
	- &uint
	go: $t
	base: uint
	- *uint
	out:
	- *uint
	- go: MaskedSaturatedAdd
	asm: "VPADDS[BWDQ]"
	in:
	- class: mask
	- *int
	- *int
	out:
	- *int
	- go: MaskedSaturatedAdd
	asm: "VPADDS[BWDQ]"
	in:
	- class: mask
	- *uint
	- *uint
	out:
	- *uint

	# Sub
	- go: Sub
	asm: "VPSUB[BWDQ]\|VSUBP[SD]"
	in: &2any
	- *any
	- *any
	out: &1any
	- *any
	- go: MaskedSub
	asm: "VPSUB[BWDQ]\|VSUBP[SD]"
	in:
	- class: mask
	- *any
	- *any
	out:
	- *any
	# Saturated Sub
	- go: SaturatedSub
	asm: "VPSUBS[BWDQ]"
	in: &2int
	- *int
	- *int
	out: &1int
	- *int
	- go: SaturatedSub
	asm: "VPSUBS[BWDQ]"
	in:
	- *uint
	- *uint
	out:
	- *uint
	- go: MaskedSaturatedSub
	asm: "VPSUBS[BWDQ]"
	in:
	- class: mask
	- *int
	- *int
	out:
	- *int
	- go: MaskedSaturatedSub
	asm: "VPSUBS[BWDQ]"
	in:
	- class: mask
	- *uint
	- *uint
	out:
	- *uint
	- go: PairwiseAdd
	asm: "VPHADD[DW]"
	in: *2any
	out: *1any
	- go: PairwiseSub
	asm: "VPHSUB[DW]"
	in: *2any
	out: *1any
	- go: PairwiseAdd
	asm: "VHADDP[SD]" # floats
	in: *2any
	out: *1any
	- go: PairwiseSub
	asm: "VHSUBP[SD]" # floats
	in: *2any
	out: *1any
	- go: SaturatedPairwiseAdd
	asm: "VPHADDS[DW]"
	in: *2int
	out: *1int
	- go: SaturatedPairwiseSub
	asm: "VPHSUBS[DW]"
	in: *2int
	out: *1int
	# In the XED data, all floating point bitwise logic operation has their
	# operand type marked as uint. We are not trying to understand why Intel
	# decided that they want FP bit-wise logic operations, but this irregularity
	# has to be dealed with in separate rules with some overwrites.

	# Int/Uint operations.
	# Non-masked for 128/256-bit vectors
	# For binary operations, we constrain their two inputs and one output to the
	# same Go type using a variable. This will map to instructions before AVX512.
	- go: And
	asm: "VPAND"
	in:
	- &any
	go: $t
	- *any
	out:
	- *any
	# Masked
	# Looks like VPAND$xi works only for 2 shapes for integer:
	# Dword and Qword.
	# TODO: should we wildcard other smaller elemBits to VPANDQ or
	# VPANDD? Looks like elemBits doesn't really matter afterall in bitwise operations.
	- go: MaskedAnd
	asm: "VPAND[DQ]"
	in:
	- class: mask
	- *any
	- *any
	out:
	- *any

	- go: AndNot
	asm: "VPANDN"
	in:
	- *any
	- *any
	out:
	- *any
	- go: MaskedAndNot
	asm: "VPANDN[DQ]"
	in:
	- class: mask
	- *any
	- *any
	out:
	- *any

	- go: Or
	asm: "VPOR"
	in:
	- *any
	- *any
	out:
	- *any
	- go: MaskedOr
	asm: "VPOR[DQ]"
	in:
	- class: mask
	- *any
	- *any
	out:
	- *any

	- go: Xor
	asm: "VPXOR"
	in:
	- *any
	- *any
	out:
	- *any
	- go: MaskedXor
	asm: "VPXOR[DQ]"
	in:
	- class: mask
	- *any
	- *any
	out:
	- *any

	# FP operations.
	# Set the [base] to be "int" to not include duplicates(excluding "uint").
	# [base] is not used when [overwriteBase] is present.
	- go: And
	asm: "VANDP[SD]"
	in:
	- &intToFloat
	go: $t
	base: int
	overwriteBase: float
	- *intToFloat
	out:
	- *intToFloat
	- go: MaskedAnd
	asm: "VANDP[SD]"
	in:
	- class: mask
	- *intToFloat
	- *intToFloat
	out:
	- *intToFloat

	- go: AndNot
	asm: "VANDNP[SD]"
	in:
	- *intToFloat
	- *intToFloat
	out:
	- *intToFloat
	- go: MaskedAndNot
	asm: "VANDNP[SD]"
	in:
	- class: mask
	- *intToFloat
	- *intToFloat
	out:
	- *intToFloat

	- go: Or
	asm: "VORP[SD]"
	in:
	- *intToFloat
	- *intToFloat
	out:
	- *intToFloat
	- go: MaskedOr
	asm: "VORP[SD]"
	in:
	- class: mask
	- *intToFloat
	- *intToFloat
	out:
	- *intToFloat

	- go: Xor
	asm: "VXORP[SD]"
	in:
	- *intToFloat
	- *intToFloat
	out:
	- *intToFloat
	- go: MaskedXor
	asm: "VXORP[SD]"
	in:
	- class: mask
	- *intToFloat
	- *intToFloat
	out:
	- *intToFloat
	# Ints
	- go: Equal
	asm: "V?PCMPEQ[BWDQ]"
	in: &int2
	- &int
	go: $t
	base: int # Looks like PCMP is on signed integers - but for equals does it really matters?
	- *int
	out:
	- &anyvregToMask
	go: $t # We still need the output to be the same shape as inputs.
	overwriteBase: int
	overwriteClass: mask
	- go: Greater
	asm: "V?PCMPGT[BWDQ]"
	in: *int2
	out:
	- *anyvregToMask
	- go: MaskedEqual
	asm: "V?PCMPEQ[BWDQ]"
	in: &maskint2
	- class: mask
	- *int
	- *int
	out:
	- class: mask
	- go: MaskedGreater
	asm: "V?PCMPGT[BWDQ]"
	in: *maskint2
	out:
	- class: mask
	# The const imm predicated compares after AVX512, please see categories.yaml
	# for const imm specification.
	- go: Masked(Equal\|Greater\|Less\|LessEqual\|GreaterEqual\|NotEqual)
	asm: "VPCMP[BWDQ]"
	in:
	- class: mask
	- &int
	go: $t
	base: int
	- *int
	- class: immediate
	const: 0 # Just a placeholder, will be overwritten by const imm porting.
	out:
	- class: mask
	- go: Masked(Equal\|Greater\|Less\|LessEqual\|GreaterEqual\|NotEqual)
	asm: "VPCMPU[BWDQ]"
	in:
	- class: mask
	- &uint
	go: $t
	base: uint
	- *uint
	- class: immediate
	const: 0
	out:
	- class: mask

	# Floats
	- go: Equal\|Greater\|Less\|LessEqual\|GreaterEqual\|NotEqual\|IsNan
	asm: "VCMPP[SD]"
	in:
	- &float
	go: $t
	base: float
	- *float
	- class: immediate
	const: 0
	out:
	- go: $t # We still need the output to be the same shape as inputs.
	overwriteBase: int
	overwriteClass: mask
	- go: Masked(Equal\|Greater\|Less\|LessEqual\|GreaterEqual\|NotEqual\|IsNan)
	asm: "VCMPP[SD]"
	in:
	- class: mask
	- *float
	- *float
	- class: immediate
	const: 0
	out:
	- class: mask
	- go: Div
	asm: "V?DIVP[SD]"
	in: &2fp
	- &fp
	go: $t
	base: float
	- *fp
	out: &1fp
	- *fp
	- go: MaskedDiv
	asm: "V?DIVP[SD]"
	in: &1mask2fp
	- class: mask
	- *fp
	- *fp
	out: *1fp
	- go: Sqrt
	asm: "V?SQRTP[SD]"
	in: *1fp
	out: *1fp
	- go: MaskedSqrt
	asm: "V?SQRTP[SD]"
	in: &1mask1fp
	- class: mask
	- *fp
	out: *1fp
	- go: MaskedApproximateReciprocal
	asm: "VRCP14P[SD]"
	in: *1mask1fp
	out: *1fp
	- go: ApproximateReciprocalOfSqrt
	asm: "V?RSQRTPS"
	in: *1fp
	out: *1fp
	- go: MaskedApproximateReciprocalOfSqrt
	asm: "VRSQRT14P[SD]"
	in: *1mask1fp
	out: *1fp
	- go: MaskedMulByPowOf2
	asm: "VSCALEFP[SD]"
	in: *1mask2fp
	out: *1fp

	- go: "Round\|Ceil\|Floor\|Trunc"
	asm: "VROUNDP[SD]"
	in:
	- *fp
	- class: immediate
	const: 0 # place holder
	out: *1fp

	- go: "Masked(Round\|Ceil\|Floor\|Trunc)(SuppressException)?WithPrecision"
	asm: "VRNDSCALEP[SD]"
	in:
	- class: mask
	- *fp
	- class: immediate
	const: 0 # place holder
	immOffset: 4 # "M", round to numbers with M digits after dot(by means of binary number).
	out: *1fp
	- go: "MaskedDiffWith(Round\|Ceil\|Floor\|Trunc)(SuppressException)?WithPrecision"
	asm: "VREDUCEP[SD]"
	in:
	- class: mask
	- *fp
	- class: immediate
	const: 0 # place holder
	immOffset: 4 # "M", round to numbers with M digits after dot(by means of binary number).
	out: *1fp

	- go: "AddSub"
	asm: "VADDSUBP[SD]"
	in:
	- *fp
	- *fp
	out:
	- *fp
	- go: MaskedGaloisFieldAffineTransform
	asm: VGF2P8AFFINEQB
	in: &AffineArgs
	- class: mask
	- &uint8
	go: $t
	base: uint
	- &uint8x8
	go: $t2
	base: uint
	- &pureImmVar
	class: immediate
	immOffset: 0
	out:
	- *uint8

	- go: MaskedGaloisFieldAffineTransformInversed
	asm: VGF2P8AFFINEINVQB
	in: *AffineArgs
	out:
	- *uint8

	- go: MaskedGaloisFieldMul
	asm: VGF2P8MULB
	in:
	- class: mask
	- *uint8
	- *uint8
	out:
	- *uint8
	# Average (unsigned byte, unsigned word)
	# Instructions: VPAVGB, VPAVGW
	- go: Average
	asm: "VPAVG[BW]" # Matches VPAVGB (byte) and VPAVGW (word)
	in:
	- &uint_t # $t will be Uint8xN for VPAVGB, Uint16xN for VPAVGW
	go: $t
	base: uint
	- *uint_t
	out:
	- *uint_t
	- go: MaskedAverage
	asm: "VPAVG[BW]"
	in:
	- class: mask
	- *uint_t
	- *uint_t
	out:
	- *uint_t

	# Absolute Value (signed byte, word, dword, qword)
	# Instructions: VPABSB, VPABSW, VPABSD, VPABSQ
	- go: Absolute
	asm: "VPABS[BWDQ]" # Matches VPABSB, VPABSW, VPABSD, VPABSQ
	in:
	- &int_t # $t will be Int8xN, Int16xN, Int32xN, Int64xN
	go: $t
	base: int
	out:
	- *int_t # Output is magnitude, fits in the same signed type
	- go: MaskedAbsolute
	asm: "VPABS[BWDQ]"
	in:
	- class: mask
	- *int_t
	out:
	- *int_t

	# Sign Operation (signed byte, word, dword)
	# Applies sign of second operand to the first.
	# Instructions: VPSIGNB, VPSIGNW, VPSIGND
	- go: Sign
	asm: "VPSIGN[BWD]" # Matches VPSIGNB, VPSIGNW, VPSIGND
	in:
	- *int_t # value to apply sign to
	- *int_t # value from which to take the sign
	out:
	- *int_t

	# Population Count (count set bits in each element)
	# Instructions: VPOPCNTB, VPOPCNTW (AVX512_BITALG)
	# VPOPCNTD, VPOPCNTQ (AVX512_VPOPCNTDQ)
	- go: MaskedPopCount
	asm: "VPOPCNT[BWDQ]"
	in:
	- class: mask
	- &any
	go: $t
	out:
	- *any
	- go: PairDotProd
	asm: VPMADDWD
	in:
	- &int
	go: $t
	base: int
	- *int
	out:
	- &int2 # The elemBits are different
	go: $t2
	base: int
	- go: MaskedPairDotProd
	asm: VPMADDWD
	in:
	- class: mask
	- *int
	- *int
	out:
	- *int2
	- go: SaturatedUnsignedSignedPairDotProd
	asm: VPMADDUBSW
	in:
	- &uint
	go: $t
	base: uint
	- &int3
	go: $t3
	base: int
	out:
	- *int2
	- go: MaskedSaturatedUnsignedSignedPairDotProd
	asm: VPMADDUBSW
	in:
	- class: mask
	- go: $t1
	base: uint
	overwriteElementBits: 8
	- go: $t2
	base: int
	overwriteElementBits: 8
	out:
	- *int3
	- go: DotProdBroadcast
	asm: VDPPD
	in:
	- &dpb_src
	go: $t
	base: float
	elemBits: 64
	bits: $bits
	- *dpb_src
	- class: immediate
	const: 127
	out:
	- *dpb_src
	- go: UnsignedSignedQuadDotProdAccumulate
	asm: "VPDPBUSD"
	in:
	- &qdpa_acc
	go: $t_acc
	elemBits: 32
	- &qdpa_src1
	go: $t_src1
	base: uint
	overwriteElementBits: 8
	- &qdpa_src2
	go: $t_src2
	base: int
	overwriteElementBits: 8
	out:
	- *qdpa_acc
	- go: MaskedUnsignedSignedQuadDotProdAccumulate
	asm: "VPDPBUSD"
	in:
	- *qdpa_acc
	- class: mask
	- *qdpa_src1
	- *qdpa_src2
	out:
	- *qdpa_acc
	- go: SaturatedUnsignedSignedQuadDotProdAccumulate
	asm: "VPDPBUSDS"
	in:
	- *qdpa_acc
	- *qdpa_src1
	- *qdpa_src2
	out:
	- *qdpa_acc
	- go: MaskedSaturatedUnsignedSignedQuadDotProdAccumulate
	asm: "VPDPBUSDS"
	in:
	- *qdpa_acc
	- class: mask
	- *qdpa_src1
	- *qdpa_src2
	out:
	- *qdpa_acc
	- go: PairDotProdAccumulate
	asm: "VPDPWSSD"
	in:
	- &pdpa_acc
	go: $t_acc
	base: int
	elemBits: 32
	- &pdpa_src1
	go: $t_src1
	base: int
	overwriteElementBits: 16
	- &pdpa_src2
	go: $t_src2
	base: int
	overwriteElementBits: 16
	out:
	- *pdpa_acc
	- go: MaskedPairDotProdAccumulate
	asm: "VPDPWSSD"
	in:
	- *pdpa_acc
	- class: mask
	- *pdpa_src1
	- *pdpa_src2
	out:
	- *pdpa_acc
	- go: SaturatedPairDotProdAccumulate
	asm: "VPDPWSSDS"
	in:
	- *pdpa_acc
	- *pdpa_src1
	- *pdpa_src2
	out:
	- *pdpa_acc
	- go: MaskedSaturatedPairDotProdAccumulate
	asm: "VPDPWSSDS"
	in:
	- *pdpa_acc
	- class: mask
	- *pdpa_src1
	- *pdpa_src2
	out:
	- *pdpa_acc
	- go: MaskedFusedMultiplyAdd
	asm: "VFMADD213PS\|VFMADD213PD"
	in:
	- &fma_op
	go: $t
	base: float
	- class: mask
	- *fma_op
	- *fma_op
	out:
	- *fma_op
	- go: MaskedFusedMultiplyAddSub
	asm: "VFMADDSUB213PS\|VFMADDSUB213PD"
	in:
	- *fma_op
	- class: mask
	- *fma_op
	- *fma_op
	out:
	- *fma_op
	- go: MaskedFusedMultiplySubAdd
	asm: "VFMSUBADD213PS\|VFMSUBADD213PD"
	in:
	- *fma_op
	- class: mask
	- *fma_op
	- *fma_op
	out:
	- *fma_op
	- go: Max
	asm: "V?PMAXS[BWDQ]"
	in: &2int
	- &int
	go: $t
	base: int
	- *int
	out: &1int
	- *int
	- go: Max
	asm: "V?PMAXU[BWDQ]"
	in: &2uint
	- &uint
	go: $t
	base: uint
	- *uint
	out: &1uint
	- *uint
	- go: MaskedMax
	asm: "V?PMAXS[BWDQ]"
	in: &1mask2int
	- class: mask
	- *int
	- *int
	out: *1int
	- go: MaskedMax
	asm: "V?PMAXU[BWDQ]"
	in: &1mask2uint
	- class: mask
	- *uint
	- *uint
	out: *1uint

	- go: Min
	asm: "V?PMINS[BWDQ]"
	in: *2int
	out: *1int
	- go: Min
	asm: "V?PMINU[BWDQ]"
	in: *2uint
	out: *1uint
	- go: MaskedMin
	asm: "V?PMINS[BWDQ]"
	in: *1mask2int
	out: *1int
	- go: MaskedMin
	asm: "V?PMINU[BWDQ]"
	in: *1mask2uint
	out: *1uint

	- go: Max
	asm: "V?MAXP[SD]"
	in: &2float
	- &float
	go: $t
	base: float
	- *float
	out: &1float
	- *float
	- go: MaskedMax
	asm: "V?MAXP[SD]"
	in: &1mask2float
	- class: mask
	- *float
	- *float
	out: *1float
	- go: Min
	asm: "V?MINP[SD]"
	in: *2float
	out: *1float
	- go: MaskedMin
	asm: "V?MINP[SD]"
	in: *1mask2float
	out: *1float
	- go: SetElem
	asm: "VPINSR[BWDQ]"
	in:
	- &t
	class: vreg
	base: $b
	- class: greg
	base: $b
	lanes: 1 # Scalar, darn it!
	- class: immediate
	immOffset: 0
	out:
	- *t
	# "Normal" multiplication is only available for floats.
	# This only covers the single and double precision.
	- go: Mul
	asm: "VMULP[SD]"
	in:
	- &fp
	go: $t
	base: float
	- *fp
	out:
	- *fp
	- go: MaskedMul
	asm: "VMULP[SD]"
	in:
	- class: mask
	- *fp
	- *fp
	out:
	- *fp

	# Integer multiplications.

	# MulEvenWiden
	# Dword only.
	- go: MulEvenWiden
	asm: "VPMULDQ"
	in:
	- &int
	go: $t
	base: int
	- *int
	out:
	- &int2
	go: $t2
	base: int
	- go: MulEvenWiden
	asm: "VPMULUDQ"
	in:
	- &uint
	go: $t
	base: uint
	- *uint
	out:
	- &uint2
	go: $t2
	base: uint
	- go: MaskedMulEvenWiden
	asm: "VPMULDQ"
	in:
	- class: mask
	- *int
	- *int
	out:
	- *int2
	- go: MaskedMulEvenWiden
	asm: "VPMULUDQ"
	in:
	- class: mask
	- *uint
	- *uint
	out:
	- *uint2

	# MulHigh
	# Word only.
	# Non-masked
	- go: MulHigh
	asm: "VPMULHW"
	in:
	- *int
	- *int
	out:
	- *int2
	- go: MulHigh
	asm: "VPMULHUW"
	in:
	- *uint
	- *uint
	out:
	- *uint2
	- go: MaskedMulHigh
	asm: "VPMULHW"
	in:
	- class: mask
	- *int
	- *int
	out:
	- *int2
	- go: MaskedMulHigh
	asm: "VPMULHUW"
	in:
	- class: mask
	- *uint
	- *uint
	out:
	- *uint2

	# MulLow
	# Signed int only.
	# Non-masked
	- go: MulLow
	asm: "VPMULL[WDQ]"
	in:
	- *int
	- *int
	out:
	- *int2
	- go: MaskedMulLow
	asm: "VPMULL[WDQ]"
	in:
	- class: mask
	- *int
	- *int
	out:
	- *int2
	# Integers
	# ShiftAll*
	- go: ShiftAllLeft
	asm: "VPSLL[WDQ]"
	in:
	- &any
	go: $t
	- &vecAsScalar64
	treatLikeAScalarOfSize: 64
	go: Uint64x2
	out:
	- *any
	- go: MaskedShiftAllLeft
	asm: "VPSLL[WDQ]"
	in:
	- class: mask
	- *any
	- *vecAsScalar64
	out:
	- *any
	- go: ShiftAllRight
	asm: "VPSRL[WDQ]"
	in:
	- *any
	- *vecAsScalar64
	out:
	- *any
	- go: MaskedShiftAllRight
	asm: "VPSRL[WDQ]"
	in:
	- class: mask
	- *any
	- *vecAsScalar64
	out:
	- *any
	- go: ShiftAllRightSignExtended
	asm: "VPSRA[WDQ]"
	in:
	- &int
	go: $t
	base: int
	- *vecAsScalar64
	out:
	- *int
	- go: MaskedShiftAllRightSignExtended
	asm: "VPSRA[WDQ]"
	in:
	- class: mask
	- *int
	- *vecAsScalar64
	out:
	- *int

	# Shift* (variable)
	- go: ShiftLeft
	asm: "VPSLLV[WD]"
	in:
	- *any
	- *any
	out:
	- *any
	- go: MaskedShiftLeft
	asm: "VPSLLV[WD]"
	in:
	- class: mask
	- *any
	- *any
	out:
	- *any
	# XED data of VPSLLVQ marks the element bits 32 which is off to the actual semantic, we need to overwrite
	# it to 64.
	- go: ShiftLeft
	asm: "VPSLLVQ"
	in:
	- &anyOverwriteElemBits
	go: $t
	overwriteElementBits: 64
	- *anyOverwriteElemBits
	out:
	- *anyOverwriteElemBits
	- go: MaskedShiftLeft
	asm: "VPSLLVQ"
	in:
	- class: mask
	- *anyOverwriteElemBits
	- *anyOverwriteElemBits
	out:
	- *anyOverwriteElemBits
	- go: ShiftRight
	asm: "VPSRLV[WD]"
	in:
	- *any
	- *any
	out:
	- *any
	- go: MaskedShiftRight
	asm: "VPSRLV[WD]"
	in:
	- class: mask
	- *any
	- *any
	out:
	- *any
	# XED data of VPSRLVQ needs the same overwrite as VPSLLVQ.
	- go: ShiftRight
	asm: "VPSRLVQ"
	in:
	- *anyOverwriteElemBits
	- *anyOverwriteElemBits
	out:
	- *anyOverwriteElemBits
	- go: MaskedShiftRight
	asm: "VPSRLVQ"
	in:
	- class: mask
	- *anyOverwriteElemBits
	- *anyOverwriteElemBits
	out:
	- *anyOverwriteElemBits
	- go: ShiftRightSignExtended
	asm: "VPSRAV[WDQ]"
	in:
	- *any
	- *any
	out:
	- *any
	- go: MaskedShiftRightSignExtended
	asm: "VPSRAV[WDQ]"
	in:
	- class: mask
	- *any
	- *any
	out:
	- *any

	# Rotate
	- go: MaskedRotateAllLeft
	asm: "VPROL[DQ]"
	in:
	- class: mask
	- *any
	- &pureImm
	class: immediate
	immOffset: 0
	out:
	- *any
	- go: MaskedRotateAllRight
	asm: "VPROR[DQ]"
	in:
	- class: mask
	- *any
	- *pureImm
	out:
	- *any
	- go: MaskedRotateLeft
	asm: "VPROLV[DQ]"
	in:
	- class: mask
	- *any
	- *any
	out:
	- *any
	- go: MaskedRotateRight
	asm: "VPRORV[DQ]"
	in:
	- class: mask
	- *any
	- *any
	out:
	- *any

	# Bizzare shifts.
	- go: MaskedShiftAllLeftAndFillUpperFrom
	asm: "VPSHLD[WDQ]"
	in:
	- class: mask
	- *any
	- *any
	- *pureImm
	out:
	- *any
	- go: MaskedShiftAllRightAndFillUpperFrom
	asm: "VPSHRD[WDQ]"
	in:
	- class: mask
	- *any
	- *any
	- *pureImm
	out:
	- *any
	- go: MaskedShiftLeftAndFillUpperFrom
	asm: "VPSHLDV[WDQ]"
	in:
	- *any
	- class: mask
	- *any
	- *any
	out:
	- *any
	- go: MaskedShiftRightAndFillUpperFrom
	asm: "VPSHRDV[WDQ]"
	in:
	- *any
	- class: mask
	- *any
	- *any
	out:
	- *any