blob: 2d1038dada557b12466b789de0419029de1e7e5c [file] [log] [blame]
!sum
# TODO: remove the "Const Immediate" from the documentation field, it's there only for debug purposes.
# Add
# Element-wise addition. The asm regex matches VPADDB/W/D/Q as well as
# VADDPS/VADDPD. The first operand defines the anchor &any (Go type variable
# $t); the second input and the output reuse it through *any, so all three
# are constrained to the same Go type.
- go: Add
asm: "VPADD[BWDQ]|VADDP[SD]"
in:
- &any
go: $t
- *any
out:
- *any
# Masked form of Add: same asm regex, with a mask-class operand listed first.
- go: MaskedAdd
asm: "VPADD[BWDQ]|VADDP[SD]"
in:
- class: mask
- *any
- *any
out:
- *any
# Saturated Add
# Two rules share each Go name and are told apart by the operand base:
#   - base: int  -> VPADDS*  (signed saturation)
#   - base: uint -> VPADDUS* (unsigned saturation)
# The unsigned rules previously reused the signed VPADDS pattern, which would
# bind the unsigned Go types to signed-saturating instructions. Unsigned
# saturating add exists only for byte and word elements (VPADDUSB, VPADDUSW).
# The anchors &int and &uint defined here are reused by later rules.
- go: SaturatedAdd
asm: "VPADDS[BWDQ]"
in:
- &int
go: $t
base: int
- *int
out:
- *int
- go: SaturatedAdd
asm: "VPADDUS[BW]"
in:
- &uint
go: $t
base: uint
- *uint
out:
- *uint
# Masked forms: same instructions, with a mask-class operand listed first.
- go: MaskedSaturatedAdd
asm: "VPADDS[BWDQ]"
in:
- class: mask
- *int
- *int
out:
- *int
- go: MaskedSaturatedAdd
asm: "VPADDUS[BW]"
in:
- class: mask
- *uint
- *uint
out:
- *uint
# Sub
# Element-wise subtraction. The asm regex matches VPSUBB/W/D/Q as well as
# VSUBPS/VSUBPD. Besides reusing the operand anchor *any, this rule anchors
# whole operand lists: &2any is the two-element input list and &1any is the
# one-element output list. Later rules refer to them as *2any / *1any.
- go: Sub
asm: "VPSUB[BWDQ]|VSUBP[SD]"
in: &2any
- *any
- *any
out: &1any
- *any
# Masked form of Sub: same asm regex, with a mask-class operand listed first.
- go: MaskedSub
asm: "VPSUB[BWDQ]|VSUBP[SD]"
in:
- class: mask
- *any
- *any
out:
- *any
# Saturated Sub
# Two rules share each Go name and are told apart by the operand base:
#   - base: int  -> VPSUBS*  (signed saturation)
#   - base: uint -> VPSUBUS* (unsigned saturation)
# The unsigned rules previously reused the signed VPSUBS pattern, which would
# bind the unsigned Go types to signed-saturating instructions. Unsigned
# saturating subtract exists only for byte and word elements (VPSUBUSB,
# VPSUBUSW).
# The signed rule also anchors its operand lists: &2int is the two-element
# input list and &1int is the one-element output list.
- go: SaturatedSub
asm: "VPSUBS[BWDQ]"
in: &2int
- *int
- *int
out: &1int
- *int
- go: SaturatedSub
asm: "VPSUBUS[BW]"
in:
- *uint
- *uint
out:
- *uint
# Masked forms: same instructions, with a mask-class operand listed first.
- go: MaskedSaturatedSub
asm: "VPSUBS[BWDQ]"
in:
- class: mask
- *int
- *int
out:
- *int
- go: MaskedSaturatedSub
asm: "VPSUBUS[BW]"
in:
- class: mask
- *uint
- *uint
out:
- *uint
# Pairwise (horizontal) add/sub.
# These rules take their whole operand lists from earlier anchors:
# *2any / *1any (two inputs, one output of the same $t) and
# *2int / *1int (the same shape, restricted to base: int).
# PairwiseAdd and PairwiseSub each appear twice: once for the VPH* pattern
# and once for the VH*P[SD] pattern.
- go: PairwiseAdd
asm: "VPHADD[DW]"
in: *2any
out: *1any
- go: PairwiseSub
asm: "VPHSUB[DW]"
in: *2any
out: *1any
- go: PairwiseAdd
asm: "VHADDP[SD]" # floats
in: *2any
out: *1any
- go: PairwiseSub
asm: "VHSUBP[SD]" # floats
in: *2any
out: *1any
# Saturated variants, restricted to base: int operands.
- go: SaturatedPairwiseAdd
asm: "VPHADDS[DW]"
in: *2int
out: *1int
- go: SaturatedPairwiseSub
asm: "VPHSUBS[DW]"
in: *2int
out: *1int
# In the XED data, *all* floating-point bitwise logic operations have their
# operand type marked as uint. We are not trying to understand why Intel
# decided that they want FP bitwise logic operations, but this irregularity
# has to be dealt with in separate rules with some overwrites.
# Int/Uint operations.
# Non-masked for 128/256-bit vectors
# For binary operations, we constrain their two inputs and one output to the
# same Go type using a variable. This will map to instructions before AVX512.
# The anchor &any is (re)defined on the first operand of And with the same
# content as before (go: $t) and reused by every rule in this section.
- go: And
asm: "VPAND"
in:
- &any
go: $t
- *any
out:
- *any
# Masked
# Looks like VPAND$xi works only for 2 shapes for integer:
# Dword and Qword.
# TODO: should we wildcard other smaller elemBits to VPANDQ or
# VPANDD? Looks like elemBits doesn't really matter after all in bitwise operations.
- go: MaskedAnd
asm: "VPAND[DQ]"
in:
- class: mask
- *any
- *any
out:
- *any
# AndNot / Or / Xor follow the same layout as And: the unmasked rule matches
# the unsuffixed mnemonic, the masked rule matches the D/Q-suffixed mnemonics
# and lists a mask-class operand first.
- go: AndNot
asm: "VPANDN"
in:
- *any
- *any
out:
- *any
- go: MaskedAndNot
asm: "VPANDN[DQ]"
in:
- class: mask
- *any
- *any
out:
- *any
- go: Or
asm: "VPOR"
in:
- *any
- *any
out:
- *any
- go: MaskedOr
asm: "VPOR[DQ]"
in:
- class: mask
- *any
- *any
out:
- *any
- go: Xor
asm: "VPXOR"
in:
- *any
- *any
out:
- *any
- go: MaskedXor
asm: "VPXOR[DQ]"
in:
- class: mask
- *any
- *any
out:
- *any
# FP operations.
# Set the [base] to be "int" to not include duplicates (excluding "uint").
# [base] is not used when [overwriteBase] is present.
# The anchor &intToFloat (go: $t, base: int, overwriteBase: float) is defined
# on the first operand of And and reused by every rule in this section, so
# all operands of a rule share the same $t.
- go: And
asm: "VANDP[SD]"
in:
- &intToFloat
go: $t
base: int
overwriteBase: float
- *intToFloat
out:
- *intToFloat
# Unlike the integer rules above, the masked FP rules use the same asm regex
# as their unmasked counterparts; only the leading mask operand differs.
- go: MaskedAnd
asm: "VANDP[SD]"
in:
- class: mask
- *intToFloat
- *intToFloat
out:
- *intToFloat
- go: AndNot
asm: "VANDNP[SD]"
in:
- *intToFloat
- *intToFloat
out:
- *intToFloat
- go: MaskedAndNot
asm: "VANDNP[SD]"
in:
- class: mask
- *intToFloat
- *intToFloat
out:
- *intToFloat
- go: Or
asm: "VORP[SD]"
in:
- *intToFloat
- *intToFloat
out:
- *intToFloat
- go: MaskedOr
asm: "VORP[SD]"
in:
- class: mask
- *intToFloat
- *intToFloat
out:
- *intToFloat
- go: Xor
asm: "VXORP[SD]"
in:
- *intToFloat
- *intToFloat
out:
- *intToFloat
- go: MaskedXor
asm: "VXORP[SD]"
in:
- class: mask
- *intToFloat
- *intToFloat
out:
- *intToFloat
# Ints
# Integer comparisons. Anchors introduced here:
#   &int2          - the two-operand input list of Equal (reused by Greater)
#   &int           - redefined with the same content (go: $t, base: int)
#   &anyvregToMask - output operand: same $t as the inputs, with the base
#                    overwritten to int and the class overwritten to mask
#   &maskint2      - input list [mask, int, int] (reused by MaskedGreater)
- go: Equal
asm: "V?PCMPEQ[BWDQ]"
in: &int2
- &int
go: $t
base: int # Looks like PCMP is on signed integers - but for equals does it really matter?
- *int
out:
- &anyvregToMask
go: $t # We still need the output to be the same shape as inputs.
overwriteBase: int
overwriteClass: mask
- go: Greater
asm: "V?PCMPGT[BWDQ]"
in: *int2
out:
- *anyvregToMask
# Masked forms: a mask operand comes first and the output is declared only as
# class: mask (it is not tied to $t the way *anyvregToMask is).
- go: MaskedEqual
asm: "V?PCMPEQ[BWDQ]"
in: &maskint2
- class: mask
- *int
- *int
out:
- class: mask
- go: MaskedGreater
asm: "V?PCMPGT[BWDQ]"
in: *maskint2
out:
- class: mask
# The const imm predicated compares after AVX512, please see categories.yaml
# for const imm specification.
# One rule covers six Go names through the regex in the go field; the
# immediate operand's const value is only a placeholder (see inline comment).
- go: Masked(Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual)
asm: "VPCMP[BWDQ]"
in:
- class: mask
- &int
go: $t
base: int
- *int
- class: immediate
const: 0 # Just a placeholder, will be overwritten by const imm porting.
out:
- class: mask
# Unsigned counterpart: same layout with base: uint and the VPCMPU mnemonics.
- go: Masked(Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual)
asm: "VPCMPU[BWDQ]"
in:
- class: mask
- &uint
go: $t
base: uint
- *uint
- class: immediate
const: 0
out:
- class: mask
# Floats
# Float comparisons all map onto VCMPPS/VCMPPD with an immediate operand.
# The go field is a regex alternation, so one rule covers seven Go names.
# const: 0 on the immediate has the same form as the placeholder used by the
# integer VPCMP rules - presumably it is overwritten the same way; confirm
# against categories.yaml.
# The anchor &float (go: $t, base: float) is defined on the first operand.
- go: Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual|IsNan
asm: "VCMPP[SD]"
in:
- &float
go: $t
base: float
- *float
- class: immediate
const: 0
out:
- go: $t # We still need the output to be the same shape as inputs.
overwriteBase: int
overwriteClass: mask
# Masked form: mask operand first; the output is declared only as class: mask.
- go: Masked(Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual|IsNan)
asm: "VCMPP[SD]"
in:
- class: mask
- *float
- *float
- class: immediate
const: 0
out:
- class: mask
# Floating-point division, square root, approximate reciprocals and scaling.
# Anchors introduced here and reused below:
#   &fp       - operand: go: $t, base: float
#   &2fp      - input list [fp, fp]
#   &1fp      - one-element list [fp]; used both as output and as unary input
#   &1mask2fp - input list [mask, fp, fp]
#   &1mask1fp - input list [mask, fp]
- go: Div
asm: "V?DIVP[SD]"
in: &2fp
- &fp
go: $t
base: float
- *fp
out: &1fp
- *fp
- go: MaskedDiv
asm: "V?DIVP[SD]"
in: &1mask2fp
- class: mask
- *fp
- *fp
out: *1fp
# Unary: the one-element list *1fp serves as both input and output.
- go: Sqrt
asm: "V?SQRTP[SD]"
in: *1fp
out: *1fp
- go: MaskedSqrt
asm: "V?SQRTP[SD]"
in: &1mask1fp
- class: mask
- *fp
out: *1fp
- go: MaskedApproximateReciprocal
asm: "VRCP14P[SD]"
in: *1mask1fp
out: *1fp
# Note: this pattern matches only the PS mnemonic (no [SD] class), unlike the
# masked VRSQRT14P[SD] rule that follows.
- go: ApproximateReciprocalOfSqrt
asm: "V?RSQRTPS"
in: *1fp
out: *1fp
- go: MaskedApproximateReciprocalOfSqrt
asm: "VRSQRT14P[SD]"
in: *1mask1fp
out: *1fp
- go: MaskedMulByPowOf2
asm: "VSCALEFP[SD]"
in: *1mask2fp
out: *1fp
# Rounding family. Each rule's go field is a regex covering several Go names;
# the variants are distinguished through the immediate operand, whose
# const: 0 is only a placeholder (see inline comments).
- go: "Round|Ceil|Floor|Trunc"
asm: "VROUNDP[SD]"
in:
- *fp
- class: immediate
const: 0 # place holder
out: *1fp
# Masked rounding with precision: mask first, then the value, then an
# immediate that additionally carries immOffset: 4.
- go: "Masked(Round|Ceil|Floor|Trunc)(SuppressException)?WithPrecision"
asm: "VRNDSCALEP[SD]"
in:
- class: mask
- *fp
- class: immediate
const: 0 # place holder
immOffset: 4 # "M", round to numbers with M digits after dot(by means of binary number).
out: *1fp
# Same operand layout as the VRNDSCALE rule above, bound to VREDUCEP[SD].
- go: "MaskedDiffWith(Round|Ceil|Floor|Trunc)(SuppressException)?WithPrecision"
asm: "VREDUCEP[SD]"
in:
- class: mask
- *fp
- class: immediate
const: 0 # place holder
immOffset: 4 # "M", round to numbers with M digits after dot(by means of binary number).
out: *1fp
# AddSub: two float inputs and one float output, all sharing $t via *fp.
- go: "AddSub"
asm: "VADDSUBP[SD]"
in:
- *fp
- *fp
out:
- *fp
# Galois-field operations.
# Anchors introduced here:
#   &AffineArgs - input list [mask, uint8, uint8x8, immediate]
#   &uint8      - operand: go: $t, base: uint
#   &uint8x8    - operand: go: $t2, base: uint (a separate type variable, so
#                 it is not forced to have the same Go type as &uint8)
#   &pureImmVar - immediate operand with immOffset: 0 and no const value
- go: MaskedGaloisFieldAffineTransform
asm: VGF2P8AFFINEQB
in: &AffineArgs
- class: mask
- &uint8
go: $t
base: uint
- &uint8x8
go: $t2
base: uint
- &pureImmVar
class: immediate
immOffset: 0
out:
- *uint8
# Same operand list as the affine transform above, bound to the INV mnemonic.
- go: MaskedGaloisFieldAffineTransformInversed
asm: VGF2P8AFFINEINVQB
in: *AffineArgs
out:
- *uint8
# Multiplication: mask plus two operands of the same $t; no immediate.
- go: MaskedGaloisFieldMul
asm: VGF2P8MULB
in:
- class: mask
- *uint8
- *uint8
out:
- *uint8
# Average (unsigned byte, unsigned word)
# Instructions: VPAVGB, VPAVGW
# The anchor &uint_t (go: $t, base: uint) ties both inputs and the output to
# the same unsigned Go type.
- go: Average
asm: "VPAVG[BW]" # Matches VPAVGB (byte) and VPAVGW (word)
in:
- &uint_t # $t will be Uint8xN for VPAVGB, Uint16xN for VPAVGW
go: $t
base: uint
- *uint_t
out:
- *uint_t
# Masked form: same asm regex, with a mask-class operand listed first.
- go: MaskedAverage
asm: "VPAVG[BW]"
in:
- class: mask
- *uint_t
- *uint_t
out:
- *uint_t
# Absolute Value (signed byte, word, dword, qword)
# Instructions: VPABSB, VPABSW, VPABSD, VPABSQ
# The anchor &int_t (go: $t, base: int) is defined here and reused by
# MaskedAbsolute and Sign; input and output share the same signed Go type.
- go: Absolute
asm: "VPABS[BWDQ]" # Matches VPABSB, VPABSW, VPABSD, VPABSQ
in:
- &int_t # $t will be Int8xN, Int16xN, Int32xN, Int64xN
go: $t
base: int
out:
- *int_t # Output is magnitude, fits in the same signed type
# Masked form: same asm regex, with a mask-class operand listed first.
- go: MaskedAbsolute
asm: "VPABS[BWDQ]"
in:
- class: mask
- *int_t
out:
- *int_t
# Sign Operation (signed byte, word, dword)
# Applies sign of second operand to the first.
# Instructions: VPSIGNB, VPSIGNW, VPSIGND
# No masked variant of Sign is declared in this section.
- go: Sign
asm: "VPSIGN[BWD]" # Matches VPSIGNB, VPSIGNW, VPSIGND
in:
- *int_t # value to apply sign to
- *int_t # value from which to take the sign
out:
- *int_t
# Population Count (count set bits in each element)
# Instructions: VPOPCNTB, VPOPCNTW (AVX512_BITALG)
# VPOPCNTD, VPOPCNTQ (AVX512_VPOPCNTDQ)
# Only a masked rule is declared. The anchor &any is redefined here with the
# same content as before (go: $t), with no base restriction.
- go: MaskedPopCount
asm: "VPOPCNT[BWDQ]"
in:
- class: mask
- &any
go: $t
out:
- *any
# Pair dot products.
# Inputs use $t and the output uses a different type variable ($t2), so the
# output Go type is not forced to equal the input type.
# NOTE: the anchor &int2 is redefined here as an *operand* (go: $t2,
# base: int). Earlier in the file &int2 named the two-operand input list of
# Equal; aliases after this point resolve to the operand definition.
- go: PairDotProd
asm: VPMADDWD
in:
- &int
go: $t
base: int
- *int
out:
- &int2 # The elemBits are different
go: $t2
base: int
- go: MaskedPairDotProd
asm: VPMADDWD
in:
- class: mask
- *int
- *int
out:
- *int2
# Mixed-signedness variant: first input is base: uint ($t), second input is
# base: int with its own variable ($t3, anchored as &int3); output is *int2.
- go: SaturatedUnsignedSignedPairDotProd
asm: VPMADDUBSW
in:
- &uint
go: $t
base: uint
- &int3
go: $t3
base: int
out:
- *int2
# The masked rule spells its inputs out inline ($t1 uint, $t2 int, both with
# overwriteElementBits: 8) instead of reusing the anchors above.
# NOTE(review): the output here is *int3 ($t3), whereas the unmasked rule
# outputs *int2 ($t2). Presumably this avoids clashing with the inline $t2
# input - confirm that this asymmetry is intended.
- go: MaskedSaturatedUnsignedSignedPairDotProd
asm: VPMADDUBSW
in:
- class: mask
- go: $t1
base: uint
overwriteElementBits: 8
- go: $t2
base: int
overwriteElementBits: 8
out:
- *int3
# DotProdBroadcast binds to VDPPD with a fixed immediate of 127.
# The anchor &dpb_src constrains both inputs and the output to the same
# operand: base: float, elemBits: 64, with the vector width bound to the
# variable $bits.
# NOTE(review): the meaning of the constant 127 is not documented here;
# verify against the VDPPD immediate encoding.
- go: DotProdBroadcast
asm: VDPPD
in:
- &dpb_src
go: $t
base: float
elemBits: 64
bits: $bits
- *dpb_src
- class: immediate
const: 127
out:
- *dpb_src
# Quad dot product with accumulation (VPDPBUSD / VPDPBUSDS).
# Anchors introduced here:
#   &qdpa_acc  - accumulator: go: $t_acc, elemBits: 32 (no base restriction)
#   &qdpa_src1 - go: $t_src1, base: uint, overwriteElementBits: 8
#   &qdpa_src2 - go: $t_src2, base: int,  overwriteElementBits: 8
# The accumulator is both the first input and the output.
- go: UnsignedSignedQuadDotProdAccumulate
asm: "VPDPBUSD"
in:
- &qdpa_acc
go: $t_acc
elemBits: 32
- &qdpa_src1
go: $t_src1
base: uint
overwriteElementBits: 8
- &qdpa_src2
go: $t_src2
base: int
overwriteElementBits: 8
out:
- *qdpa_acc
# In the masked forms of this family the mask is the SECOND operand, after
# the accumulator, unlike most masked rules in this file where it is first.
- go: MaskedUnsignedSignedQuadDotProdAccumulate
asm: "VPDPBUSD"
in:
- *qdpa_acc
- class: mask
- *qdpa_src1
- *qdpa_src2
out:
- *qdpa_acc
# Saturated variants: identical operand layout, bound to VPDPBUSDS.
- go: SaturatedUnsignedSignedQuadDotProdAccumulate
asm: "VPDPBUSDS"
in:
- *qdpa_acc
- *qdpa_src1
- *qdpa_src2
out:
- *qdpa_acc
- go: MaskedSaturatedUnsignedSignedQuadDotProdAccumulate
asm: "VPDPBUSDS"
in:
- *qdpa_acc
- class: mask
- *qdpa_src1
- *qdpa_src2
out:
- *qdpa_acc
# Pair dot product with accumulation (VPDPWSSD / VPDPWSSDS).
# Anchors introduced here:
#   &pdpa_acc  - accumulator: go: $t_acc, base: int, elemBits: 32
#   &pdpa_src1 - go: $t_src1, base: int, overwriteElementBits: 16
#   &pdpa_src2 - go: $t_src2, base: int, overwriteElementBits: 16
# The accumulator is both the first input and the output.
- go: PairDotProdAccumulate
asm: "VPDPWSSD"
in:
- &pdpa_acc
go: $t_acc
base: int
elemBits: 32
- &pdpa_src1
go: $t_src1
base: int
overwriteElementBits: 16
- &pdpa_src2
go: $t_src2
base: int
overwriteElementBits: 16
out:
- *pdpa_acc
# In the masked forms of this family the mask is the SECOND operand, after
# the accumulator.
- go: MaskedPairDotProdAccumulate
asm: "VPDPWSSD"
in:
- *pdpa_acc
- class: mask
- *pdpa_src1
- *pdpa_src2
out:
- *pdpa_acc
# Saturated variants: identical operand layout, bound to VPDPWSSDS.
- go: SaturatedPairDotProdAccumulate
asm: "VPDPWSSDS"
in:
- *pdpa_acc
- *pdpa_src1
- *pdpa_src2
out:
- *pdpa_acc
- go: MaskedSaturatedPairDotProdAccumulate
asm: "VPDPWSSDS"
in:
- *pdpa_acc
- class: mask
- *pdpa_src1
- *pdpa_src2
out:
- *pdpa_acc
# Fused multiply-add family (213 operand-order forms only, PS and PD).
# The anchor &fma_op (go: $t, base: float) ties all three value operands and
# the output to the same Go type. Only masked rules are declared here, and
# the mask is the SECOND operand: [fma_op, mask, fma_op, fma_op].
- go: MaskedFusedMultiplyAdd
asm: "VFMADD213PS|VFMADD213PD"
in:
- &fma_op
go: $t
base: float
- class: mask
- *fma_op
- *fma_op
out:
- *fma_op
- go: MaskedFusedMultiplyAddSub
asm: "VFMADDSUB213PS|VFMADDSUB213PD"
in:
- *fma_op
- class: mask
- *fma_op
- *fma_op
out:
- *fma_op
- go: MaskedFusedMultiplySubAdd
asm: "VFMSUBADD213PS|VFMSUBADD213PD"
in:
- *fma_op
- class: mask
- *fma_op
- *fma_op
out:
- *fma_op
# Integer Max/Min.
# Signed rules match the V?PMAXS / V?PMINS mnemonics with base: int operands;
# unsigned rules match V?PMAXU / V?PMINU with base: uint operands.
# Anchors (re)defined here and reused by the Min rules:
#   &2int / &1int   - [int, int] input list and [int] output list
#   &2uint / &1uint - [uint, uint] input list and [uint] output list
#   &1mask2int      - [mask, int, int]
#   &1mask2uint     - [mask, uint, uint]
# &2int and &1int were already defined by SaturatedSub with equivalent
# content; this redefinition replaces them for the aliases that follow.
- go: Max
asm: "V?PMAXS[BWDQ]"
in: &2int
- &int
go: $t
base: int
- *int
out: &1int
- *int
- go: Max
asm: "V?PMAXU[BWDQ]"
in: &2uint
- &uint
go: $t
base: uint
- *uint
out: &1uint
- *uint
- go: MaskedMax
asm: "V?PMAXS[BWDQ]"
in: &1mask2int
- class: mask
- *int
- *int
out: *1int
- go: MaskedMax
asm: "V?PMAXU[BWDQ]"
in: &1mask2uint
- class: mask
- *uint
- *uint
out: *1uint
# Min rules reuse the operand lists anchored by the Max rules above.
- go: Min
asm: "V?PMINS[BWDQ]"
in: *2int
out: *1int
- go: Min
asm: "V?PMINU[BWDQ]"
in: *2uint
out: *1uint
- go: MaskedMin
asm: "V?PMINS[BWDQ]"
in: *1mask2int
out: *1int
- go: MaskedMin
asm: "V?PMINU[BWDQ]"
in: *1mask2uint
out: *1uint
# Floating-point Max/Min (V?MAXP[SD] / V?MINP[SD]).
# Anchors defined here and reused by the Min rules:
#   &float       - operand: go: $t, base: float (redefined, same content)
#   &2float      - [float, float] input list
#   &1float      - [float] output list
#   &1mask2float - [mask, float, float] input list
- go: Max
asm: "V?MAXP[SD]"
in: &2float
- &float
go: $t
base: float
- *float
out: &1float
- *float
- go: MaskedMax
asm: "V?MAXP[SD]"
in: &1mask2float
- class: mask
- *float
- *float
out: *1float
- go: Min
asm: "V?MINP[SD]"
in: *2float
out: *1float
- go: MaskedMin
asm: "V?MINP[SD]"
in: *1mask2float
out: *1float
# SetElem binds to VPINSRB/W/D/Q.
# Operands: a vector register (&t, class: vreg), a general-purpose register
# (class: greg, lanes: 1) and an immediate with immOffset: 0. The vreg and
# the greg share the base variable $b, so they must have the same base type.
# The output is the same operand as the input vector (*t).
# NOTE(review): presumably the immediate selects the element index - confirm.
- go: SetElem
asm: "VPINSR[BWDQ]"
in:
- &t
class: vreg
base: $b
- class: greg
base: $b
lanes: 1 # Scalar, darn it!
- class: immediate
immOffset: 0
out:
- *t
# "Normal" multiplication is only available for floats.
# This only covers the single and double precision.
# Mul / MaskedMul bind to VMULPS / VMULPD. The anchor &fp is redefined here
# with the same content as its earlier definition (go: $t, base: float); both
# inputs and the output share $t.
- go: Mul
asm: "VMULP[SD]"
in:
- &fp
go: $t
base: float
- *fp
out:
- *fp
# Masked form: same asm regex, with a mask-class operand listed first.
- go: MaskedMul
asm: "VMULP[SD]"
in:
- class: mask
- *fp
- *fp
out:
- *fp
# Integer multiplications.
# Anchors (re)defined in this section:
#   &int  / &uint  - input operands: go: $t, base: int / uint
#   &int2 / &uint2 - output operands: go: $t2, base: int / uint
# Because outputs use $t2 rather than $t, the output Go type is not forced to
# equal the input Go type.
# MulEvenWiden
# Dword only.
- go: MulEvenWiden
asm: "VPMULDQ"
in:
- &int
go: $t
base: int
- *int
out:
- &int2
go: $t2
base: int
- go: MulEvenWiden
asm: "VPMULUDQ"
in:
- &uint
go: $t
base: uint
- *uint
out:
- &uint2
go: $t2
base: uint
- go: MaskedMulEvenWiden
asm: "VPMULDQ"
in:
- class: mask
- *int
- *int
out:
- *int2
- go: MaskedMulEvenWiden
asm: "VPMULUDQ"
in:
- class: mask
- *uint
- *uint
out:
- *uint2
# MulHigh
# Word only.
# Non-masked
# NOTE(review): the outputs below are *int2 / *uint2 ($t2), so the output is
# not tied to the input type by variable - confirm that this is intended
# rather than *int / *uint.
- go: MulHigh
asm: "VPMULHW"
in:
- *int
- *int
out:
- *int2
- go: MulHigh
asm: "VPMULHUW"
in:
- *uint
- *uint
out:
- *uint2
- go: MaskedMulHigh
asm: "VPMULHW"
in:
- class: mask
- *int
- *int
out:
- *int2
- go: MaskedMulHigh
asm: "VPMULHUW"
in:
- class: mask
- *uint
- *uint
out:
- *uint2
# MulLow
# Signed int only.
# Non-masked
# NOTE(review): as with MulHigh, the output is *int2 ($t2) rather than *int.
- go: MulLow
asm: "VPMULL[WDQ]"
in:
- *int
- *int
out:
- *int2
- go: MaskedMulLow
asm: "VPMULL[WDQ]"
in:
- class: mask
- *int
- *int
out:
- *int2
# Integers
# ShiftAll*
# The second operand of every ShiftAll* rule is &vecAsScalar64: a Uint64x2
# operand flagged with treatLikeAScalarOfSize: 64.
# NOTE(review): presumably this means a single 64-bit shift count is carried
# in a vector register - confirm against the generator.
# The anchor &any is redefined here with the same content (go: $t).
- go: ShiftAllLeft
asm: "VPSLL[WDQ]"
in:
- &any
go: $t
- &vecAsScalar64
treatLikeAScalarOfSize: 64
go: Uint64x2
out:
- *any
- go: MaskedShiftAllLeft
asm: "VPSLL[WDQ]"
in:
- class: mask
- *any
- *vecAsScalar64
out:
- *any
- go: ShiftAllRight
asm: "VPSRL[WDQ]"
in:
- *any
- *vecAsScalar64
out:
- *any
- go: MaskedShiftAllRight
asm: "VPSRL[WDQ]"
in:
- class: mask
- *any
- *vecAsScalar64
out:
- *any
# The sign-extended right shift is restricted to base: int operands (&int is
# redefined here with the same content as before).
- go: ShiftAllRightSignExtended
asm: "VPSRA[WDQ]"
in:
- &int
go: $t
base: int
- *vecAsScalar64
out:
- *int
- go: MaskedShiftAllRightSignExtended
asm: "VPSRA[WDQ]"
in:
- class: mask
- *int
- *vecAsScalar64
out:
- *int
# Shift* (variable)
# Per-element shifts: the second operand is a vector of the same Go type $t
# as the value being shifted (both are *any).
- go: ShiftLeft
asm: "VPSLLV[WD]"
in:
- *any
- *any
out:
- *any
- go: MaskedShiftLeft
asm: "VPSLLV[WD]"
in:
- class: mask
- *any
- *any
out:
- *any
# The XED data for VPSLLVQ marks the element bits as 32, which does not match
# the actual semantics, so we need to overwrite it to 64.
# The anchor &anyOverwriteElemBits (go: $t, overwriteElementBits: 64) is
# defined here and reused by the VPSRLVQ rules.
- go: ShiftLeft
asm: "VPSLLVQ"
in:
- &anyOverwriteElemBits
go: $t
overwriteElementBits: 64
- *anyOverwriteElemBits
out:
- *anyOverwriteElemBits
- go: MaskedShiftLeft
asm: "VPSLLVQ"
in:
- class: mask
- *anyOverwriteElemBits
- *anyOverwriteElemBits
out:
- *anyOverwriteElemBits
- go: ShiftRight
asm: "VPSRLV[WD]"
in:
- *any
- *any
out:
- *any
- go: MaskedShiftRight
asm: "VPSRLV[WD]"
in:
- class: mask
- *any
- *any
out:
- *any
# XED data of VPSRLVQ needs the same overwrite as VPSLLVQ.
- go: ShiftRight
asm: "VPSRLVQ"
in:
- *anyOverwriteElemBits
- *anyOverwriteElemBits
out:
- *anyOverwriteElemBits
- go: MaskedShiftRight
asm: "VPSRLVQ"
in:
- class: mask
- *anyOverwriteElemBits
- *anyOverwriteElemBits
out:
- *anyOverwriteElemBits
# NOTE(review): unlike ShiftAllRightSignExtended, which restricts operands to
# base: int, these rules use the unrestricted *any. The Q form is also matched
# here without the element-bits overwrite applied to VPSLLVQ/VPSRLVQ. Confirm
# that both are intended.
- go: ShiftRightSignExtended
asm: "VPSRAV[WDQ]"
in:
- *any
- *any
out:
- *any
- go: MaskedShiftRightSignExtended
asm: "VPSRAV[WDQ]"
in:
- class: mask
- *any
- *any
out:
- *any
# Rotate
# Only masked rules are declared. The "All" variants take the rotate amount
# as an immediate operand (&pureImm: class: immediate, immOffset: 0, no const
# value); the other variants take a second vector of the same type $t.
- go: MaskedRotateAllLeft
asm: "VPROL[DQ]"
in:
- class: mask
- *any
- &pureImm
class: immediate
immOffset: 0
out:
- *any
- go: MaskedRotateAllRight
asm: "VPROR[DQ]"
in:
- class: mask
- *any
- *pureImm
out:
- *any
# Variable rotates: [mask, value, per-element amounts], all value operands *any.
- go: MaskedRotateLeft
asm: "VPROLV[DQ]"
in:
- class: mask
- *any
- *any
out:
- *any
- go: MaskedRotateRight
asm: "VPRORV[DQ]"
in:
- class: mask
- *any
- *any
out:
- *any
# Bizarre shifts.
# Shift-and-fill rules (VPSHLD / VPSHRD and their V forms). Only masked rules
# are declared.
# The "All" variants take [mask, any, any, immediate]; the immediate is the
# *pureImm operand (class: immediate, immOffset: 0).
- go: MaskedShiftAllLeftAndFillUpperFrom
asm: "VPSHLD[WDQ]"
in:
- class: mask
- *any
- *any
- *pureImm
out:
- *any
- go: MaskedShiftAllRightAndFillUpperFrom
asm: "VPSHRD[WDQ]"
in:
- class: mask
- *any
- *any
- *pureImm
out:
- *any
# The variable (V) variants take three vector operands and no immediate, and
# the mask is the SECOND operand: [any, mask, any, any].
- go: MaskedShiftLeftAndFillUpperFrom
asm: "VPSHLDV[WDQ]"
in:
- *any
- class: mask
- *any
- *any
out:
- *any
- go: MaskedShiftRightAndFillUpperFrom
asm: "VPSHRDV[WDQ]"
in:
- *any
- class: mask
- *any
- *any
out:
- *any