blob: 02daa2ea1e2951b25bc70f343b730e93202f5a51 [file] [log] [blame]
!sum
# SetElem (integer lanes): matches VPINSRB / VPINSRW / VPINSRD / VPINSRQ.
# Inputs: the vector (anchored as &t), a one-lane general-purpose-register
# scalar with the same base type ($b), and an immediate named "index"
# (anchored as &imm). Output: a vector described by the same node as the
# input vector (*t).
- go: SetElem
asm: "VPINSR[BWDQ]"
in:
- &t
class: vreg
base: $b
- class: greg
base: $b
lanes: 1 # Scalar, darn it!
- &imm
class: immediate
immOffset: 0
name: index
out:
- *t
# SetElem (float view): matches only VPINSRD / VPINSRQ. The vector and the
# scalar are both declared with base int and carry OverwriteBase: float.
# NOTE(review): presumably this re-exposes the integer insert on float
# vectors -- confirm against the generator's handling of OverwriteBase.
# This entry re-anchors &t and &imm; the *imm aliases further down resolve
# to the &imm defined here.
- go: SetElem
asm: "VPINSR[DQ]"
in:
- &t
class: vreg
base: int
OverwriteBase: float
- class: greg
base: int
OverwriteBase: float
lanes: 1 # Scalar, darn it!
- &imm
class: immediate
immOffset: 0
name: index
out:
- *t
# GetElem (integer lanes): matches VPEXTRB / VPEXTRW / VPEXTRD / VPEXTRQ.
# Takes a vector and the shared "index" immediate (*imm) and yields a
# general-purpose-register value. $e ties the output's bit width to the
# input vector's element width; $b ties the base types together.
- go: GetElem
asm: "VPEXTR[BWDQ]"
in:
- class: vreg
base: $b
elemBits: $e
- *imm
out:
- class: greg
base: $b
bits: $e
# GetElem (float view): matches only VPEXTRD / VPEXTRQ. Input vector and
# output scalar are declared with base int plus OverwriteBase: float, and
# share the element width through $e.
# NOTE(review): presumably the float counterpart of the integer GetElem
# entry -- confirm how OverwriteBase is applied to the greg output.
- go: GetElem
asm: "VPEXTR[DQ]"
in:
- class: vreg
base: int
elemBits: $e
OverwriteBase: float
- *imm
out:
- class: greg
base: int
bits: $e
OverwriteBase: float
# SetHi|SetLo, 8-bit element view: matches VINSERTI128 / VINSERTI64X4.
# &i8x2N is the vector being updated (and the result), &i8xN is the vector
# being inserted, and &imm01 is the "index" immediate, declared with a
# placeholder const of 0. inVariant is explicitly empty.
# NOTE(review): &i8x2N and &i8xN declare identical fields; the 2N-vs-N width
# relation suggested by the anchor names is presumably fixed by the matched
# instruction -- confirm.
- go: "SetHi|SetLo"
regexpTag: "move"
asm: "VINSERTI128|VINSERTI64X4"
inVariant: []
in:
- &i8x2N
class: vreg
base: $t
OverwriteElementBits: 8
- &i8xN
class: vreg
base: $t
OverwriteElementBits: 8
- &imm01 # This immediate should be only 0 or 1
class: immediate
const: 0 # place holder
name: index
out:
- *i8x2N
# GetHi|GetLo, 8-bit element view: matches VEXTRACTI128 / VEXTRACTI64X4.
# Uses the anchors from the entry above in the opposite direction:
# *i8x2N and *imm01 in, *i8xN out.
- go: "GetHi|GetLo"
asm: "VEXTRACTI128|VEXTRACTI64X4"
regexpTag: "move"
inVariant: []
in:
- *i8x2N
- *imm01
out:
- *i8xN
# SetHi|SetLo, 16-bit element view: same VINSERTI128 / VINSERTI64X4 pattern,
# with operands anchored as &i16x2N (updated vector and result) and &i16xN
# (inserted vector). The immediate is the shared *imm01.
- go: "SetHi|SetLo"
asm: "VINSERTI128|VINSERTI64X4"
regexpTag: "move"
inVariant: []
in:
- &i16x2N
class: vreg
base: $t
OverwriteElementBits: 16
- &i16xN
class: vreg
base: $t
OverwriteElementBits: 16
- *imm01
out:
- *i16x2N
# GetHi|GetLo, 16-bit element view: VEXTRACTI128 / VEXTRACTI64X4 with
# *i16x2N and *imm01 in, *i16xN out.
- go: "GetHi|GetLo"
regexpTag: "move"
asm: "VEXTRACTI128|VEXTRACTI64X4"
inVariant: []
in:
- *i16x2N
- *imm01
out:
- *i16xN
# SetHi|SetLo, 32-bit element view: same VINSERTI128 / VINSERTI64X4 pattern,
# with operands anchored as &i32x2N (updated vector and result) and &i32xN
# (inserted vector). The immediate is the shared *imm01.
- go: "SetHi|SetLo"
regexpTag: "move"
asm: "VINSERTI128|VINSERTI64X4"
inVariant: []
in:
- &i32x2N
class: vreg
base: $t
OverwriteElementBits: 32
- &i32xN
class: vreg
base: $t
OverwriteElementBits: 32
- *imm01
out:
- *i32x2N
# GetHi|GetLo, 32-bit element view: VEXTRACTI128 / VEXTRACTI64X4 with
# *i32x2N and *imm01 in, *i32xN out.
- go: "GetHi|GetLo"
regexpTag: "move"
asm: "VEXTRACTI128|VEXTRACTI64X4"
inVariant: []
in:
- *i32x2N
- *imm01
out:
- *i32xN
# SetHi|SetLo, 64-bit element view: same VINSERTI128 / VINSERTI64X4 pattern,
# with operands anchored as &i64x2N (updated vector and result) and &i64xN
# (inserted vector). The immediate is the shared *imm01.
- go: "SetHi|SetLo"
regexpTag: "move"
asm: "VINSERTI128|VINSERTI64X4"
inVariant: []
in:
- &i64x2N
class: vreg
base: $t
OverwriteElementBits: 64
- &i64xN
class: vreg
base: $t
OverwriteElementBits: 64
- *imm01
out:
- *i64x2N
# GetHi|GetLo, 64-bit element view: VEXTRACTI128 / VEXTRACTI64X4 with
# *i64x2N and *imm01 in, *i64xN out.
- go: "GetHi|GetLo"
regexpTag: "move"
asm: "VEXTRACTI128|VEXTRACTI64X4"
inVariant: []
in:
- *i64x2N
- *imm01
out:
- *i64xN
# SetHi|SetLo, 32-bit element view on the floating-point instruction forms:
# matches VINSERTF128 / VINSERTF64X4. Operands are anchored as &f32x2N
# (updated vector and result) and &f32xN (inserted vector); the immediate is
# the shared *imm01.
- go: "SetHi|SetLo"
regexpTag: "move"
asm: "VINSERTF128|VINSERTF64X4"
inVariant: []
in:
- &f32x2N
class: vreg
base: $t
OverwriteElementBits: 32
- &f32xN
class: vreg
base: $t
OverwriteElementBits: 32
- *imm01
out:
- *f32x2N
# GetHi|GetLo, 32-bit element view: VEXTRACTF128 / VEXTRACTF64X4 with
# *f32x2N and *imm01 in, *f32xN out.
- go: "GetHi|GetLo"
regexpTag: "move"
asm: "VEXTRACTF128|VEXTRACTF64X4"
inVariant: []
in:
- *f32x2N
- *imm01
out:
- *f32xN
# SetHi|SetLo, 64-bit element view on the floating-point instruction forms:
# matches VINSERTF128 / VINSERTF64X4. Operands are anchored as &f64x2N
# (updated vector and result) and &f64xN (inserted vector); the immediate is
# the shared *imm01.
- go: "SetHi|SetLo"
regexpTag: "move"
asm: "VINSERTF128|VINSERTF64X4"
inVariant: []
in:
- &f64x2N
class: vreg
base: $t
OverwriteElementBits: 64
- &f64xN
class: vreg
base: $t
OverwriteElementBits: 64
- *imm01
out:
- *f64x2N
# GetHi|GetLo, 64-bit element view: VEXTRACTF128 / VEXTRACTF64X4 with
# *f64x2N and *imm01 in, *f64xN out.
- go: "GetHi|GetLo"
regexpTag: "move"
asm: "VEXTRACTF128|VEXTRACTF64X4"
inVariant: []
in:
- *f64x2N
- *imm01
out:
- *f64xN
# Permute, 4-lane form: matches VPERMQ / VPERMPD.
# &anyindices is the index vector (named "indices", base overwritten to
# uint), &any4 is the 4-lane data vector, and &any is the result; all three
# use the Go type variable $t. &anyindices and &any are aliased by later
# entries in this file (the other Permute forms, ConcatPermute, Compress,
# Expand).
# NOTE(review): this operand spells the key "overwriteBase" while the other
# entries in this file use "OverwriteBase" -- confirm the loader accepts
# both spellings.
- go: Permute
asm: "VPERMQ|VPERMPD"
addDoc: !string |-
// The low 2 bits (values 0-3) of each element of indices is used.
operandOrder: "21Type1"
in:
- &anyindices
go: $t
name: indices
overwriteBase: uint
- &any4
go: $t
lanes: 4
out:
- &any
go: $t
# Permute, 8-lane form: matches VPERMW / VPERMD / VPERMQ / VPERMPS / VPERMPD.
# Index vector is the shared *anyindices; the data vector is anchored as
# &any8 (lanes: 8); the result is *any.
- go: Permute
asm: "VPERM[WDQ]|VPERMP[SD]"
addDoc: !string |-
// The low 3 bits (values 0-7) of each element of indices is used.
operandOrder: "21Type1"
in:
- *anyindices
- &any8
go: $t
lanes: 8
out:
- *any
# Permute, 16-lane form: matches VPERMB / VPERMW / VPERMD / VPERMPS.
# Index vector is the shared *anyindices; the data vector is anchored as
# &any16 (lanes: 16); the result is *any.
- go: Permute
asm: "VPERM[BWD]|VPERMPS"
addDoc: !string |-
// The low 4 bits (values 0-15) of each element of indices is used.
operandOrder: "21Type1"
in:
- *anyindices
- &any16
go: $t
lanes: 16
out:
- *any
# Permute, 32-lane form: matches VPERMB / VPERMW.
# Index vector is the shared *anyindices; the data vector is anchored as
# &any32 (lanes: 32); the result is *any.
- go: Permute
asm: "VPERM[BW]"
addDoc: !string |-
// The low 5 bits (values 0-31) of each element of indices is used.
operandOrder: "21Type1"
in:
- *anyindices
- &any32
go: $t
lanes: 32
out:
- *any
# Permute, 64-lane form: matches VPERMB only.
# Index vector is the shared *anyindices; the data vector is anchored as
# &any64 (lanes: 64); the result is *any.
- go: Permute
asm: "VPERMB"
addDoc: !string |-
// The low 6 bits (values 0-63) of each element of indices is used.
operandOrder: "21Type1"
in:
- *anyindices
- &any64
go: $t
lanes: 64
out:
- *any
# ConcatPermute: matches VPERMI2B/W/D/Q and VPERMI2PS/PD.
# Inputs are the index vector (*anyindices) followed by two data vectors
# (*any); the result is *any. The operandOrder value "231Type1" is explained
# by the comment below.
- go: ConcatPermute
asm: "VPERMI2[BWDQ]|VPERMI2P[SD]"
# Because we are overwriting the receiver's type, we
# have to move the receiver to be a parameter so that
# we can have no duplication.
operandOrder: "231Type1"
in:
- *anyindices # result in arg 0
- *any
- *any
out:
- *any
# Compress: matches VPCOMPRESSB/W/D/Q and VCOMPRESSPS/PD.
# The mask is listed as a regular input (first operand) rather than under
# inVariant, for the reason given in the comment below; data in and result
# are both *any.
- go: Compress
asm: "VPCOMPRESS[BWDQ]|VCOMPRESSP[SD]"
in:
# The mask in Compress is a control mask rather than a write mask, so it's not optional.
- class: mask
- *any
out:
- *any
# blend (VPBLENDVB form): two data vectors (&v: vreg, base int, Go type $t)
# plus a third vreg operand named "mask"; the result is *v. zeroing is
# disabled for this entry.
# For now a non-public method because
# (1) [OverwriteClass] must be set together with [OverwriteBase]
# (2) "simdgen does not support [OverwriteClass] in inputs".
# That means the signature is wrong.
- go: blend
asm: VPBLENDVB
zeroing: false
in:
- &v
go: $t
class: vreg
base: int
- *v
-
class: vreg
base: int
name: mask
out:
- *v
# For AVX512
# blend (VPBLENDMB/W/D/Q form): two 512-bit data vectors (&v, re-anchored
# here with bits: 512) and a mask-class operand declared under inVariant;
# the result is *v. zeroing is disabled for this entry.
- go: blend
asm: VPBLENDM[BWDQ]
zeroing: false
in:
- &v
go: $t
bits: 512
class: vreg
base: int
- *v
inVariant:
-
class: mask
out:
- *v
# For AVX512
# move: matches VMOVDQU8 / VMOVDQU16 / VMOVDQU32 / VMOVDQU64. A single data
# vector (&v, base int or uint) with a mask-class operand under inVariant;
# the result is *v. Unlike the blend entries, zeroing is enabled here.
- go: move
asm: VMOVDQU(8|16|32|64)
zeroing: true
in:
- &v
go: $t
class: vreg
base: int|uint
inVariant:
-
class: mask
out:
- *v
# Expand: matches VPEXPANDB/W/D/Q and VEXPANDPS/PD.
# Mirrors the Compress entry's shape: the mask is a regular first input
# (see the comment below), data in and result are both *any.
- go: Expand
asm: "VPEXPAND[BWDQ]|VEXPANDP[SD]"
in:
# The mask in Expand is a control mask rather than a write mask, so it's not optional.
- class: mask
- *any
out:
- *any
# Broadcast1To2: matches VPBROADCASTQ with a 128-bit, 64-bit-element vector
# on both sides; input and output share the base type through $b.
- go: Broadcast1To2
asm: VPBROADCASTQ
in:
- class: vreg
bits: 128
elemBits: 64
base: $b
out:
- class: vreg
bits: 128
elemBits: 64
base: $b
# weirdly, this one case on AVX2 is memory-operand-only
# Broadcast1To2 (float view): the same VPBROADCASTQ shape as the preceding
# entry, but with base int and OverwriteBase: float on both the input and
# the output.
# NOTE(review): presumably this stands in for a float broadcast that has no
# register-operand form, per the comment above -- confirm.
- go: Broadcast1To2
asm: VPBROADCASTQ
in:
- class: vreg
bits: 128
elemBits: 64
base: int
OverwriteBase: float
out:
- class: vreg
bits: 128
elemBits: 64
base: int
OverwriteBase: float
# Broadcast1To4 (integer): matches VPBROADCASTB/W/D/Q. Input is a 128-bit
# vector; output is constrained to 4 lanes; base type is shared via $b.
- go: Broadcast1To4
asm: VPBROADCAST[BWDQ]
in:
- class: vreg
bits: 128
base: $b
out:
- class: vreg
lanes: 4
base: $b
# Broadcast1To8 (integer): matches VPBROADCASTB/W/D/Q. Input is a 128-bit
# vector; output is constrained to 8 lanes; base type is shared via $b.
- go: Broadcast1To8
asm: VPBROADCAST[BWDQ]
in:
- class: vreg
bits: 128
base: $b
out:
- class: vreg
lanes: 8
base: $b
# Broadcast1To16 (integer): matches VPBROADCASTB/W/D/Q. Input is a 128-bit
# vector; output is constrained to 16 lanes; base type is shared via $b.
- go: Broadcast1To16
asm: VPBROADCAST[BWDQ]
in:
- class: vreg
bits: 128
base: $b
out:
- class: vreg
lanes: 16
base: $b
# Broadcast1To32 (integer): matches VPBROADCASTB/W/D/Q. Input is a 128-bit
# vector; output is constrained to 32 lanes; base type is shared via $b.
- go: Broadcast1To32
asm: VPBROADCAST[BWDQ]
in:
- class: vreg
bits: 128
base: $b
out:
- class: vreg
lanes: 32
base: $b
# Broadcast1To64 (integer): matches only VPBROADCASTB. Input is a 128-bit
# vector; output is constrained to 64 lanes; base type is shared via $b.
- go: Broadcast1To64
asm: VPBROADCASTB
in:
- class: vreg
bits: 128
base: $b
out:
- class: vreg
lanes: 64
base: $b
# Broadcast1To4 (float): matches VBROADCASTSS / VBROADCASTSD. Input is a
# 128-bit float vector; output is a float vector constrained to 4 lanes.
- go: Broadcast1To4
asm: VBROADCASTS[SD]
in:
- class: vreg
bits: 128
base: float
out:
- class: vreg
lanes: 4
base: float
# Broadcast1To8 (float): matches VBROADCASTSS / VBROADCASTSD. Input is a
# 128-bit float vector; output is a float vector constrained to 8 lanes.
- go: Broadcast1To8
asm: VBROADCASTS[SD]
in:
- class: vreg
bits: 128
base: float
out:
- class: vreg
lanes: 8
base: float
# Broadcast1To16 (float): matches VBROADCASTSS / VBROADCASTSD. Input is a
# 128-bit float vector; output is a float vector constrained to 16 lanes.
- go: Broadcast1To16
asm: VBROADCASTS[SD]
in:
- class: vreg
bits: 128
base: float
out:
- class: vreg
lanes: 16
base: float
# VPSHUFB for 128-bit byte shuffles will be picked with higher priority than VPERMB, given its lower CPU feature requirement. (It's AVX)
# PermuteOrZero: 128-bit VPSHUFB. The data vector is anchored as &128any
# (bits: 128, Go type $t) and is also the result; the second input is a
# 128-bit signed-integer vector named "indices". &128any is aliased by the
# permuteScalars and Interleave entries later in the file.
- go: PermuteOrZero
asm: VPSHUFB
addDoc: !string |-
// The lower four bits of each byte-sized index in indices select an element from x,
// unless the index's sign bit is set in which case zero is used instead.
in:
- &128any
bits: 128
go: $t
- bits: 128
name: indices
base: int # always signed
out:
- *128any
# PermuteOrZeroGrouped: 256- or 512-bit VPSHUFB, described in the doc text
# as operating independently on each 128-bit group. The data vector is
# anchored as &256Or512any (also the result); the second input is a
# signed-integer vector named "indices" of the same widths. &256Or512any is
# aliased by the *Grouped entries later in the file.
- go: PermuteOrZeroGrouped
asm: VPSHUFB
addDoc: !string |-
//
// result = {x_group0[indices[0]], x_group0[indices[1]], ..., x_group1[indices[16]], x_group1[indices[17]], ...}
//
// The lower four bits of each byte-sized index in indices select an element from its corresponding group in x,
// unless the index's sign bit is set in which case zero is used instead.
// Each group is of size 128-bit.
in:
- &256Or512any
bits: "256|512"
go: $t
- bits: "256|512"
base: int
name: indices
out:
- *256Or512any
# permuteScalars: 128-bit VPSHUFD. Inputs are the shared 128-bit vector
# (*128any) and an immediate named "indices" that packs four 2-bit selectors
# (see addDoc); the result is *128any. hideMaskMethods is set to true.
- go: permuteScalars
asm: VPSHUFD
addDoc: !string |-
//
// result = {x[indices[0:2]], x[indices[2:4]], x[indices[4:6]], x[indices[6:8]]}
//
// Indices is four 2-bit values packed into a byte, thus indices[0:2] is the first index.
in:
- *128any
- class: immediate
immOffset: 0
name: indices
hideMaskMethods: true
out:
- *128any
# permuteScalarsGrouped: 256- or 512-bit VPSHUFD. The same immediate-driven
# selection as permuteScalars, applied to every 128-bit group with the same
# "indices" byte (see addDoc). Operates on *256Or512any; hideMaskMethods is
# set to true.
- go: permuteScalarsGrouped
asm: VPSHUFD
addDoc: !string |-
//
// result = {x_group0[indices[0:2]], x_group0[indices[2:4]], x_group0[indices[4:6]], x_group0[indices[6:8]], x_group1[indices[0:2]], ...}
//
// Indices is four 2-bit values packed into a byte, thus indices[0:2] is the first index.
// Each group is of size 128-bit.
in:
- *256Or512any
- class: immediate
immOffset: 0
name: indices
hideMaskMethods: true
out:
- *256Or512any
# permuteScalarsLo: 128-bit VPSHUFLW. Permutes the four low 16-bit elements
# according to the "indices" immediate and passes elements 4..7 through
# unchanged (see addDoc). The vector is anchored as &128lanes8 (bits: 128,
# elemBits: 16) and reused by permuteScalarsHi. hideMaskMethods is set to
# true.
- go: permuteScalarsLo
asm: VPSHUFLW
addDoc: !string |-
//
// result = {x[indices[0:2]], x[indices[2:4]], x[indices[4:6]], x[indices[6:8]], x[4], x[5], x[6], x[7]}
//
// Indices is four 2-bit values packed into a byte, thus indices[0:2] is the first index.
in:
- &128lanes8
bits: 128
go: $t
elemBits: 16
- class: immediate
immOffset: 0
name: indices
hideMaskMethods: true
out:
- *128lanes8
# permuteScalarsLoGrouped: 256- or 512-bit VPSHUFLW. Within every 128-bit
# group, the four low 16-bit elements are permuted according to the
# "indices" immediate and elements 4..7 of that same group pass through
# unchanged. The vector is anchored as &256Or512lanes8 (elemBits: 16) and
# reused by permuteScalarsHiGrouped. hideMaskMethods is set to true.
# The pass-through elements in the doc text are written with the x_group0
# notation so that they match the other grouped entries.
- go: permuteScalarsLoGrouped
asm: VPSHUFLW
addDoc: !string |-
//
// result = {x_group0[indices[0:2]], x_group0[indices[2:4]], x_group0[indices[4:6]], x_group0[indices[6:8]], x_group0[4], x_group0[5], x_group0[6], x_group0[7],
// x_group1[indices[0:2]], ...}
//
// Indices is four 2-bit values packed into a byte, thus indices[0:2] is the first index.
// Each group is of size 128-bit.
in:
- &256Or512lanes8
bits: "256|512"
go: $t
elemBits: 16
- class: immediate
immOffset: 0
name: indices
hideMaskMethods: true
out:
- *256Or512lanes8
# permuteScalarsHi: 128-bit VPSHUFHW. Elements 0..3 pass through unchanged
# and elements 4..7 are selected from the upper four by the "indices"
# immediate (each 2-bit selector is offset by 4, see addDoc). Operates on
# *128lanes8; hideMaskMethods is set to true.
- go: permuteScalarsHi
asm: VPSHUFHW
addDoc: !string |-
//
// result = {x[0], x[1], x[2], x[3], x[indices[0:2]+4], x[indices[2:4]+4], x[indices[4:6]+4], x[indices[6:8]+4]}
//
// Indices is four 2-bit values packed into a byte, thus indices[0:2] is the first index.
in:
- *128lanes8
- class: immediate
immOffset: 0
name: indices
hideMaskMethods: true
out:
- *128lanes8
# permuteScalarsHiGrouped: 256- or 512-bit VPSHUFHW. The same behaviour as
# permuteScalarsHi, applied within every 128-bit group (see addDoc).
# Operates on *256Or512lanes8; hideMaskMethods is set to true.
- go: permuteScalarsHiGrouped
asm: VPSHUFHW
addDoc: !string |-
// result =
//
// {x_group0[0], x_group0[1], x_group0[2], x_group0[3], x_group0[indices[0:2]+4], x_group0[indices[2:4]+4], x_group0[indices[4:6]+4], x_group0[indices[6:8]+4],
// x_group1[0], x_group1[1], x_group1[2], x_group1[3], x_group1[indices[0:2]+4], ...}
//
// Indices is four 2-bit values packed into a byte, thus indices[0:2] is the first index.
// Each group is of size 128-bit.
in:
- *256Or512lanes8
- class: immediate
immOffset: 0
name: indices
hideMaskMethods: true
out:
- *256Or512lanes8
# InterleaveHi: 128-bit VPUNPCKHQDQ / VPUNPCKHDQ / VPUNPCKHWD / VPUNPCKHWB
# as spelled by the asm pattern. Two *128any inputs, *128any result;
# inVariant is explicitly empty.
- go: InterleaveHi
asm: VPUNPCKH(QDQ|DQ|WD|WB)
in:
- *128any
- *128any
inVariant: []
out:
- *128any
# InterleaveLo: 128-bit VPUNPCKLQDQ / VPUNPCKLDQ / VPUNPCKLWD / VPUNPCKLWB
# as spelled by the asm pattern. Two *128any inputs, *128any result;
# inVariant is explicitly empty.
- go: InterleaveLo
asm: VPUNPCKL(QDQ|DQ|WD|WB)
in:
- *128any
- *128any
inVariant: []
out:
- *128any
# InterleaveHiGrouped: the InterleaveHi asm pattern on 256- or 512-bit
# vectors. Two *256Or512any inputs, *256Or512any result; inVariant is
# explicitly empty.
- go: InterleaveHiGrouped
asm: VPUNPCKH(QDQ|DQ|WD|WB)
in:
- *256Or512any
- *256Or512any
inVariant: []
out:
- *256Or512any
# InterleaveLoGrouped: the InterleaveLo asm pattern on 256- or 512-bit
# vectors. Two *256Or512any inputs, *256Or512any result; inVariant is
# explicitly empty.
- go: InterleaveLoGrouped
asm: VPUNPCKL(QDQ|DQ|WD|WB)
in:
- *256Or512any
- *256Or512any
inVariant: []
out:
- *256Or512any
# These are all described separately to carry the name of the constant parameter
# concatSelectedConstant, VSHUFPS on 128-bit float vectors: two *v inputs
# (&v is re-anchored in each of these entries) plus an immediate named
# "h1h0l1l0"; the result is *v. inVariant is explicitly empty.
# NOTE(review): this is the only concatSelectedConstant entry that sets
# "width: 32" -- confirm whether the sibling VSHUFPS entries should carry it
# too, or whether it is redundant here.
- go: concatSelectedConstant
asm: VSHUFPS
width: 32
in:
- &v
go: $t
class: vreg
base: float
bits: 128
- *v
- class: immediate
immOffset: 0
name: h1h0l1l0
inVariant: []
out:
- *v
# concatSelectedConstant, VSHUFPS on 128-bit vectors declared as float with
# OverwriteBase: int. Two *v inputs plus the "h1h0l1l0" immediate; the
# result is *v.
- go: concatSelectedConstant
asm: VSHUFPS
in:
- &v
go: $t
class: vreg
base: float
bits: 128
OverwriteBase: int
- *v
- class: immediate
immOffset: 0
name: h1h0l1l0
inVariant: []
out:
- *v
# concatSelectedConstant, VSHUFPS on 128-bit vectors declared as float with
# OverwriteBase: uint. Two *v inputs plus the "h1h0l1l0" immediate; the
# result is *v.
- go: concatSelectedConstant
asm: VSHUFPS
in:
- &v
go: $t
class: vreg
base: float
bits: 128
OverwriteBase: uint
- *v
- class: immediate
immOffset: 0
name: h1h0l1l0
inVariant: []
out:
- *v
# concatSelectedConstantGrouped, VSHUFPS on 256- or 512-bit float vectors.
# Two *v inputs plus the "h1h0l1l0" immediate; the result is *v.
- go: concatSelectedConstantGrouped
asm: VSHUFPS
in:
- &v
go: $t
class: vreg
base: float
bits: "256|512"
- *v
- class: immediate
immOffset: 0
name: h1h0l1l0
inVariant: []
out:
- *v
# concatSelectedConstantGrouped, VSHUFPS on 256- or 512-bit vectors declared
# as float with OverwriteBase: int. Two *v inputs plus the "h1h0l1l0"
# immediate; the result is *v.
- go: concatSelectedConstantGrouped
asm: VSHUFPS
in:
- &v
go: $t
class: vreg
base: float
bits: "256|512"
OverwriteBase: int
- *v
- class: immediate
immOffset: 0
name: h1h0l1l0
inVariant: []
out:
- *v
# concatSelectedConstantGrouped, VSHUFPS on 256- or 512-bit vectors declared
# as float with OverwriteBase: uint. Two *v inputs plus the "h1h0l1l0"
# immediate; the result is *v.
- go: concatSelectedConstantGrouped
asm: VSHUFPS
in:
- &v
go: $t
class: vreg
base: float
bits: "256|512"
OverwriteBase: uint
- *v
- class: immediate
immOffset: 0
name: h1h0l1l0
inVariant: []
out:
- *v
# 64 bit versions
# concatSelectedConstant, VSHUFPD on 128-bit float vectors: two *v inputs
# plus an immediate named "hilo"; the result is *v. inVariant is explicitly
# empty.
- go: concatSelectedConstant
asm: VSHUFPD
in:
- &v
go: $t
class: vreg
base: float
bits: 128
- *v
- class: immediate
immOffset: 0
name: hilo
inVariant: []
out:
- *v
# concatSelectedConstant, VSHUFPD on 128-bit vectors declared as float with
# OverwriteBase: int. Two *v inputs plus the "hilo" immediate; the result
# is *v.
- go: concatSelectedConstant
asm: VSHUFPD
in:
- &v
go: $t
class: vreg
base: float
bits: 128
OverwriteBase: int
- *v
- class: immediate
immOffset: 0
name: hilo
inVariant: []
out:
- *v
# concatSelectedConstant, VSHUFPD on 128-bit vectors declared as float with
# OverwriteBase: uint. Two *v inputs plus the "hilo" immediate; the result
# is *v.
- go: concatSelectedConstant
asm: VSHUFPD
in:
- &v
go: $t
class: vreg
base: float
bits: 128
OverwriteBase: uint
- *v
- class: immediate
immOffset: 0
name: hilo
inVariant: []
out:
- *v
# concatSelectedConstantGrouped, VSHUFPD on 256- or 512-bit float vectors.
# Two *v inputs plus an immediate named "hilos" (the 128-bit entries use
# "hilo"); the result is *v.
- go: concatSelectedConstantGrouped
asm: VSHUFPD
in:
- &v
go: $t
class: vreg
base: float
bits: "256|512"
- *v
- class: immediate
immOffset: 0
name: hilos
inVariant: []
out:
- *v
# concatSelectedConstantGrouped, VSHUFPD on 256- or 512-bit vectors declared
# as float with OverwriteBase: int. Two *v inputs plus the "hilos"
# immediate; the result is *v.
- go: concatSelectedConstantGrouped
asm: VSHUFPD
in:
- &v
go: $t
class: vreg
base: float
bits: "256|512"
OverwriteBase: int
- *v
- class: immediate
immOffset: 0
name: hilos
inVariant: []
out:
- *v
# concatSelectedConstantGrouped, VSHUFPD on 256- or 512-bit vectors declared
# as float with OverwriteBase: uint. Two *v inputs plus the "hilos"
# immediate; the result is *v.
- go: concatSelectedConstantGrouped
asm: VSHUFPD
in:
- &v
go: $t
class: vreg
base: float
bits: "256|512"
OverwriteBase: uint
- *v
- class: immediate
immOffset: 0
name: hilos
inVariant: []
out:
- *v
# Select128FromPair, VPERM2F128 on 256-bit float vectors with no element
# width override; the doc example uses four elements per vector. Two *v
# inputs plus one immediate whose name is "lo, hi"; the result is *v.
# NOTE(review): the "lo, hi" name together with operandOrder: II and the
# NAME(3, 0, ...) example suggests the immediate is surfaced as two leading
# arguments -- confirm against the generator.
- go: Select128FromPair
asm: VPERM2F128
operandOrder: II
addDoc: !string |-
// For example,
//
// {40, 41, 50, 51}.NAME(3, 0, {60, 61, 70, 71})
//
// returns {70, 71, 40, 41}.
in:
- &v
go: $t
class: vreg
base: float
bits: 256
- *v
- class: immediate
immOffset: 0
name: "lo, hi"
inVariant: []
out:
- *v
# Select128FromPair, VPERM2F128 on 256-bit float vectors with
# OverwriteElementBits: 32; the doc example accordingly uses eight elements
# per vector. Two *v inputs plus the "lo, hi" immediate; the result is *v.
- go: Select128FromPair
asm: VPERM2F128
operandOrder: II
addDoc: !string |-
// For example,
//
// {40, 41, 42, 43, 50, 51, 52, 53}.NAME(3, 0, {60, 61, 62, 63, 70, 71, 72, 73})
//
// returns {70, 71, 72, 73, 40, 41, 42, 43}.
in:
- &v
go: $t
class: vreg
base: float
bits: 256
OverwriteElementBits: 32
- *v
- class: immediate
immOffset: 0
name: "lo, hi"
inVariant: []
out:
- *v
# Select128FromPair, VPERM2I128 on 256-bit int or uint vectors with
# OverwriteElementBits: 64; the doc example uses four elements per vector.
# Two *v inputs plus the "lo, hi" immediate; the result is *v.
- go: Select128FromPair
asm: VPERM2I128
operandOrder: II
addDoc: !string |-
// For example,
//
// {40, 41, 50, 51}.NAME(3, 0, {60, 61, 70, 71})
//
// returns {70, 71, 40, 41}.
in:
- &v
go: $t
class: vreg
base: int|uint
bits: 256
OverwriteElementBits: 64
- *v
- class: immediate
immOffset: 0
name: "lo, hi"
inVariant: []
out:
- *v
# Select128FromPair, VPERM2I128 on 256-bit int or uint vectors with
# OverwriteElementBits: 32; the doc example uses eight elements per vector.
# Two *v inputs plus the "lo, hi" immediate; the result is *v.
- go: Select128FromPair
asm: VPERM2I128
operandOrder: II
addDoc: !string |-
// For example,
//
// {40, 41, 42, 43, 50, 51, 52, 53}.NAME(3, 0, {60, 61, 62, 63, 70, 71, 72, 73})
//
// returns {70, 71, 72, 73, 40, 41, 42, 43}.
in:
- &v
go: $t
class: vreg
base: int|uint
bits: 256
OverwriteElementBits: 32
- *v
- class: immediate
immOffset: 0
name: "lo, hi"
inVariant: []
out:
- *v
# Select128FromPair, VPERM2I128 on 256-bit int or uint vectors with
# OverwriteElementBits: 16; the doc example uses sixteen elements per
# vector. Two *v inputs plus the "lo, hi" immediate; the result is *v.
- go: Select128FromPair
asm: VPERM2I128
operandOrder: II
addDoc: !string |-
// For example,
//
// {40, 41, 42, 43, 44, 45, 46, 47, 50, 51, 52, 53, 54, 55, 56, 57}.NAME(3, 0,
// {60, 61, 62, 63, 64, 65, 66, 67, 70, 71, 72, 73, 74, 75, 76, 77})
//
// returns {70, 71, 72, 73, 74, 75, 76, 77, 40, 41, 42, 43, 44, 45, 46, 47}.
in:
- &v
go: $t
class: vreg
base: int|uint
bits: 256
OverwriteElementBits: 16
- *v
- class: immediate
immOffset: 0
name: "lo, hi"
inVariant: []
out:
- *v
# Select128FromPair, VPERM2I128 on 256-bit int or uint vectors with
# OverwriteElementBits: 8; the doc example abbreviates the thirty-two
# elements per vector with hexadecimal ranges. Two *v inputs plus the
# "lo, hi" immediate; the result is *v.
- go: Select128FromPair
asm: VPERM2I128
operandOrder: II
addDoc: !string |-
// For example,
//
// {0x40, 0x41, ..., 0x4f, 0x50, 0x51, ..., 0x5f}.NAME(3, 0,
// {0x60, 0x61, ..., 0x6f, 0x70, 0x71, ..., 0x7f})
//
// returns {0x70, 0x71, ..., 0x7f, 0x40, 0x41, ..., 0x4f}.
in:
- &v
go: $t
class: vreg
base: int|uint
bits: 256
OverwriteElementBits: 8
- *v
- class: immediate
immOffset: 0
name: "lo, hi"
inVariant: []
out:
- *v
# ConcatShiftBytesRight: 128-bit VPALIGNR. Two unsigned 128-bit vectors
# (anchored as &uint128) plus an immediate named "shift"; the result is
# *uint128.
- go: ConcatShiftBytesRight
asm: VPALIGNR
in:
- &uint128
go: $t
base: uint
bits: 128
- *uint128
- class: immediate
immOffset: 0
name: shift
out:
- *uint128
# ConcatShiftBytesRightGrouped: 256- or 512-bit VPALIGNR. Two unsigned
# vectors (anchored as &uint256512) plus an immediate named "shift"; the
# result is *uint256512.
# The bits alternation is quoted to match every other "256|512" in this
# file; the scalar value is unchanged.
- go: ConcatShiftBytesRightGrouped
asm: VPALIGNR
in:
- &uint256512
go: $t
base: uint
bits: "256|512"
- *uint256512
- class: immediate
immOffset: 0
name: shift
out:
- *uint256512