src/simd/archsimd/_gen/simdgen/ops/BitwiseLogic/go.yaml - go - Git at Google

 !sum
 # In the XED data, *all* floating point bitwise logic operations have their
 # operand type marked as uint. We are not trying to understand why Intel
 # decided that they want FP bit-wise logic operations, but this irregularity
 # has to be dealt with in separate rules with some overwrites.

 # For many bit-wise operations, we have the following non-orthogonal
 # choices:
 #
 # - Non-masked AVX operations have no element width (because it
 # doesn't matter), but only cover 128 and 256 bit vectors.
 #
 # - Masked AVX-512 operations have an element width (because it needs
 # to know how to interpret the mask), and cover 128, 256, and 512 bit
 # vectors. These only cover 32- and 64-bit element widths.
 #
 # - Non-masked AVX-512 operations still have an element width (because
 # they're just the masked operations with an implicit K0 mask) but it
 # doesn't matter! This is the only option for non-masked 512 bit
 # operations, and we can pick any of the element widths.
 #
 # We unify with ALL of these operations and the compiler generator
 # picks when there are multiple options.

 # TODO: We don't currently generate unmasked bit-wise operations on 512 bit
 # vectors of 8- or 16-bit elements. AVX-512 only has *masked* bit-wise
 # operations for 32- and 64-bit elements; while the element width doesn't matter
 # for unmasked operations, right now we don't realize that we can just use the
 # 32- or 64-bit version for the unmasked form. Maybe in the XED decoder we
 # should recognize bit-wise operations when generating unmasked versions and
 # omit the element width.

 # For binary operations, we constrain their two inputs and one output to the
 # same Go type using a variable.

 # And: bit-wise AND. The asm pattern (regex-like) covers VPAND as well as the
 # element-width-suffixed VPANDD and VPANDQ forms. Both inputs and the single
 # output use the same Go type variable $t, so they are constrained to one
 # type. The &any operand anchor defined here is aliased by the other binary
 # rules in this file.
 - go: And
   asm: "VPAND[DQ]?"
   in:
   - &any
     go: $t
   - *any
   out:
   - *any

 # Gap-filling rule for 512-bit vectors of 8-bit elements. The element width
 # does not matter for an unmasked bit-wise operation, so the 32-bit VPANDD
 # instruction is reused.
 # NOTE(review): overwriteElementBits presumably relabels the matched operand's
 # element width as 8 bits, and `inVariant: []` presumably selects only the
 # unmasked form — confirm against the generator.
 # The &twoI8x64 / &oneI8x64 anchors are reused by the later gap-filling rules.
 - go: And
   asm: "VPANDD" # Fill in the gap, And is missing for Uint8x64 and Int8x64
   inVariant: []
   in: &twoI8x64
   - &i8x64
     go: $t
     overwriteElementBits: 8
   - *i8x64
   out: &oneI8x64
   - *i8x64

 # Same gap-filling approach for 512-bit vectors of 16-bit elements.
 # The &twoI16x32 / &oneI16x32 anchors are reused by the later gap-filling rules.
 - go: And
   asm: "VPANDD" # Fill in the gap, And is missing for Uint16x32 and Int16x32
   inVariant: []
   in: &twoI16x32
   - &i16x32
     go: $t
     overwriteElementBits: 16
   - *i16x32
   out: &oneI16x32
   - *i16x32

 # AndNot: matches VPANDN and its VPANDND / VPANDNQ forms. operandOrder "21"
 # swaps the two arguments relative to the instruction's operand order.
 # Inputs and output alias *any (anchored in the first And rule), so all three
 # share the Go type variable $t.
 - go: AndNot
   asm: "VPANDN[DQ]?"
   operandOrder: "21" # switch the arg order
   in:
   - *any
   - *any
   out:
   - *any

 # Gap-filling rule for 512-bit vectors of 8-bit elements; reuses the operand
 # lists anchored as twoI8x64 / oneI8x64 in the And section.
 - go: AndNot
   asm: "VPANDND" # Fill in the gap, AndNot is missing for Uint8x64 and Int8x64
   operandOrder: "21" # switch the arg order
   inVariant: []
   in: *twoI8x64
   out: *oneI8x64

 # Gap-filling rule for 512-bit vectors of 16-bit elements; reuses the operand
 # lists anchored as twoI16x32 / oneI16x32 in the And section.
 - go: AndNot
   asm: "VPANDND" # Fill in the gap, AndNot is missing for Uint16x32 and Int16x32
   operandOrder: "21" # switch the arg order
   inVariant: []
   in: *twoI16x32
   out: *oneI16x32

 # Or: bit-wise OR. The asm pattern covers VPOR as well as the VPORD and VPORQ
 # forms. Inputs and output alias *any (anchored in the first And rule), so
 # all three share the Go type variable $t.
 - go: Or
   asm: "VPOR[DQ]?"
   in:
   - *any
   - *any
   out:
   - *any

 # Gap-filling rule for 512-bit vectors of 8-bit elements; reuses the operand
 # lists anchored as twoI8x64 / oneI8x64 in the And section.
 - go: Or
   asm: "VPORD" # Fill in the gap, Or is missing for Uint8x64 and Int8x64
   inVariant: []
   in: *twoI8x64
   out: *oneI8x64

 # Gap-filling rule for 512-bit vectors of 16-bit elements; reuses the operand
 # lists anchored as twoI16x32 / oneI16x32 in the And section.
 - go: Or
   asm: "VPORD" # Fill in the gap, Or is missing for Uint16x32 and Int16x32
   inVariant: []
   in: *twoI16x32
   out: *oneI16x32

 # Xor: bit-wise XOR. The asm pattern covers VPXOR as well as the VPXORD and
 # VPXORQ forms. Inputs and output alias *any (anchored in the first And
 # rule), so all three share the Go type variable $t.
 - go: Xor
   asm: "VPXOR[DQ]?"
   in:
   - *any
   - *any
   out:
   - *any

 # Gap-filling rule for 512-bit vectors of 8-bit elements; reuses the operand
 # lists anchored as twoI8x64 / oneI8x64 in the And section.
 - go: Xor
   asm: "VPXORD" # Fill in the gap, Xor is missing for Uint8x64 and Int8x64
   inVariant: []
   in: *twoI8x64
   out: *oneI8x64

 # Gap-filling rule for 512-bit vectors of 16-bit elements; reuses the operand
 # lists anchored as twoI16x32 / oneI16x32 in the And section.
 - go: Xor
   asm: "VPXORD" # Fill in the gap, Xor is missing for Uint16x32 and Int16x32
   inVariant: []
   in: *twoI16x32
   out: *oneI16x32

 # tern: ternary bit-wise logic, matching either VPTERNLOGD or VPTERNLOGQ.
 # The three vector inputs and the output all alias &tern_op and therefore
 # share the Go type variable $t. The fourth input is an immediate operand
 # named "table".
 # NOTE(review): the lower-case name suggests this is not exported directly,
 # and `inVariant: []` presumably selects only the unmasked form — confirm
 # against the generator.
 - go: tern
   asm: "VPTERNLOGD|VPTERNLOGQ"
   in:
   - &tern_op
     go: $t
   - *tern_op
   - *tern_op
   - class: immediate
     immOffset: 0
     name: table
   inVariant: []
   out:
   - *tern_op
	!sum
	# In the XED data, all floating point bitwise logic operations have their
	# operand type marked as uint. We are not trying to understand why Intel
	# decided that they want FP bit-wise logic operations, but this irregularity
	# has to be dealt with in separate rules with some overwrites.

	# For many bit-wise operations, we have the following non-orthogonal
	# choices:
	#
	# - Non-masked AVX operations have no element width (because it
	# doesn't matter), but only cover 128 and 256 bit vectors.
	#
	# - Masked AVX-512 operations have an element width (because it needs
	# to know how to interpret the mask), and cover 128, 256, and 512 bit
	# vectors. These only cover 32- and 64-bit element widths.
	#
	# - Non-masked AVX-512 operations still have an element width (because
	# they're just the masked operations with an implicit K0 mask) but it
	# doesn't matter! This is the only option for non-masked 512 bit
	# operations, and we can pick any of the element widths.
	#
	# We unify with ALL of these operations and the compiler generator
	# picks when there are multiple options.

	# TODO: We don't currently generate unmasked bit-wise operations on 512 bit
	# vectors of 8- or 16-bit elements. AVX-512 only has masked bit-wise
	# operations for 32- and 64-bit elements; while the element width doesn't matter
	# for unmasked operations, right now we don't realize that we can just use the
	# 32- or 64-bit version for the unmasked form. Maybe in the XED decoder we
	# should recognize bit-wise operations when generating unmasked versions and
	# omit the element width.

	# For binary operations, we constrain their two inputs and one output to the
	# same Go type using a variable.

	# NOTE(review): from here on the rules repeat the space-indented section
	# earlier in this file, but every line is indented with a single tab. YAML
	# does not allow tabs for indentation and the nesting has been flattened —
	# confirm whether this duplicate section is intended to be parsed at all.
	#
	# And: bit-wise AND over VPAND / VPANDD / VPANDQ; both inputs and the output
	# use the Go type variable $t.
	- go: And
	asm: "VPAND[DQ]?"
	in:
	- &any
	go: $t
	- *any
	out:
	- *any

	# Gap-filling rule for 512-bit vectors of 8-bit elements, reusing VPANDD.
	- go: And
	asm: "VPANDD" # Fill in the gap, And is missing for Uint8x64 and Int8x64
	inVariant: []
	in: &twoI8x64
	- &i8x64
	go: $t
	overwriteElementBits: 8
	- *i8x64
	out: &oneI8x64
	- *i8x64

	# Gap-filling rule for 512-bit vectors of 16-bit elements, reusing VPANDD.
	- go: And
	asm: "VPANDD" # Fill in the gap, And is missing for Uint16x32 and Int16x32
	inVariant: []
	in: &twoI16x32
	- &i16x32
	go: $t
	overwriteElementBits: 16
	- *i16x32
	out: &oneI16x32
	- *i16x32

	# AndNot: matches VPANDN / VPANDND / VPANDNQ; operandOrder "21" swaps the
	# two arguments. Operands alias *any, so all share the Go type variable $t.
	- go: AndNot
	asm: "VPANDN[DQ]?"
	operandOrder: "21" # switch the arg order
	in:
	- *any
	- *any
	out:
	- *any

	# Gap-filling rule for 512-bit vectors of 8-bit elements.
	- go: AndNot
	asm: "VPANDND" # Fill in the gap, AndNot is missing for Uint8x64 and Int8x64
	operandOrder: "21" # switch the arg order
	inVariant: []
	in: *twoI8x64
	out: *oneI8x64

	# Gap-filling rule for 512-bit vectors of 16-bit elements.
	- go: AndNot
	asm: "VPANDND" # Fill in the gap, AndNot is missing for Uint16x32 and Int16x32
	operandOrder: "21" # switch the arg order
	inVariant: []
	in: *twoI16x32
	out: *oneI16x32

	# Or: bit-wise OR over VPOR / VPORD / VPORQ. Operands alias *any, so inputs
	# and output share the Go type variable $t.
	- go: Or
	asm: "VPOR[DQ]?"
	in:
	- *any
	- *any
	out:
	- *any

	# Gap-filling rule for 512-bit vectors of 8-bit elements.
	- go: Or
	asm: "VPORD" # Fill in the gap, Or is missing for Uint8x64 and Int8x64
	inVariant: []
	in: *twoI8x64
	out: *oneI8x64

	# Gap-filling rule for 512-bit vectors of 16-bit elements.
	- go: Or
	asm: "VPORD" # Fill in the gap, Or is missing for Uint16x32 and Int16x32
	inVariant: []
	in: *twoI16x32
	out: *oneI16x32

	# Xor: bit-wise XOR over VPXOR / VPXORD / VPXORQ. Operands alias *any, so
	# inputs and output share the Go type variable $t.
	- go: Xor
	asm: "VPXOR[DQ]?"
	in:
	- *any
	- *any
	out:
	- *any

	# Gap-filling rule for 512-bit vectors of 8-bit elements.
	- go: Xor
	asm: "VPXORD" # Fill in the gap, Xor is missing for Uint8x64 and Int8x64
	inVariant: []
	in: *twoI8x64
	out: *oneI8x64

	# Gap-filling rule for 512-bit vectors of 16-bit elements.
	- go: Xor
	asm: "VPXORD" # Fill in the gap, Xor is missing for Uint16x32 and Int16x32
	inVariant: []
	in: *twoI16x32
	out: *oneI16x32

	# tern: ternary bit-wise logic, matching either VPTERNLOGD or VPTERNLOGQ.
	# The three vector inputs and the output alias &tern_op and share the Go
	# type variable $t; the fourth input is an immediate operand named "table".
	# The alternation uses a plain `|`: a backslash before `|` is not a valid
	# escape inside a YAML double-quoted scalar and would also change the
	# pattern.
	- go: tern
	asm: "VPTERNLOGD|VPTERNLOGQ"
	in:
	- &tern_op
	go: $t
	- *tern_op
	- *tern_op
	- class: immediate
	immOffset: 0
	name: table
	inVariant: []
	out:
	- *tern_op