internal/simdgen: add galois field instructions

This CL generates CL 684175.

Change-Id: I1b327fd1d3d3aa15cd23523371f186ceef37db76
Reviewed-on: https://go-review.googlesource.com/c/arch/+/684155
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: David Chase <drchase@google.com>
diff --git a/internal/simdgen/categories.yaml b/internal/simdgen/categories.yaml
index b349fc5..4b21d5a 100644
--- a/internal/simdgen/categories.yaml
+++ b/internal/simdgen/categories.yaml
@@ -455,6 +455,32 @@
   extension: "AVX.*"
   documentation: !string |-
     // AddSub subtracts even elements and adds odd elements of two vectors.
+- go: MaskedGaloisFieldAffineTransform
+  masked: "true"
+  commutative: "false"
+  extension: "AVX.*"
+  documentation: !string |-
+    // GaloisFieldAffineTransform computes an affine transformation in GF(2^8):
+    // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrices;
+    // imm is an 8-bit vector. The affine transformation is y * x + imm, with each element of y
+    // corresponding to a group of 8 elements in x.
+- go: MaskedGaloisFieldAffineTransformInversed
+  masked: "true"
+  commutative: "false"
+  extension: "AVX.*"
+  documentation: !string |-
+    // GaloisFieldAffineTransformInversed computes an affine transformation in GF(2^8),
+    // with x inverted with respect to reduction polynomial x^8 + x^4 + x^3 + x + 1:
+    // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrices;
+    // imm is an 8-bit vector. The affine transformation is y * x + imm, with each element of y
+    // corresponding to a group of 8 elements in x.
+- go: MaskedGaloisFieldMul
+  masked: "true"
+  commutative: "false"
+  extension: "AVX.*"
+  documentation: !string |-
+    // GaloisFieldMul computes element-wise GF(2^8) multiplication with
+    // reduction polynomial x^8 + x^4 + x^3 + x + 1.
 - go: Average
   commutative: "true"
   extension: "AVX.*" # VPAVGB/W are available across various AVX versions
diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml
index 52fef3b..2d1038d 100644
--- a/internal/simdgen/go.yaml
+++ b/internal/simdgen/go.yaml
@@ -431,6 +431,36 @@
   - *fp
   out:
   - *fp
+- go: MaskedGaloisFieldAffineTransform
+  asm: VGF2P8AFFINEQB
+  in: &AffineArgs
+  - class: mask
+  - &uint8
+    go: $t
+    base: uint
+  - &uint8x8
+    go: $t2
+    base: uint
+  - &pureImmVar
+    class: immediate
+    immOffset: 0
+  out:
+  - *uint8
+
+- go: MaskedGaloisFieldAffineTransformInversed
+  asm: VGF2P8AFFINEINVQB
+  in: *AffineArgs
+  out:
+  - *uint8
+
+- go: MaskedGaloisFieldMul
+  asm: VGF2P8MULB
+  in:
+  - class: mask
+  - *uint8
+  - *uint8
+  out:
+  - *uint8
 # Average (unsigned byte, unsigned word)
 # Instructions: VPAVGB, VPAVGW
 - go: Average
diff --git a/internal/simdgen/ops/GaloisField/categories.yaml b/internal/simdgen/ops/GaloisField/categories.yaml
new file mode 100644
index 0000000..915d3ec
--- /dev/null
+++ b/internal/simdgen/ops/GaloisField/categories.yaml
@@ -0,0 +1,27 @@
+!sum
+- go: MaskedGaloisFieldAffineTransform
+  masked: "true"
+  commutative: "false"
+  extension: "AVX.*"
+  documentation: !string |-
+    // GaloisFieldAffineTransform computes an affine transformation in GF(2^8):
+    // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrices;
+    // imm is an 8-bit vector. The affine transformation is y * x + imm, with each element of y
+    // corresponding to a group of 8 elements in x.
+- go: MaskedGaloisFieldAffineTransformInversed
+  masked: "true"
+  commutative: "false"
+  extension: "AVX.*"
+  documentation: !string |-
+    // GaloisFieldAffineTransformInversed computes an affine transformation in GF(2^8),
+    // with x inverted with respect to reduction polynomial x^8 + x^4 + x^3 + x + 1:
+    // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrices;
+    // imm is an 8-bit vector. The affine transformation is y * x + imm, with each element of y
+    // corresponding to a group of 8 elements in x.
+- go: MaskedGaloisFieldMul
+  masked: "true"
+  commutative: "false"
+  extension: "AVX.*"
+  documentation: !string |-
+    // GaloisFieldMul computes element-wise GF(2^8) multiplication with
+    // reduction polynomial x^8 + x^4 + x^3 + x + 1.
\ No newline at end of file
diff --git a/internal/simdgen/ops/GaloisField/go.yaml b/internal/simdgen/ops/GaloisField/go.yaml
new file mode 100644
index 0000000..9008ab2
--- /dev/null
+++ b/internal/simdgen/ops/GaloisField/go.yaml
@@ -0,0 +1,31 @@
+!sum
+- go: MaskedGaloisFieldAffineTransform
+  asm: VGF2P8AFFINEQB
+  in: &AffineArgs
+  - class: mask
+  - &uint8
+    go: $t
+    base: uint
+  - &uint8x8
+    go: $t2
+    base: uint
+  - &pureImmVar
+    class: immediate
+    immOffset: 0
+  out:
+  - *uint8
+
+- go: MaskedGaloisFieldAffineTransformInversed
+  asm: VGF2P8AFFINEINVQB
+  in: *AffineArgs
+  out:
+  - *uint8
+
+- go: MaskedGaloisFieldMul
+  asm: VGF2P8MULB
+  in:
+  - class: mask
+  - *uint8
+  - *uint8
+  out:
+  - *uint8
\ No newline at end of file