x86: move rsc.io/x86 here

This code was reviewed on its way into rsc.io/x86:

    https://codereview.appspot.com/120710043
    https://codereview.appspot.com/94500043
    https://codereview.appspot.com/95350044
    https://codereview.appspot.com/95300044
    https://codereview.appspot.com/97100047
    https://codereview.appspot.com/93110044
    https://codereview.appspot.com/91170044
    https://codereview.appspot.com/99000043
    https://codereview.appspot.com/98990043
    https://codereview.appspot.com/100180044

Change-Id: Ie69db9b29ec400f663550158b163ec92a00725b4
Reviewed-on: https://go-review.googlesource.com/13972
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
diff --git a/x86/x86.csv b/x86/x86.csv
new file mode 100644
index 0000000..4755b35
--- /dev/null
+++ b/x86/x86.csv
@@ -0,0 +1,2355 @@
+# x86 instruction set description version 0.01.
+# https://golang.org/x/arch/x86
+#
+# This file contains a block of comment lines, each beginning with #,
+# followed by entries in CSV format. All the # comments are at the top
+# of the file, so a reader can skip past the comments and hand the
+# rest of the file to a standard CSV reader.
+#
+# Each line in the CSV section contains 6 fields:
+#
+#	mnemonic encoding valid-32 valid-64 feature tags
+#
+# The mnemonic, encoding, valid-32, valid-64, and feature columns
+# correspond to the columns of the table used to describe an
+# instruction in the Intel x86-64 manual, except that the order of
+# valid-32 and valid-64 is swapped relative to the manual pages.
+# Consult the manual for details about the meaning of these fields [1].
+#
+# The tags column contains additional comma-separated tags with information
+# about the instructions not gleaned from the manual. The known tags are:
+#
+#	pseudo, pseudo64 - this is a pseudo-instruction, redundant with some
+#	  other instruction in the list; this one should be ignored
+#	  for the purposes of decoding.
+#
+#	operand16, operand32, operand64 - this instruction interpretation
+#	  is only valid in 16-bit/32-bit/64-bit operand mode.
+#
+#	address16, address32, address64 - this instruction interpretation
+#	  is only valid in 16-bit/32-bit/64-bit address mode.
+#
+#	modrm_regonly - this instruction interpretation
+#	  is only valid if the modrm r/m field denotes a register.
+#
+#	modrm_memonly - this instruction interpretation
+#	  is only valid if the modrm r/m field denotes a memory reference.
+#
+# This file was generated by a program reading the PDF version of
+# the manual, but it was then hand-edited to make corrections and
+# add the tags. The eventual plan is for the generator to write the
+# file directly from the PDF and a list of modifications, with
+# no hand editing. Then, when a new version of the manual comes out,
+# updating the list should be as easy as downloading the new PDF
+# and invoking the generator. However, we are not there yet.
+#
+# [1] In the February 2014 PDF manual at
+# http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-manual-325462.pdf
+# the instruction format is described in Volume 2, Section 3.1.1, starting on page 487.
+#
+"AAA","37","V","I","",""
+"AAD imm8u","D5 ib","V","I","",""
+"AAD","D5 0A","V","I","","pseudo"
+"AAM imm8u","D4 ib","V","I","",""
+"AAM","D4 0A","V","I","","pseudo"
+"AAS","3F","V","I","",""
+"ADC AL, imm8u","14 ib","V","V","",""
+"ADC AX, imm16","15 iw","V","V","","operand16"
+"ADC EAX, imm32","15 id","V","V","","operand32"
+"ADC RAX, imm32","REX.W + 15 id","N.E.","V","",""
+"ADC r/m16, imm16","81 /2 iw","V","V","","operand16"
+"ADC r/m16, imm8","83 /2 ib","V","V","","operand16"
+"ADC r/m16, r16","11 /r","V","V","","operand16"
+"ADC r/m32, imm32","81 /2 id","V","V","","operand32"
+"ADC r/m32, imm8","83 /2 ib","V","V","","operand32"
+"ADC r/m32, r32","11 /r","V","V","","operand32"
+"ADC r/m64, imm32","REX.W + 81 /2 id","N.E.","V","",""
+"ADC r/m64, imm8","REX.W + 83 /2 ib","N.E.","V","",""
+"ADC r/m64, r64","REX.W + 11 /r","N.E.","V","",""
+"ADC r/m8, imm8u","80 /2 ib","V","V","",""
+"ADC r/m8, imm8u","REX + 80 /2 ib","N.E.","V","","pseudo64"
+"ADC r/m8, r8","10 /r","V","V","",""
+"ADC r/m8, r8","REX + 10 /r","N.E.","V","","pseudo64"
+"ADC r16, r/m16","13 /r","V","V","","operand16"
+"ADC r32, r/m32","13 /r","V","V","","operand32"
+"ADC r64, r/m64","REX.W + 13 /r","N.E.","V","",""
+"ADC r8, r/m8","12 /r","V","V","",""
+"ADC r8, r/m8","REX + 12 /r","N.E.","V","","pseudo64"
+"ADD AL, imm8u","04 ib","V","V","",""
+"ADD AX, imm16","05 iw","V","V","","operand16"
+"ADD EAX, imm32","05 id","V","V","","operand32"
+"ADD RAX, imm32","REX.W + 05 id","N.E.","V","",""
+"ADD r/m16, imm16","81 /0 iw","V","V","","operand16"
+"ADD r/m16, imm8","83 /0 ib","V","V","","operand16"
+"ADD r/m16, r16","01 /r","V","V","","operand16"
+"ADD r/m32, imm32","81 /0 id","V","V","","operand32"
+"ADD r/m32, imm8","83 /0 ib","V","V","","operand32"
+"ADD r/m32, r32","01 /r","V","V","","operand32"
+"ADD r/m64, imm32","REX.W + 81 /0 id","N.E.","V","",""
+"ADD r/m64, imm8","REX.W + 83 /0 ib","N.E.","V","",""
+"ADD r/m64, r64","REX.W + 01 /r","N.E.","V","",""
+"ADD r/m8, imm8u","80 /0 ib","V","V","",""
+"ADD r/m8, imm8u","REX + 80 /0 ib","N.E.","V","","pseudo64"
+"ADD r/m8, r8","00 /r","V","V","",""
+"ADD r/m8, r8","REX + 00 /r","N.E.","V","","pseudo64"
+"ADD r16, r/m16","03 /r","V","V","","operand16"
+"ADD r32, r/m32","03 /r","V","V","","operand32"
+"ADD r64, r/m64","REX.W + 03 /r","N.E.","V","",""
+"ADD r8, r/m8","02 /r","V","V","",""
+"ADD r8, r/m8","REX + 02 /r","N.E.","V","","pseudo64"
+"ADDPD xmm1, xmm2/m128","66 0F 58 /r","V","V","SSE2",""
+"ADDPS xmm1, xmm2/m128","0F 58 /r","V","V","SSE",""
+"ADDSD xmm1, xmm2/m64","F2 0F 58 /r","V","V","SSE2",""
+"ADDSS xmm1, xmm2/m32","F3 0F 58 /r","V","V","SSE",""
+"ADDSUBPD xmm1, xmm2/m128","66 0F D0 /r","V","V","SSE3",""
+"ADDSUBPS xmm1, xmm2/m128","F2 0F D0 /r","V","V","SSE3",""
+"AESDEC xmm1, xmm2/m128","66 0F 38 DE /r","V","V","AES",""
+"AESDECLAST xmm1, xmm2/m128","66 0F 38 DF /r","V","V","AES",""
+"AESENC xmm1, xmm2/m128","66 0F 38 DC /r","V","V","AES",""
+"AESENCLAST xmm1, xmm2/m128","66 0F 38 DD /r","V","V","AES",""
+"AESIMC xmm1, xmm2/m128","66 0F 38 DB /r","V","V","AES",""
+"AESKEYGENASSIST xmm1, xmm2/m128, imm8u","66 0F 3A DF /r ib","V","V","AES",""
+"AND AL, imm8u","24 ib","V","V","",""
+"AND AX, imm16","25 iw","V","V","","operand16"
+"AND EAX, imm32","25 id","V","V","","operand32"
+"AND RAX, imm32","REX.W + 25 id","N.E.","V","",""
+"AND r/m16, imm16","81 /4 iw","V","V","","operand16"
+"AND r/m16, imm8","83 /4 ib","V","V","","operand16"
+"AND r/m16, r16","21 /r","V","V","","operand16"
+"AND r/m32, imm32","81 /4 id","V","V","","operand32"
+"AND r/m32, imm8","83 /4 ib","V","V","","operand32"
+"AND r/m32, r32","21 /r","V","V","","operand32"
+"AND r/m64, imm32","REX.W + 81 /4 id","N.E.","V","",""
+"AND r/m64, imm8","REX.W + 83 /4 ib","N.E.","V","",""
+"AND r/m64, r64","REX.W + 21 /r","N.E.","V","",""
+"AND r/m8, imm8u","80 /4 ib","V","V","",""
+"AND r/m8, imm8u","REX + 80 /4 ib","N.E.","V","","pseudo64"
+"AND r/m8, r8","20 /r","V","V","",""
+"AND r/m8, r8","REX + 20 /r","N.E.","V","","pseudo64"
+"AND r16, r/m16","23 /r","V","V","","operand16"
+"AND r32, r/m32","23 /r","V","V","","operand32"
+"AND r64, r/m64","REX.W + 23 /r","N.E.","V","",""
+"AND r8, r/m8","22 /r","V","V","",""
+"AND r8, r/m8","REX + 22 /r","N.E.","V","","pseudo64"
+"ANDN r32a, r32b, r/m32","VEX.NDS.LZ.0F38.W0 F2 /r","V","V","BMI1",""
+"ANDN r64a, r64b, r/m64","VEX.NDS.LZ.0F38.W1 F2 /r","N.E.","V","BMI1",""
+"ANDNPD xmm1, xmm2/m128","66 0F 55 /r","V","V","SSE2",""
+"ANDNPS xmm1, xmm2/m128","0F 55 /r","V","V","SSE",""
+"ANDPD xmm1, xmm2/m128","66 0F 54 /r","V","V","SSE2",""
+"ANDPS xmm1, xmm2/m128","0F 54 /r","V","V","SSE",""
+"ARPL r/m16, r16","63 /r","V","N.E.","",""
+"BEXTR r32a, r/m32, r32b","VEX.NDS.LZ.0F38.W0 F7 /r","V","V","BMI1",""
+"BEXTR r64a, r/m64, r64b","VEX.NDS.LZ.0F38.W1 F7 /r","N.E.","V","BMI1",""
+"BLENDPD xmm1, xmm2/m128, imm8u","66 0F 3A 0D /r ib","V","V","SSE4_1",""
+"BLENDPS xmm1, xmm2/m128, imm8u","66 0F 3A 0C /r ib","V","V","SSE4_1",""
+"BLENDVPD xmm1, xmm2/m128, <XMM0>","66 0F 38 15 /r","V","V","SSE4_1",""
+"BLENDVPS xmm1, xmm2/m128, <XMM0>","66 0F 38 14 /r","V","V","SSE4_1",""
+"BLSI r32, r/m32","VEX.NDD.LZ.0F38.W0 F3 /3","V","V","BMI1",""
+"BLSI r64, r/m64","VEX.NDD.LZ.0F38.W1 F3 /3","N.E.","V","BMI1",""
+"BLSMSK r32, r/m32","VEX.NDD.LZ.0F38.W0 F3 /2","V","V","BMI1",""
+"BLSMSK r64, r/m64","VEX.NDD.LZ.0F38.W1 F3 /2","N.E.","V","BMI1",""
+"BLSR r32, r/m32","VEX.NDD.LZ.0F38.W0 F3 /1","V","V","BMI1",""
+"BLSR r64, r/m64","VEX.NDD.LZ.0F38.W1 F3 /1","N.E.","V","BMI1",""
+"BOUND r16, m16&16","62 /r","V","I","","operand16"
+"BOUND r32, m32&32","62 /r","V","I","","operand32"
+"BSF r16, r/m16","0F BC /r","V","V","","operand16"
+"BSF r32, r/m32","0F BC /r","V","V","","operand32"
+"BSF r64, r/m64","REX.W + 0F BC /r","N.E.","V","",""
+"BSR r16, r/m16","0F BD /r","V","V","","operand16"
+"BSR r32, r/m32","0F BD /r","V","V","","operand32"
+"BSR r64, r/m64","REX.W + 0F BD /r","N.E.","V","",""
+"BSWAP r16op","0F C8+rd","V","V","","operand16"
+"BSWAP r32op","0F C8+rd","V","V","","operand32"
+"BSWAP r64op","REX.W + 0F C8+rd","N.E.","V","","operand64"
+"BT r/m16, imm8u","0F BA /4 ib","V","V","","operand16"
+"BT r/m16, r16","0F A3 /r","V","V","","operand16"
+"BT r/m32, imm8u","0F BA /4 ib","V","V","","operand32"
+"BT r/m32, r32","0F A3 /r","V","V","","operand32"
+"BT r/m64, imm8u","REX.W + 0F BA /4 ib","N.E.","V","",""
+"BT r/m64, r64","REX.W + 0F A3 /r","N.E.","V","",""
+"BTC r/m16, imm8u","0F BA /7 ib","V","V","","operand16"
+"BTC r/m16, r16","0F BB /r","V","V","","operand16"
+"BTC r/m32, imm8u","0F BA /7 ib","V","V","","operand32"
+"BTC r/m32, r32","0F BB /r","V","V","","operand32"
+"BTC r/m64, imm8u","REX.W + 0F BA /7 ib","N.E.","V","",""
+"BTC r/m64, r64","REX.W + 0F BB /r","N.E.","V","",""
+"BTR r/m16, imm8u","0F BA /6 ib","V","V","","operand16"
+"BTR r/m16, r16","0F B3 /r","V","V","","operand16"
+"BTR r/m32, imm8u","0F BA /6 ib","V","V","","operand32"
+"BTR r/m32, r32","0F B3 /r","V","V","","operand32"
+"BTR r/m64, imm8u","REX.W + 0F BA /6 ib","N.E.","V","",""
+"BTR r/m64, r64","REX.W + 0F B3 /r","N.E.","V","",""
+"BTS r/m16, imm8u","0F BA /5 ib","V","V","","operand16"
+"BTS r/m16, r16","0F AB /r","V","V","","operand16"
+"BTS r/m32, imm8u","0F BA /5 ib","V","V","","operand32"
+"BTS r/m32, r32","0F AB /r","V","V","","operand32"
+"BTS r/m64, imm8u","REX.W + 0F BA /5 ib","N.E.","V","",""
+"BTS r/m64, r64","REX.W + 0F AB /r","N.E.","V","",""
+"BZHI r32a, r/m32, r32b","VEX.NDS.LZ.0F38.W0 F5 /r","V","V","BMI2",""
+"BZHI r64a, r/m64, r64b","VEX.NDS.LZ.0F38.W1 F5 /r","N.E.","V","BMI2",""
+"CALL r/m16","FF /2","V","N.E.","","operand16"
+"CALL r/m32","FF /2","V","N.E.","","operand32"
+"CALL r/m64","FF /2","N.E.","V","",""
+"CALL rel16","E8 cw","V","N.S.","","operand16"
+"CALL rel32","E8 cd","V","V","","operand32"
+"CALL rel32","E8 cd","N.S.","V","","operand16,operand64"
+"CBW","98","V","V","","operand16"
+"CDQ","99","V","V","","operand32"
+"CDQE","REX.W + 98","N.E.","V","",""
+"CLC","F8","V","V","",""
+"CLD","FC","V","V","",""
+"CLFLUSH m8","0F AE /7","V","V","",""
+"CLI","FA","V","V","",""
+"CLTS","0F 06","V","V","",""
+"CMC","F5","V","V","",""
+"CMOVA r16, r/m16","0F 47 /r","V","V","","operand16"
+"CMOVA r32, r/m32","0F 47 /r","V","V","","operand32"
+"CMOVA r64, r/m64","REX.W + 0F 47 /r","N.E.","V","",""
+"CMOVAE r16, r/m16","0F 43 /r","V","V","","operand16"
+"CMOVAE r32, r/m32","0F 43 /r","V","V","","operand32"
+"CMOVAE r64, r/m64","REX.W + 0F 43 /r","N.E.","V","",""
+"CMOVB r16, r/m16","0F 42 /r","V","V","","operand16"
+"CMOVB r32, r/m32","0F 42 /r","V","V","","operand32"
+"CMOVB r64, r/m64","REX.W + 0F 42 /r","N.E.","V","",""
+"CMOVBE r16, r/m16","0F 46 /r","V","V","","operand16"
+"CMOVBE r32, r/m32","0F 46 /r","V","V","","operand32"
+"CMOVBE r64, r/m64","REX.W + 0F 46 /r","N.E.","V","",""
+"CMOVC r16, r/m16","0F 42 /r","V","V","","pseudo,operand16"
+"CMOVC r32, r/m32","0F 42 /r","V","V","","pseudo,operand32"
+"CMOVC r64, r/m64","REX.W + 0F 42 /r","N.E.","V","","pseudo,"
+"CMOVE r16, r/m16","0F 44 /r","V","V","","operand16"
+"CMOVE r32, r/m32","0F 44 /r","V","V","","operand32"
+"CMOVE r64, r/m64","REX.W + 0F 44 /r","N.E.","V","",""
+"CMOVG r16, r/m16","0F 4F /r","V","V","","operand16"
+"CMOVG r32, r/m32","0F 4F /r","V","V","","operand32"
+"CMOVG r64, r/m64","REX.W + 0F 4F /r","N.E.","V","",""
+"CMOVGE r16, r/m16","0F 4D /r","V","V","","operand16"
+"CMOVGE r32, r/m32","0F 4D /r","V","V","","operand32"
+"CMOVGE r64, r/m64","REX.W + 0F 4D /r","N.E.","V","",""
+"CMOVL r16, r/m16","0F 4C /r","V","V","","operand16"
+"CMOVL r32, r/m32","0F 4C /r","V","V","","operand32"
+"CMOVL r64, r/m64","REX.W + 0F 4C /r","N.E.","V","",""
+"CMOVLE r16, r/m16","0F 4E /r","V","V","","operand16"
+"CMOVLE r32, r/m32","0F 4E /r","V","V","","operand32"
+"CMOVLE r64, r/m64","REX.W + 0F 4E /r","N.E.","V","",""
+"CMOVNA r16, r/m16","0F 46 /r","V","V","","pseudo,operand16"
+"CMOVNA r32, r/m32","0F 46 /r","V","V","","pseudo,operand32"
+"CMOVNA r64, r/m64","REX.W + 0F 46 /r","N.E.","V","","pseudo,"
+"CMOVNAE r16, r/m16","0F 42 /r","V","V","","pseudo,operand16"
+"CMOVNAE r32, r/m32","0F 42 /r","V","V","","pseudo,operand32"
+"CMOVNAE r64, r/m64","REX.W + 0F 42 /r","N.E.","V","","pseudo,"
+"CMOVNB r16, r/m16","0F 43 /r","V","V","","pseudo,operand16"
+"CMOVNB r32, r/m32","0F 43 /r","V","V","","pseudo,operand32"
+"CMOVNB r64, r/m64","REX.W + 0F 43 /r","N.E.","V","","pseudo,"
+"CMOVNBE r16, r/m16","0F 47 /r","V","V","","pseudo,operand16"
+"CMOVNBE r32, r/m32","0F 47 /r","V","V","","pseudo,operand32"
+"CMOVNBE r64, r/m64","REX.W + 0F 47 /r","N.E.","V","","pseudo,"
+"CMOVNC r16, r/m16","0F 43 /r","V","V","","pseudo,operand16"
+"CMOVNC r32, r/m32","0F 43 /r","V","V","","pseudo,operand32"
+"CMOVNC r64, r/m64","REX.W + 0F 43 /r","N.E.","V","","pseudo,"
+"CMOVNE r16, r/m16","0F 45 /r","V","V","","operand16"
+"CMOVNE r32, r/m32","0F 45 /r","V","V","","operand32"
+"CMOVNE r64, r/m64","REX.W + 0F 45 /r","N.E.","V","",""
+"CMOVNG r16, r/m16","0F 4E /r","V","V","","pseudo,operand16"
+"CMOVNG r32, r/m32","0F 4E /r","V","V","","pseudo,operand32"
+"CMOVNG r64, r/m64","REX.W + 0F 4E /r","N.E.","V","","pseudo,"
+"CMOVNGE r16, r/m16","0F 4C /r","V","V","","pseudo,operand16"
+"CMOVNGE r32, r/m32","0F 4C /r","V","V","","pseudo,operand32"
+"CMOVNGE r64, r/m64","REX.W + 0F 4C /r","N.E.","V","","pseudo,"
+"CMOVNL r16, r/m16","0F 4D /r","V","V","","pseudo,operand16"
+"CMOVNL r32, r/m32","0F 4D /r","V","V","","pseudo,operand32"
+"CMOVNL r64, r/m64","REX.W + 0F 4D /r","N.E.","V","","pseudo,"
+"CMOVNLE r16, r/m16","0F 4F /r","V","V","","pseudo,operand16"
+"CMOVNLE r32, r/m32","0F 4F /r","V","V","","pseudo,operand32"
+"CMOVNLE r64, r/m64","REX.W + 0F 4F /r","N.E.","V","","pseudo,"
+"CMOVNO r16, r/m16","0F 41 /r","V","V","","operand16"
+"CMOVNO r32, r/m32","0F 41 /r","V","V","","operand32"
+"CMOVNO r64, r/m64","REX.W + 0F 41 /r","N.E.","V","",""
+"CMOVNP r16, r/m16","0F 4B /r","V","V","","operand16"
+"CMOVNP r32, r/m32","0F 4B /r","V","V","","operand32"
+"CMOVNP r64, r/m64","REX.W + 0F 4B /r","N.E.","V","",""
+"CMOVNS r16, r/m16","0F 49 /r","V","V","","operand16"
+"CMOVNS r32, r/m32","0F 49 /r","V","V","","operand32"
+"CMOVNS r64, r/m64","REX.W + 0F 49 /r","N.E.","V","",""
+"CMOVNZ r16, r/m16","0F 45 /r","V","V","","pseudo,operand16"
+"CMOVNZ r32, r/m32","0F 45 /r","V","V","","pseudo,operand32"
+"CMOVNZ r64, r/m64","REX.W + 0F 45 /r","N.E.","V","","pseudo,"
+"CMOVO r16, r/m16","0F 40 /r","V","V","","operand16"
+"CMOVO r32, r/m32","0F 40 /r","V","V","","operand32"
+"CMOVO r64, r/m64","REX.W + 0F 40 /r","N.E.","V","",""
+"CMOVP r16, r/m16","0F 4A /r","V","V","","operand16"
+"CMOVP r32, r/m32","0F 4A /r","V","V","","operand32"
+"CMOVP r64, r/m64","REX.W + 0F 4A /r","N.E.","V","",""
+"CMOVPE r16, r/m16","0F 4A /r","V","V","","pseudo,operand16"
+"CMOVPE r32, r/m32","0F 4A /r","V","V","","pseudo,operand32"
+"CMOVPE r64, r/m64","REX.W + 0F 4A /r","N.E.","V","","pseudo,"
+"CMOVPO r16, r/m16","0F 4B /r","V","V","","pseudo,operand16"
+"CMOVPO r32, r/m32","0F 4B /r","V","V","","pseudo,operand32"
+"CMOVPO r64, r/m64","REX.W + 0F 4B /r","N.E.","V","","pseudo,"
+"CMOVS r16, r/m16","0F 48 /r","V","V","","operand16"
+"CMOVS r32, r/m32","0F 48 /r","V","V","","operand32"
+"CMOVS r64, r/m64","REX.W + 0F 48 /r","N.E.","V","",""
+"CMOVZ r16, r/m16","0F 44 /r","V","V","","pseudo,operand16"
+"CMOVZ r32, r/m32","0F 44 /r","V","V","","pseudo,operand32"
+"CMOVZ r64, r/m64","REX.W + 0F 44 /r","N.E.","V","","pseudo,"
+"CMP AL, imm8u","3C ib","V","V","",""
+"CMP AX, imm16","3D iw","V","V","","operand16"
+"CMP EAX, imm32","3D id","V","V","","operand32"
+"CMP RAX, imm32","REX.W + 3D id","N.E.","V","",""
+"CMP r/m16, imm16","81 /7 iw","V","V","","operand16"
+"CMP r/m16, imm8","83 /7 ib","V","V","","operand16"
+"CMP r/m16, r16","39 /r","V","V","","operand16"
+"CMP r/m32, imm32","81 /7 id","V","V","","operand32"
+"CMP r/m32, imm8","83 /7 ib","V","V","","operand32"
+"CMP r/m32, r32","39 /r","V","V","","operand32"
+"CMP r/m64, imm32","REX.W + 81 /7 id","N.E.","V","",""
+"CMP r/m64, imm8","REX.W + 83 /7 ib","N.E.","V","",""
+"CMP r/m64, r64","REX.W + 39 /r","N.E.","V","",""
+"CMP r/m8, imm8u","80 /7 ib","V","V","",""
+"CMP r/m8, imm8u","REX + 80 /7 ib","N.E.","V","","pseudo64"
+"CMP r/m8, r8","38 /r","V","V","",""
+"CMP r/m8, r8","REX + 38 /r","N.E.","V","","pseudo64"
+"CMP r16, r/m16","3B /r","V","V","","operand16"
+"CMP r32, r/m32","3B /r","V","V","","operand32"
+"CMP r64, r/m64","REX.W + 3B /r","N.E.","V","",""
+"CMP r8, r/m8","3A /r","V","V","",""
+"CMP r8, r/m8","REX + 3A /r","N.E.","V","","pseudo64"
+"CMPPD xmm1, xmm2/m128, imm8u","66 0F C2 /r ib","V","V","SSE2",""
+"CMPPS xmm1, xmm2/m128, imm8u","0F C2 /r ib","V","V","SSE",""
+"CMPS m16, m16","A7","V","V","","pseudo"
+"CMPS m32, m32","A7","V","V","","pseudo"
+"CMPS m64, m64","REX.W + A7","N.E.","V","","pseudo"
+"CMPS m8, m8","A6","V","V","","pseudo"
+"CMPSB","A6","V","V","",""
+"CMPSD","A7","V","V","","operand32"
+"CMPSD_XMM xmm1, xmm2/m64, imm8u","F2 0F C2 /r ib","V","V","SSE2",""
+"CMPSQ","REX.W + A7","N.E.","V","",""
+"CMPSS xmm1, xmm2/m32, imm8u","F3 0F C2 /r ib","V","V","SSE",""
+"CMPSW","A7","V","V","","operand16"
+"CMPXCHG r/m16, r16","0F B1 /r","V","V","","operand16"
+"CMPXCHG r/m32, r32","0F B1 /r","V","V","","operand32"
+"CMPXCHG r/m64, r64","REX.W + 0F B1 /r","N.E.","V","",""
+"CMPXCHG r/m8, r8","0F B0 /r","V","V","",""
+"CMPXCHG r/m8, r8","REX + 0F B0 /r","N.E.","V","","pseudo64"
+"CMPXCHG16B m128","REX.W + 0F C7 /1","N.E.","V","",""
+"CMPXCHG8B m64","0F C7 /1","V","V","","operand16,operand32"
+"COMISD xmm1, xmm2/m64","66 0F 2F /r","V","V","SSE2",""
+"COMISS xmm1, xmm2/m32","0F 2F /r","V","V","SSE",""
+"CPUID","0F A2","V","V","",""
+"CQO","REX.W + 99","N.E.","V","",""
+"CRC32 r32, r/m16","F2 0F 38 F1 /r","V","V","","operand16"
+"CRC32 r32, r/m32","F2 0F 38 F1 /r","V","V","","operand32"
+"CRC32 r32, r/m8","F2 0F 38 F0 /r","V","V","","operand16,operand32"
+"CRC32 r32, r/m8","F2 REX 0F 38 F0 /r","N.E.","V","","pseudo64"
+"CRC32 r64, r/m64","F2 REX.W 0F 38 F1 /r","N.E.","V","",""
+"CRC32 r64, r/m8","F2 REX.W 0F 38 F0 /r","N.E.","V","",""
+"CVTDQ2PD xmm1, xmm2/m64","F3 0F E6 /r","V","V","SSE2",""
+"CVTDQ2PS xmm1, xmm2/m128","0F 5B /r","V","V","SSE2",""
+"CVTPD2DQ xmm1, xmm2/m128","F2 0F E6 /r","V","V","SSE2",""
+"CVTPD2PI mm, xmm/m128","66 0F 2D /r","V","V","",""
+"CVTPD2PS xmm1, xmm2/m128","66 0F 5A /r","V","V","SSE2",""
+"CVTPI2PD xmm, mm/m64","66 0F 2A /r","V","V","",""
+"CVTPI2PS xmm, mm/m64","0F 2A /r","V","V","",""
+"CVTPS2DQ xmm1, xmm2/m128","66 0F 5B /r","V","V","SSE2",""
+"CVTPS2PD xmm1, xmm2/m64","0F 5A /r","V","V","SSE2",""
+"CVTPS2PI mm, xmm/m64","0F 2D /r","V","V","",""
+"CVTSD2SI r32, xmm/m64","F2 0F 2D /r","V","V","SSE2","operand16,operand32"
+"CVTSD2SI r64, xmm/m64","F2 REX.W 0F 2D /r","N.E.","V","SSE2",""
+"CVTSD2SS xmm1, xmm2/m64","F2 0F 5A /r","V","V","SSE2",""
+"CVTSI2SD xmm, r/m32","F2 0F 2A /r","V","V","SSE2","operand16,operand32"
+"CVTSI2SD xmm, r/m64","F2 REX.W 0F 2A /r","N.E.","V","SSE2",""
+"CVTSI2SS xmm, r/m32","F3 0F 2A /r","V","V","SSE","operand16,operand32"
+"CVTSI2SS xmm, r/m64","F3 REX.W 0F 2A /r","N.E.","V","SSE",""
+"CVTSS2SD xmm1, xmm2/m32","F3 0F 5A /r","V","V","SSE2",""
+"CVTSS2SI r32, xmm/m32","F3 0F 2D /r","V","V","SSE","operand16,operand32"
+"CVTSS2SI r64, xmm/m32","F3 REX.W 0F 2D /r","N.E.","V","SSE",""
+"CVTTPD2DQ xmm1, xmm2/m128","66 0F E6 /r","V","V","SSE2",""
+"CVTTPD2PI mm, xmm/m128","66 0F 2C /r","V","V","",""
+"CVTTPS2DQ xmm1, xmm2/m128","F3 0F 5B /r","V","V","SSE2",""
+"CVTTPS2PI mm, xmm/m64","0F 2C /r","V","V","",""
+"CVTTSD2SI r32, xmm/m64","F2 0F 2C /r","V","V","SSE2","operand16,operand32"
+"CVTTSD2SI r64, xmm/m64","F2 REX.W 0F 2C /r","N.E.","V","SSE2",""
+"CVTTSS2SI r32, xmm/m32","F3 0F 2C /r","V","V","SSE","operand16,operand32"
+"CVTTSS2SI r64, xmm/m32","F3 REX.W 0F 2C /r","N.E.","V","SSE",""
+"CWD","99","V","V","","operand16"
+"CWDE","98","V","V","","operand32"
+"DAA","27","V","I","",""
+"DAS","2F","V","I","",""
+"DEC r/m16","FF /1","V","V","","operand16"
+"DEC r/m32","FF /1","V","V","","operand32"
+"DEC r/m64","REX.W + FF /1","N.E.","V","",""
+"DEC r/m8","FE /1","V","V","",""
+"DEC r/m8","REX + FE /1","N.E.","V","","pseudo64"
+"DEC r16op","48+rw","V","N.E.","","operand16"
+"DEC r32op","48+rd","V","N.E.","","operand32"
+"DIV r/m16","F7 /6","V","V","","operand16"
+"DIV r/m32","F7 /6","V","V","","operand32"
+"DIV r/m64","REX.W + F7 /6","N.E.","V","",""
+"DIV r/m8","F6 /6","V","V","",""
+"DIV r/m8","REX + F6 /6","N.E.","V","","pseudo64"
+"DIVPD xmm1, xmm2/m128","66 0F 5E /r","V","V","SSE2",""
+"DIVPS xmm1, xmm2/m128","0F 5E /r","V","V","SSE",""
+"DIVSD xmm1, xmm2/m64","F2 0F 5E /r","V","V","SSE2",""
+"DIVSS xmm1, xmm2/m32","F3 0F 5E /r","V","V","SSE",""
+"DPPD xmm1, xmm2/m128, imm8u","66 0F 3A 41 /r ib","V","V","SSE4_1",""
+"DPPS xmm1, xmm2/m128, imm8u","66 0F 3A 40 /r ib","V","V","SSE4_1",""
+"EMMS","0F 77","V","V","",""
+"ENTER imm16u, 0","C8 iw 00","V","V","","pseudo"
+"ENTER imm16u, 1","C8 iw 01","V","V","","pseudo"
+"ENTER imm16u, imm8u","C8 iw ib","V","V","",""
+"EXTRACTPS r/m32, xmm1, imm8u","66 0F 3A 17 /r ib","V","V","SSE4_1",""
+"F2XM1","D9 F0","V","V","",""
+"FABS","D9 E1","V","V","",""
+"FADD ST(0), ST(i)","D8 C0+i","V","V","",""
+"FADD ST(i), ST(0)","DC C0+i","V","V","",""
+"FADD m32fp","D8 /0","V","V","",""
+"FADD m64fp","DC /0","V","V","",""
+"FADDP ST(i), ST(0)","DE C0+i","V","V","",""
+"FADDP","DE C1","V","V","","pseudo"
+"FBLD m80dec","DF /4","V","V","",""
+"FBSTP m80bcd","DF /6","V","V","",""
+"FCHS","D9 E0","V","V","",""
+"FCLEX","9B DB E2","V","V","","pseudo"
+"FCMOVB ST(0), ST(i)","DA C0+i","V","V","",""
+"FCMOVBE ST(0), ST(i)","DA D0+i","V","V","",""
+"FCMOVE ST(0), ST(i)","DA C8+i","V","V","",""
+"FCMOVNB ST(0), ST(i)","DB C0+i","V","V","",""
+"FCMOVNBE ST(0), ST(i)","DB D0+i","V","V","",""
+"FCMOVNE ST(0), ST(i)","DB C8+i","V","V","",""
+"FCMOVNU ST(0), ST(i)","DB D8+i","V","V","",""
+"FCMOVU ST(0), ST(i)","DA D8+i","V","V","",""
+"FCOM ST(i)","D8 D0+i","V","V","",""
+"FCOM m32fp","D8 /2","V","V","",""
+"FCOM m64fp","DC /2","V","V","",""
+"FCOM","D8 D1","V","V","","pseudo"
+"FCOMI ST, ST(i)","DB F0+i","V","V","",""
+"FCOMIP ST, ST(i)","DF F0+i","V","V","",""
+"FCOMP ST(i)","D8 D8+i","V","V","",""
+"FCOMP m32fp","D8 /3","V","V","",""
+"FCOMP m64fp","DC /3","V","V","",""
+"FCOMP","D8 D9","V","V","","pseudo"
+"FCOMPP","DE D9","V","V","",""
+"FCOS","D9 FF","V","V","",""
+"FDECSTP","D9 F6","V","V","",""
+"FDIV ST(0), ST(i)","D8 F0+i","V","V","",""
+"FDIV ST(i), ST(0)","DC F8+i","V","V","",""
+"FDIV m32fp","D8 /6","V","V","",""
+"FDIV m64fp","DC /6","V","V","",""
+"FDIVP ST(i), ST(0)","DE F8+i","V","V","",""
+"FDIVP","DE F9","V","V","","pseudo"
+"FDIVR ST(0), ST(i)","D8 F8+i","V","V","",""
+"FDIVR ST(i), ST(0)","DC F0+i","V","V","",""
+"FDIVR m32fp","D8 /7","V","V","",""
+"FDIVR m64fp","DC /7","V","V","",""
+"FDIVRP ST(i), ST(0)","DE F0+i","V","V","",""
+"FDIVRP","DE F1","V","V","","pseudo"
+"FFREE ST(i)","DD C0+i","V","V","",""
+"FFREEP ST(i)","DF C0+i","V","V","",""
+"FIADD m16int","DE /0","V","V","",""
+"FIADD m32int","DA /0","V","V","",""
+"FICOM m16int","DE /2","V","V","",""
+"FICOM m32int","DA /2","V","V","",""
+"FICOMP m16int","DE /3","V","V","",""
+"FICOMP m32int","DA /3","V","V","",""
+"FIDIV m16int","DE /6","V","V","",""
+"FIDIV m32int","DA /6","V","V","",""
+"FIDIVR m16int","DE /7","V","V","",""
+"FIDIVR m32int","DA /7","V","V","",""
+"FILD m16int","DF /0","V","V","",""
+"FILD m32int","DB /0","V","V","",""
+"FILD m64int","DF /5","V","V","",""
+"FIMUL m16int","DE /1","V","V","",""
+"FIMUL m32int","DA /1","V","V","",""
+"FINCSTP","D9 F7","V","V","",""
+"FINIT","9B DB E3","V","V","","pseudo"
+"FIST m16int","DF /2","V","V","",""
+"FIST m32int","DB /2","V","V","",""
+"FISTP m16int","DF /3","V","V","",""
+"FISTP m32int","DB /3","V","V","",""
+"FISTP m64int","DF /7","V","V","",""
+"FISTTP m16int","DF /1","V","V","",""
+"FISTTP m32int","DB /1","V","V","",""
+"FISTTP m64int","DD /1","V","V","",""
+"FISUB m16int","DE /4","V","V","",""
+"FISUB m32int","DA /4","V","V","",""
+"FISUBR m16int","DE /5","V","V","",""
+"FISUBR m32int","DA /5","V","V","",""
+"FLD ST(i)","D9 C0+i","V","V","",""
+"FLD m32fp","D9 /0","V","V","",""
+"FLD m64fp","DD /0","V","V","",""
+"FLD m80fp","DB /5","V","V","",""
+"FLD1","D9 E8","V","V","",""
+"FLDCW m2byte","D9 /5","V","V","",""
+"FLDENV m14/28byte","D9 /4","V","V","",""
+"FLDL2E","D9 EA","V","V","",""
+"FLDL2T","D9 E9","V","V","",""
+"FLDLG2","D9 EC","V","V","",""
+"FLDPI","D9 EB","V","V","",""
+"FMUL ST(0), ST(i)","D8 C8+i","V","V","",""
+"FMUL ST(i), ST(0)","DC C8+i","V","V","",""
+"FMUL m32fp","D8 /1","V","V","",""
+"FMUL m64fp","DC /1","V","V","",""
+"FMULP ST(i), ST(0)","DE C8+i","V","V","",""
+"FMULP","DE C9","V","V","","pseudo"
+"FNCLEX","DB E2","V","V","",""
+"FNINIT","DB E3","V","V","",""
+"FNOP","D9 D0","V","V","",""
+"FNSAVE m94/108byte","DD /6","V","V","",""
+"FNSTCW m2byte","D9 /7","V","V","",""
+"FNSTENV m14/28byte","D9 /6","V","V","",""
+"FNSTSW AX","DF E0","V","V","",""
+"FNSTSW m2byte","DD /7","V","V","",""
+"FPATAN","D9 F3","V","V","",""
+"FPREM","D9 F8","V","V","",""
+"FPREM1","D9 F5","V","V","",""
+"FPTAN","D9 F2","V","V","",""
+"FRNDINT","D9 FC","V","V","",""
+"FRSTOR m94/108byte","DD /4","V","V","",""
+"FSAVE m94/108byte","9B DD /6","V","V","","pseudo"
+"FSCALE","D9 FD","V","V","",""
+"FSIN","D9 FE","V","V","",""
+"FSINCOS","D9 FB","V","V","",""
+"FSQRT","D9 FA","V","V","",""
+"FST ST(i)","DD D0+i","V","V","",""
+"FST m32fp","D9 /2","V","V","",""
+"FST m64fp","DD /2","V","V","",""
+"FSTCW m2byte","9B D9 /7","V","V","","pseudo"
+"FSTENV m14/28byte","9B D9 /6","V","V","","pseudo"
+"FSTP ST(i)","DD D8+i","V","V","",""
+"FSTP m32fp","D9 /3","V","V","",""
+"FSTP m64fp","DD /3","V","V","",""
+"FSTP m80fp","DB /7","V","V","",""
+"FSTSW AX","9B DF E0","V","V","","pseudo"
+"FSTSW m2byte","9B DD /7","V","V","","pseudo"
+"FSUB ST(0), ST(i)","D8 E0+i","V","V","",""
+"FSUB ST(i), ST(0)","DC E8+i","V","V","",""
+"FSUB m32fp","D8 /4","V","V","",""
+"FSUB m64fp","DC /4","V","V","",""
+"FSUBP ST(i), ST(0)","DE E8+i","V","V","",""
+"FSUBP","DE E9","V","V","","pseudo"
+"FSUBR ST(0), ST(i)","D8 E8+i","V","V","",""
+"FSUBR ST(i), ST(0)","DC E0+i","V","V","",""
+"FSUBR m32fp","D8 /5","V","V","",""
+"FSUBR m64fp","DC /5","V","V","",""
+"FSUBRP ST(i), ST(0)","DE E0+i","V","V","",""
+"FSUBRP","DE E1","V","V","","pseudo"
+"FTST","D9 E4","V","V","",""
+"FUCOM ST(i)","DD E0+i","V","V","",""
+"FUCOM","DD E1","V","V","","pseudo"
+"FUCOMI ST, ST(i)","DB E8+i","V","V","",""
+"FUCOMIP ST, ST(i)","DF E8+i","V","V","",""
+"FUCOMP ST(i)","DD E8+i","V","V","",""
+"FUCOMP","DD E9","V","V","","pseudo"
+"FUCOMPP","DA E9","V","V","",""
+"FWAIT","9B","V","V","",""
+"FXAM","D9 E5","V","V","",""
+"FXCH ST(i)","D9 C8+i","V","V","",""
+"FXCH","D9 C9","V","V","","pseudo"
+"FXRSTOR m512byte","0F AE /1","V","V","","operand16,operand32"
+"FXRSTOR64 m512byte","REX.W + 0F AE /1","N.E.","V","",""
+"FXSAVE m512byte","0F AE /0","V","V","","operand16,operand32"
+"FXSAVE64 m512byte","REX.W + 0F AE /0","N.E.","V","",""
+"FXTRACT","D9 F4","V","V","",""
+"FYL2X","D9 F1","V","V","",""
+"FYL2XP1","D9 F9","V","V","",""
+"HADDPD xmm1, xmm2/m128","66 0F 7C /r","V","V","SSE3",""
+"HADDPS xmm1, xmm2/m128","F2 0F 7C /r","V","V","SSE3",""
+"HLT","F4","V","V","",""
+"HSUBPD xmm1, xmm2/m128","66 0F 7D /r","V","V","SSE3",""
+"HSUBPS xmm1, xmm2/m128","F2 0F 7D /r","V","V","SSE3",""
+"ICEBP","F1","V","V","",""
+"IDIV r/m16","F7 /7","V","V","","operand16"
+"IDIV r/m32","F7 /7","V","V","","operand32"
+"IDIV r/m64","REX.W + F7 /7","N.E.","V","",""
+"IDIV r/m8","F6 /7","V","V","",""
+"IDIV r/m8","REX + F6 /7","N.E.","V","","pseudo64"
+"IMUL r/m16","F7 /5","V","V","","operand16"
+"IMUL r/m32","F7 /5","V","V","","operand32"
+"IMUL r/m64","REX.W + F7 /5","N.E.","V","",""
+"IMUL r/m8","F6 /5","V","V","",""
+"IMUL r16, r/m16","0F AF /r","V","V","","operand16"
+"IMUL r16, r/m16, imm16","69 /r iw","V","V","","operand16"
+"IMUL r16, r/m16, imm8","6B /r ib","V","V","","operand16"
+"IMUL r32, r/m32","0F AF /r","V","V","","operand32"
+"IMUL r32, r/m32, imm32","69 /r id","V","V","","operand32"
+"IMUL r32, r/m32, imm8","6B /r ib","V","V","","operand32"
+"IMUL r64, r/m64","REX.W + 0F AF /r","N.E.","V","",""
+"IMUL r64, r/m64, imm32","REX.W + 69 /r id","N.E.","V","",""
+"IMUL r64, r/m64, imm8","REX.W + 6B /r ib","N.E.","V","",""
+"IN AL, DX","EC","V","V","",""
+"IN AL, imm8u","E4 ib","V","V","",""
+"IN AX, DX","ED","V","V","","operand16"
+"IN AX, imm8u","E5 ib","V","V","","operand16"
+"IN EAX, DX","ED","V","V","","operand32,operand64"
+"IN EAX, imm8u","E5 ib","V","V","","operand32,operand64"
+"INC r/m16","FF /0","V","V","","operand16"
+"INC r/m32","FF /0","V","V","","operand32"
+"INC r/m64","REX.W + FF /0","N.E.","V","",""
+"INC r/m8","FE /0","V","V","",""
+"INC r/m8","REX + FE /0","N.E.","V","","pseudo64"
+"INC r16op","40+rw","V","N.E.","","operand16"
+"INC r32op","40+rd","V","N.E.","","operand32"
+"INS m16, DX","6D","V","V","","pseudo"
+"INS m32, DX","6D","V","V","","pseudo"
+"INS m8, DX","6C","V","V","","pseudo"
+"INSB","6C","V","V","",""
+"INSD","6D","V","V","","operand32,operand64"
+"INSERTPS xmm1, xmm2/m32, imm8u","66 0F 3A 21 /r ib","V","V","SSE4_1",""
+"INSW","6D","V","V","","operand16"
+"INT 3","CC","V","V","",""
+"INT imm8u","CD ib","V","V","",""
+"INTO","CE","V","I","",""
+"INVD","0F 08","V","V","",""
+"INVLPG m","0F 01 /7","V","V","",""
+"INVPCID r32, m128","66 0F 38 82 /r","V","N.E.","INVPCID",""
+"INVPCID r64, m128","66 0F 38 82 /r","N.E.","V","INVPCID",""
+"IRET","CF","V","V","","operand16"
+"IRETD","CF","V","V","","operand32"
+"IRETQ","REX.W + CF","N.E.","V","",""
+"JA rel16","0F 87 cw","V","N.S.","","operand16"
+"JA rel32","0F 87 cd","V","V","","operand32"
+"JA rel8","77 cb","V","V","",""
+"JAE rel16","0F 83 cw","V","N.S.","","operand16"
+"JAE rel32","0F 83 cd","V","V","","operand32"
+"JAE rel8","73 cb","V","V","",""
+"JB rel16","0F 82 cw","V","N.S.","","operand16"
+"JB rel32","0F 82 cd","V","V","","operand32"
+"JB rel8","72 cb","V","V","",""
+"JBE rel16","0F 86 cw","V","N.S.","","operand16"
+"JBE rel32","0F 86 cd","V","V","","operand32"
+"JBE rel8","76 cb","V","V","",""
+"JC rel16","0F 82 cw","V","N.S.","","pseudo"
+"JC rel32","0F 82 cd","V","V","","pseudo"
+"JC rel8","72 cb","V","V","","pseudo"
+"JCXZ rel8","E3 cb","V","N.E.","","address16"
+"JE rel16","0F 84 cw","V","N.S.","","operand16"
+"JE rel32","0F 84 cd","V","V","","operand32"
+"JE rel8","74 cb","V","V","",""
+"JECXZ rel8","E3 cb","V","V","","address32"
+"JG rel16","0F 8F cw","V","N.S.","","operand16"
+"JG rel32","0F 8F cd","V","V","","operand32"
+"JG rel8","7F cb","V","V","",""
+"JGE rel16","0F 8D cw","V","N.S.","","operand16"
+"JGE rel32","0F 8D cd","V","V","","operand32"
+"JGE rel8","7D cb","V","V","",""
+"JL rel16","0F 8C cw","V","N.S.","","operand16"
+"JL rel32","0F 8C cd","V","V","","operand32"
+"JL rel8","7C cb","V","V","",""
+"JLE rel16","0F 8E cw","V","N.S.","","operand16"
+"JLE rel32","0F 8E cd","V","V","","operand32"
+"JLE rel8","7E cb","V","V","",""
+"JMP r/m16","FF /4","V","N.S.","","operand16"
+"JMP r/m32","FF /4","V","N.S.","","operand32"
+"JMP r/m64","FF /4","N.E.","V","",""
+"JMP rel16","E9 cw","V","N.S.","","operand16"
+"JMP rel32","E9 cd","V","V","","operand32"
+"JMP rel32","E9 cd","N.S.","V","","operand16,operand64"
+"JMP rel8","EB cb","V","V","",""
+"JNA rel16","0F 86 cw","V","N.S.","","pseudo"
+"JNA rel32","0F 86 cd","V","V","","pseudo"
+"JNA rel8","76 cb","V","V","","pseudo"
+"JNAE rel16","0F 82 cw","V","N.S.","","pseudo"
+"JNAE rel32","0F 82 cd","V","V","","pseudo"
+"JNAE rel8","72 cb","V","V","","pseudo"
+"JNB rel16","0F 83 cw","V","N.S.","","pseudo"
+"JNB rel32","0F 83 cd","V","V","","pseudo"
+"JNB rel8","73 cb","V","V","","pseudo"
+"JNBE rel16","0F 87 cw","V","N.S.","","pseudo"
+"JNBE rel32","0F 87 cd","V","V","","pseudo"
+"JNBE rel8","77 cb","V","V","","pseudo"
+"JNC rel16","0F 83 cw","V","N.S.","","pseudo"
+"JNC rel32","0F 83 cd","V","V","","pseudo"
+"JNC rel8","73 cb","V","V","","pseudo"
+"JNE rel16","0F 85 cw","V","N.S.","","operand16"
+"JNE rel32","0F 85 cd","V","V","","operand32"
+"JNE rel8","75 cb","V","V","",""
+"JNG rel16","0F 8E cw","V","N.S.","","pseudo"
+"JNG rel32","0F 8E cd","V","V","","pseudo"
+"JNG rel8","7E cb","V","V","","pseudo"
+"JNGE rel16","0F 8C cw","V","N.S.","","pseudo"
+"JNGE rel32","0F 8C cd","V","V","","pseudo"
+"JNGE rel8","7C cb","V","V","","pseudo"
+"JNL rel16","0F 8D cw","V","N.S.","","pseudo"
+"JNL rel32","0F 8D cd","V","V","","pseudo"
+"JNL rel8","7D cb","V","V","","pseudo"
+"JNLE rel16","0F 8F cw","V","N.S.","","pseudo"
+"JNLE rel32","0F 8F cd","V","V","","pseudo"
+"JNLE rel8","7F cb","V","V","","pseudo"
+"JNO rel16","0F 81 cw","V","N.S.","","operand16"
+"JNO rel32","0F 81 cd","V","V","","operand32"
+"JNO rel8","71 cb","V","V","",""
+"JNP rel16","0F 8B cw","V","N.S.","","operand16"
+"JNP rel32","0F 8B cd","V","V","","operand32"
+"JNP rel8","7B cb","V","V","",""
+"JNS rel16","0F 89 cw","V","N.S.","","operand16"
+"JNS rel32","0F 89 cd","V","V","","operand32"
+"JNS rel8","79 cb","V","V","",""
+"JNZ rel16","0F 85 cw","V","N.S.","","pseudo"
+"JNZ rel32","0F 85 cd","V","V","","pseudo"
+"JNZ rel8","75 cb","V","V","","pseudo"
+"JO rel16","0F 80 cw","V","N.S.","","operand16"
+"JO rel32","0F 80 cd","V","V","","operand32"
+"JO rel8","70 cb","V","V","",""
+"JP rel16","0F 8A cw","V","N.S.","","operand16"
+"JP rel32","0F 8A cd","V","V","","operand32"
+"JP rel8","7A cb","V","V","",""
+"JPE rel16","0F 8A cw","V","N.S.","","pseudo"
+"JPE rel32","0F 8A cd","V","V","","pseudo"
+"JPE rel8","7A cb","V","V","","pseudo"
+"JPO rel16","0F 8B cw","V","N.S.","","pseudo"
+"JPO rel32","0F 8B cd","V","V","","pseudo"
+"JPO rel8","7B cb","V","V","","pseudo"
+"JRCXZ rel8","E3 cb","N.E.","V","","address64"
+"JS rel16","0F 88 cw","V","N.S.","","operand16"
+"JS rel32","0F 88 cd","V","V","","operand32"
+"JS rel8","78 cb","V","V","",""
+"JZ rel16","0F 84 cw","V","N.S.","","pseudo"
+"JZ rel32","0F 84 cd","V","V","","pseudo"
+"JZ rel8","74 cb","V","V","","pseudo"
+"JA rel32","0F 87 cd","N.S.","V","","operand16,operand64"
+"JAE rel32","0F 83 cd","N.S.","V","","operand16,operand64"
+"JB rel32","0F 82 cd","N.S.","V","","operand16,operand64"
+"JBE rel32","0F 86 cd","N.S.","V","","operand16,operand64"
+"JE rel32","0F 84 cd","N.S.","V","","operand16,operand64"
+"JG rel32","0F 8F cd","N.S.","V","","operand16,operand64"
+"JGE rel32","0F 8D cd","N.S.","V","","operand16,operand64"
+"JL rel32","0F 8C cd","N.S.","V","","operand16,operand64"
+"JLE rel32","0F 8E cd","N.S.","V","","operand16,operand64"
+"JNE rel32","0F 85 cd","N.S.","V","","operand16,operand64"
+"JNO rel32","0F 81 cd","N.S.","V","","operand16,operand64"
+"JNP rel32","0F 8B cd","N.S.","V","","operand16,operand64"
+"JNS rel32","0F 89 cd","N.S.","V","","operand16,operand64"
+"JO rel32","0F 80 cd","N.S.","V","","operand16,operand64"
+"JP rel32","0F 8A cd","N.S.","V","","operand16,operand64"
+"JS rel32","0F 88 cd","N.S.","V","","operand16,operand64"
+"LAHF","9F","V","V","",""
+"LAR r16, r/m16","0F 02 /r","V","V","","operand16"
+"LAR r32, r32/m16","0F 02 /r","V","V","","operand32"
+"LAR r64, r64/m16","0F 02 /r","V","V","","operand64"
+"LCALL m16:16","FF /3","V","V","","operand16"
+"LCALL m16:32","FF /3","V","V","","operand32"
+"LCALL m16:64","REX.W + FF /3","N.E.","V","",""
+"LCALL ptr16:16","9A cd","V","I","","operand16"
+"LCALL ptr16:32","9A cp","V","I","","operand32"
+"LDDQU xmm1, m128","F2 0F F0 /r","V","V","SSE3",""
+"LDMXCSR m32","0F AE /2","V","V","SSE",""
+"LDS r16, m16:16","C5 /r","V","I","","operand16"
+"LDS r32, m16:32","C5 /r","V","I","","operand32"
+"LEA r16, m","8D /r","V","V","","operand16"
+"LEA r32, m","8D /r","V","V","","operand32"
+"LEA r64, m","REX.W + 8D /r","N.E.","V","",""
+"LEAVE","C9","N.E.","V","","operand32,operand64"
+"LEAVE","C9","V","N.E.","","operand32"
+"LEAVE","C9","V","V","","operand16"
+"LES r16, m16:16","C4 /r","V","I","","operand16"
+"LES r32, m16:32","C4 /r","V","I","","operand32"
+"LFENCE","0F AE E8","V","V","",""
+"LFS r16, m16:16","0F B4 /r","V","V","","operand16"
+"LFS r32, m16:32","0F B4 /r","V","V","","operand32"
+"LFS r64, m16:64","REX.W + 0F B4 /r","N.E.","V","",""
+"LGDT m16&32","0F 01 /2","V","N.E.","",""
+"LGDT m16&64","0F 01 /2","N.E.","V","",""
+"LGS r16, m16:16","0F B5 /r","V","V","","operand16"
+"LGS r32, m16:32","0F B5 /r","V","V","","operand32"
+"LGS r64, m16:64","REX.W + 0F B5 /r","N.E.","V","",""
+"LIDT m16&32","0F 01 /3","V","N.E.","",""
+"LIDT m16&64","0F 01 /3","N.E.","V","",""
+"LJMP m16:16","FF /5","V","V","","operand16"
+"LJMP m16:32","FF /5","V","V","","operand32"
+"LJMP m16:64","REX.W + FF /5","N.E.","V","",""
+"LJMP ptr16:16","EA cd","V","I","","operand16"
+"LJMP ptr16:32","EA cp","V","I","","operand32"
+"LLDT r/m16","0F 00 /2","V","V","",""
+"LMSW r/m16","0F 01 /6","V","V","",""
+"LOCK","F0","V","V","","pseudo"
+"LODS m16","AD","V","V","","pseudo"
+"LODS m32","AD","V","V","","pseudo"
+"LODS m64","REX.W + AD","N.E.","V","","pseudo"
+"LODS m8","AC","V","V","","pseudo"
+"LODSB","AC","V","V","",""
+"LODSD","AD","V","V","","operand32"
+"LODSQ","REX.W + AD","N.E.","V","",""
+"LODSW","AD","V","V","","operand16"
+"LOOP rel8","E2 cb","V","V","",""
+"LOOPE rel8","E1 cb","V","V","",""
+"LOOPNE rel8","E0 cb","V","V","",""
+"LRET imm16u","CA iw","V","V","",""
+"LRET","CB","V","V","",""
+"LSL r16, r/m16","0F 03 /r","V","V","","operand16"
+"LSL r32, r32/m16","0F 03 /r","V","V","","operand32"
+"LSL r64, r32/m16","REX.W + 0F 03 /r","V","V","",""
+"LSS r16, m16:16","0F B2 /r","V","V","","operand16"
+"LSS r32, m16:32","0F B2 /r","V","V","","operand32"
+"LSS r64, m16:64","REX.W + 0F B2 /r","N.E.","V","",""
+"LTR r/m16","0F 00 /3","V","V","",""
+"LZCNT r16, r/m16","F3 0F BD /r","V","V","LZCNT","operand16"
+"LZCNT r32, r/m32","F3 0F BD /r","V","V","LZCNT","operand32"
+"LZCNT r64, r/m64","REX.W + F3 0F BD /r","N.E.","V","LZCNT",""
+"MASKMOVDQU xmm1, xmm2","66 0F F7 /r","V","V","SSE2",""
+"MASKMOVQ mm1, mm2","0F F7 /r","V","V","",""
+"MAXPD xmm1, xmm2/m128","66 0F 5F /r","V","V","SSE2",""
+"MAXPS xmm1, xmm2/m128","0F 5F /r","V","V","SSE",""
+"MAXSD xmm1, xmm2/m64","F2 0F 5F /r","V","V","SSE2",""
+"MAXSS xmm1, xmm2/m32","F3 0F 5F /r","V","V","SSE",""
+"MFENCE","0F AE F0","V","V","",""
+"MINPD xmm1, xmm2/m128","66 0F 5D /r","V","V","SSE2",""
+"MINPS xmm1, xmm2/m128","0F 5D /r","V","V","SSE",""
+"MINSD xmm1, xmm2/m64","F2 0F 5D /r","V","V","SSE2",""
+"MINSS xmm1, xmm2/m32","F3 0F 5D /r","V","V","SSE",""
+"MONITOR","0F 01 C8","V","V","",""
+"MOV AL, moffs8","A0 cm","V","V","",""
+"MOV AL, moffs8","REX.W + A0 cm","N.E.","V","",""
+"MOV AX, moffs16","A1 cm","V","V","","operand16"
+"MOV CR0-CR7, rmf32","0F 22 /r","V","N.E.","",""
+"MOV CR0-CR7, rmf64","0F 22 /r","N.E.","V","",""
+"MOV DR0-DR7, rmf32","0F 23 /r","V","N.E.","",""
+"MOV DR0-DR7, rmf64","0F 23 /r","N.E.","V","",""
+"MOV EAX, moffs32","A1 cm","V","V","","operand32"
+"MOV RAX, moffs64","REX.W + A1 cm","V","V","",""
+"MOV Sreg, r/m16","8E /r","V","V","","operand16"
+"MOV Sreg, r32/m16","8E /r","V","V","","operand32"
+"MOV Sreg, r64/m16","REX.W + 8E /r","N.E.","V","","operand64"
+"MOV TR0-TR7, rmf32","0F 26 /r","V","N.E.","",""
+"MOV TR0-TR7, rmf64","0F 26 /r","N.E.","V","",""
+"MOV moffs16, AX","A3 cm","V","V","","operand16"
+"MOV moffs32, EAX","A3 cm","V","V","","operand32"
+"MOV moffs64, RAX","REX.W + A3 cm","V","V","",""
+"MOV moffs8, AL","A2 cm","V","V","",""
+"MOV moffs8, AL","REX.W + A2 cm","N.E.","V","",""
+"MOV r/m16,  imm16","C7 /0 iw","V","V","","operand16"
+"MOV r/m16, Sreg","8C /r","V","V","","operand16"
+"MOV r64/m16, Sreg","REX.W + 8C /r","N.E.","V","","operand64"
+"MOV r32/m16, Sreg","8C /r","V","V","","operand32"
+"MOV r/m16, r16","89 /r","V","V","","operand16"
+"MOV r/m32,  imm32","C7 /0 id","V","V","","operand32"
+"MOV r/m32, r32","89 /r","V","V","","operand32"
+"MOV r/m64,  imm32","REX.W + C7 /0 id","N.E.","V","",""
+"MOV r/m64, r64","REX.W + 89 /r","V","V","","operand64"
+"MOV r/m8,  imm8u","C6 /0 ib","V","V","",""
+"MOV r/m8,  imm8u","REX + C6 /0 ib","N.E.","V","","pseudo64"
+"MOV r/m8, r8","88 /r","V","V","",""
+"MOV r/m8, r8","REX + 88 /r","N.E.","V","","pseudo64"
+"MOV r16op, imm16","B8+rw iw","V","V","","operand16"
+"MOV r16, r/m16","8B /r","V","V","","operand16"
+"MOV r32op, imm32","B8+rd id","V","V","","operand32"
+"MOV r32, r/m32","8B /r","V","V","","operand32"
+"MOV r64op, imm64","REX.W + B8+rd io","N.E.","V","",""
+"MOV r64, r/m64","REX.W + 8B /r","V","V","","operand64"
+"MOV r8op, imm8u","B0+rb ib","V","V","",""
+"MOV r8op, imm8u","REX + B0+rb ib","N.E.","V","","pseudo64"
+"MOV r8, r/m8","8A /r","V","V","",""
+"MOV r8, r/m8","REX + 8A /r","N.E.","V","","pseudo64"
+"MOV rmf32, CR0-CR7","0F 20 /r","V","N.E.","",""
+"MOV rmf32, DR0-DR7","0F 21 /r","V","N.E.","",""
+"MOV rmf32, TR0-TR7","0F 24 /r","V","N.E.","",""
+"MOV rmf64, CR0-CR7","0F 20 /r","N.E.","V","",""
+"MOV rmf64, DR0-DR7","0F 21 /r","N.E.","V","",""
+"MOV rmf64, TR0-TR7","0F 24 /r","N.E.","V","",""
+"MOVAPD xmm1, xmm2/m128","66 0F 28 /r","V","V","SSE2",""
+"MOVAPD xmm2/m128, xmm1","66 0F 29 /r","V","V","SSE2",""
+"MOVAPS xmm1, xmm2/m128","0F 28 /r","V","V","SSE",""
+"MOVAPS xmm2/m128, xmm1","0F 29 /r","V","V","SSE",""
+"MOVBE m16, r16","0F 38 F1 /r","V","V","","operand16"
+"MOVBE m32, r32","0F 38 F1 /r","V","V","","operand32"
+"MOVBE m64, r64","REX.W + 0F 38 F1 /r","N.E.","V","",""
+"MOVBE r16, m16","0F 38 F0 /r","V","V","","operand16"
+"MOVBE r32, m32","0F 38 F0 /r","V","V","","operand32"
+"MOVBE r64, m64","REX.W + 0F 38 F0 /r","N.E.","V","",""
+"MOVD mm, r/m32","0F 6E /r","V","V","MMX","operand16,operand32"
+"MOVD r/m32, mm","0F 7E /r","V","V","MMX","operand16,operand32"
+"MOVD r/m32, xmm","66 0F 7E /r","V","V","SSE2","operand16,operand32"
+"MOVD xmm, r/m32","66 0F 6E /r","V","V","SSE2","operand16,operand32"
+"MOVDDUP xmm1, xmm2/m64","F2 0F 12 /r","V","V","SSE3",""
+"MOVDQ2Q mm, xmm2","F2 0F D6 /r","V","V","",""
+"MOVDQA xmm1, xmm2/m128","66 0F 6F /r","V","V","SSE2",""
+"MOVDQA xmm2/m128, xmm1","66 0F 7F /r","V","V","SSE2",""
+"MOVDQU xmm1, xmm2/m128","F3 0F 6F /r","V","V","SSE2",""
+"MOVDQU xmm2/m128, xmm1","F3 0F 7F /r","V","V","SSE2",""
+"MOVHLPS xmm1, xmm2","0F 12 /r","V","V","SSE","modrm_regonly"
+"MOVHPD xmm, xmm2/m64","66 0F 16 /r","V","V","SSE2",""
+"MOVHPD xmm2/m64, xmm","66 0F 17 /r","V","V","SSE2",""
+"MOVHPS m64, xmm","0F 17 /r","V","V","SSE",""
+"MOVHPS xmm, m64","0F 16 /r","V","V","SSE","modrm_memonly"
+"MOVLHPS xmm1, xmm2","0F 16 /r","V","V","SSE","modrm_regonly"
+"MOVLPD xmm, xmm2/m64","66 0F 12 /r","V","V","SSE2",""
+"MOVLPD xmm2/m64, xmm","66 0F 13 /r","V","V","SSE2",""
+"MOVLPS m64, xmm","0F 13 /r","V","V","SSE",""
+"MOVLPS xmm, m64","0F 12 /r","V","V","SSE","modrm_memonly"
+"MOVMSKPD r32, xmm2","66 0F 50 /r","V","V","SSE2",""
+"MOVMSKPS r32, xmm2","0F 50 /r","V","V","SSE",""
+"MOVNTDQ m128, xmm","66 0F E7 /r","V","V","SSE2",""
+"MOVNTDQA xmm1, m128","66 0F 38 2A /r","V","V","SSE4_1",""
+"MOVNTI m32, r32","0F C3 /r","V","V","","operand16,operand32"
+"MOVNTI m64, r64","REX.W + 0F C3 /r","N.E.","V","",""
+"MOVNTPD m128, xmm","66 0F 2B /r","V","V","SSE2",""
+"MOVNTPS m128, xmm","0F 2B /r","V","V","SSE",""
+"MOVNTQ m64, mm","0F E7 /r","V","V","",""
+"MOVNTSD m64, xmm","F2 0F 2B /r","V","V","SSE",""
+"MOVNTSS m32, xmm","F3 0F 2B /r","V","V","SSE",""
+"MOVQ mm, mm/m64","0F 6F /r","V","V","MMX",""
+"MOVQ mm, r/m64","REX.W + 0F 6E /r","N.E.","V","MMX",""
+"MOVQ mm/m64, mm","0F 7F /r","V","V","MMX",""
+"MOVQ r/m64, mm","REX.W + 0F 7E /r","N.E.","V","MMX",""
+"MOVQ r/m64, xmm","66 REX.W 0F 7E /r","N.E.","V","SSE2",""
+"MOVQ xmm, r/m64","66 REX.W 0F 6E /r","N.E.","V","SSE2",""
+"MOVQ xmm1, xmm2/m64","F3 0F 7E /r","V","V","SSE2",""
+"MOVQ xmm2/m64, xmm1","66 0F D6 /r","V","V","SSE2",""
+"MOVQ2DQ xmm1, mm2","F3 0F D6 /r","V","V","",""
+"MOVS m16, m16","A5","V","V","","pseudo"
+"MOVS m32, m32","A5","V","V","","pseudo"
+"MOVS m64, m64","REX.W + A5","N.E.","V","","pseudo"
+"MOVS m8, m8","A4","V","V","","pseudo"
+"MOVSB","A4","V","V","",""
+"MOVSD","A5","V","V","","operand32"
+"MOVSD_XMM xmm1, xmm2/m64","F2 0F 10 /r","V","V","SSE2",""
+"MOVSD_XMM xmm2/m64, xmm1","F2 0F 11 /r","V","V","SSE2",""
+"MOVSHDUP xmm1, xmm2/m128","F3 0F 16 /r","V","V","SSE3",""
+"MOVSLDUP xmm1, xmm2/m128","F3 0F 12 /r","V","V","SSE3",""
+"MOVSQ","REX.W + A5","N.E.","V","",""
+"MOVSS xmm1, xmm2/m32","F3 0F 10 /r","V","V","SSE",""
+"MOVSS xmm2/m32, xmm","F3 0F 11 /r","V","V","SSE",""
+"MOVSW","A5","V","V","","operand16"
+"MOVSX r16, r/m16","0F BF /r","V","V","","operand16"
+"MOVSX r16, r/m8","0F BE /r","V","V","","operand16"
+"MOVSX r32, r/m16","0F BF /r","V","V","","operand32"
+"MOVSX r32, r/m8","0F BE /r","V","V","","operand32"
+"MOVSX r64, r/m16","REX.W + 0F BF /r","N.E.","V","",""
+"MOVSX r64, r/m8","REX.W + 0F BE /r","N.E.","V","",""
+"MOVSXD r16, r/m32","63 /r","N.E.","V","","operand16"
+"MOVSXD r32, r/m32","63 /r","N.E.","V","","operand32"
+"MOVSXD r64, r/m32","REX.W + 63 /r","N.E.","V","",""
+"MOVUPD xmm1, xmm2/m128","66 0F 10 /r","V","V","SSE2",""
+"MOVUPD xmm2/m128, xmm","66 0F 11 /r","V","V","SSE2",""
+"MOVUPS xmm1, xmm2/m128","0F 10 /r","V","V","SSE",""
+"MOVUPS xmm2/m128, xmm1","0F 11 /r","V","V","SSE",""
+"MOVZX r16, r/m16","0F B7 /r","V","V","","operand16"
+"MOVZX r16, r/m8","0F B6 /r","V","V","","operand16"
+"MOVZX r32, r/m16","0F B7 /r","V","V","","operand32"
+"MOVZX r32, r/m8","0F B6 /r","V","V","","operand32"
+"MOVZX r64, r/m16","REX.W + 0F B7 /r","N.E.","V","",""
+"MOVZX r64, r/m8","REX.W + 0F B6 /r","N.E.","V","",""
+"MPSADBW xmm1, xmm2/m128, imm8u","66 0F 3A 42 /r ib","V","V","SSE4_1",""
+"MUL r/m16","F7 /4","V","V","","operand16"
+"MUL r/m32","F7 /4","V","V","","operand32"
+"MUL r/m64","REX.W + F7 /4","N.E.","V","",""
+"MUL r/m8","F6 /4","V","V","",""
+"MUL r/m8","REX + F6 /4","N.E.","V","","pseudo64"
+"MULPD xmm1, xmm2/m128","66 0F 59 /r","V","V","SSE2",""
+"MULPS xmm1, xmm2/m128","0F 59 /r","V","V","SSE",""
+"MULSD xmm1, xmm2/m64","F2 0F 59 /r","V","V","SSE2",""
+"MULSS xmm1, xmm2/m32","F3 0F 59 /r","V","V","SSE",""
+"MULX r32a, r32b, r/m32","VEX.NDD.LZ.F2.0F38.W0 F6 /r","V","V","BMI2",""
+"MULX r64a, r64b, r/m64","VEX.NDD.LZ.F2.0F38.W1 F6 /r","N.E.","V","BMI2",""
+"MWAIT","0F 01 C9","V","V","",""
+"NEG r/m16","F7 /3","V","V","","operand16"
+"NEG r/m32","F7 /3","V","V","","operand32"
+"NEG r/m64","REX.W + F7 /3","N.E.","V","",""
+"NEG r/m8","F6 /3","V","V","",""
+"NEG r/m8","REX + F6 /3","N.E.","V","","pseudo64"
+"NOP r/m16","0F 1F /0","V","V","","operand16"
+"NOP r/m32","0F 1F /0","V","V","","operand32"
+"NOP","90","V","V","","pseudo"
+"NOT r/m16","F7 /2","V","V","","operand16"
+"NOT r/m32","F7 /2","V","V","","operand32"
+"NOT r/m64","REX.W + F7 /2","N.E.","V","",""
+"NOT r/m8","F6 /2","V","V","",""
+"NOT r/m8","REX + F6 /2","N.E.","V","","pseudo64"
+"OR AL, imm8u","0C ib","V","V","",""
+"OR AX, imm16","0D iw","V","V","","operand16"
+"OR EAX, imm32","0D id","V","V","","operand32"
+"OR RAX, imm32","REX.W + 0D id","N.E.","V","",""
+"OR r/m16, imm16","81 /1 iw","V","V","","operand16"
+"OR r/m16, imm8","83 /1 ib","V","V","","operand16"
+"OR r/m16, r16","09 /r","V","V","","operand16"
+"OR r/m32, imm32","81 /1 id","V","V","","operand32"
+"OR r/m32, imm8","83 /1 ib","V","V","","operand32"
+"OR r/m32, r32","09 /r","V","V","","operand32"
+"OR r/m64, imm32","REX.W + 81 /1 id","N.E.","V","",""
+"OR r/m64, imm8","REX.W + 83 /1 ib","N.E.","V","",""
+"OR r/m64, r64","REX.W + 09 /r","N.E.","V","",""
+"OR r/m8, imm8u","80 /1 ib","V","V","",""
+"OR r/m8, imm8u","REX + 80 /1 ib","N.E.","V","","pseudo64"
+"OR r/m8, r8","08 /r","V","V","",""
+"OR r/m8, r8","REX + 08 /r","N.E.","V","","pseudo64"
+"OR r16, r/m16","0B /r","V","V","","operand16"
+"OR r32, r/m32","0B /r","V","V","","operand32"
+"OR r64, r/m64","REX.W + 0B /r","N.E.","V","",""
+"OR r8, r/m8","0A /r","V","V","",""
+"OR r8, r/m8","REX + 0A /r","N.E.","V","","pseudo64"
+"ORPD xmm1, xmm2/m128","66 0F 56 /r","V","V","SSE2",""
+"ORPS xmm1, xmm2/m128","0F 56 /r","V","V","SSE",""
+"OUT DX, AL","EE","V","V","",""
+"OUT DX, AX","EF","V","V","","operand16"
+"OUT DX, EAX","EF","V","V","","operand32,operand64"
+"OUT imm8u, AL","E6 ib","V","V","",""
+"OUT imm8u, AX","E7 ib","V","V","","operand16"
+"OUT imm8u, EAX","E7 ib","V","V","","operand32,operand64"
+"OUTS DX, m16","6F","V","V","","pseudo"
+"OUTS DX, m32","6F","V","V","","pseudo"
+"OUTS DX, m8","6E","V","V","","pseudo"
+"OUTSB","6E","V","V","",""
+"OUTSD","6F","V","V","","operand32,operand64"
+"OUTSW","6F","V","V","","operand16"
+"PABSB mm1, mm2/m64","0F 38 1C /r","V","V","SSSE3",""
+"PABSB xmm1, xmm2/m128","66 0F 38 1C /r","V","V","SSSE3",""
+"PABSD mm1, mm2/m64","0F 38 1E /r","V","V","SSSE3",""
+"PABSD xmm1, xmm2/m128","66 0F 38 1E /r","V","V","SSSE3",""
+"PABSW mm1, mm2/m64","0F 38 1D /r","V","V","SSSE3",""
+"PABSW xmm1, xmm2/m128","66 0F 38 1D /r","V","V","SSSE3",""
+"PACKSSDW mm1, mm2/m64","0F 6B /r","V","V","MMX",""
+"PACKSSDW xmm1, xmm2/m128","66 0F 6B /r","V","V","SSE2",""
+"PACKSSWB mm1, mm2/m64","0F 63 /r","V","V","MMX",""
+"PACKSSWB xmm1, xmm2/m128","66 0F 63 /r","V","V","SSE2",""
+"PACKUSDW xmm1, xmm2/m128","66 0F 38 2B /r","V","V","SSE4_1",""
+"PACKUSWB mm, mm/m64","0F 67 /r","V","V","MMX",""
+"PACKUSWB xmm1, xmm2/m128","66 0F 67 /r","V","V","SSE2",""
+"PADDB mm, mm/m64","0F FC /r","V","V","MMX",""
+"PADDB xmm1, xmm2/m128","66 0F FC /r","V","V","SSE2",""
+"PADDD mm, mm/m64","0F FE /r","V","V","MMX",""
+"PADDD xmm1, xmm2/m128","66 0F FE /r","V","V","SSE2",""
+"PADDQ mm1, mm2/m64","0F D4 /r","V","V","SSE2",""
+"PADDQ xmm1, xmm2/m128","66 0F D4 /r","V","V","SSE2",""
+"PADDSB mm, mm/m64","0F EC /r","V","V","MMX",""
+"PADDSB xmm1, xmm2/m128","66 0F EC /r","V","V","SSE2",""
+"PADDSW mm, mm/m64","0F ED /r","V","V","MMX",""
+"PADDSW xmm1, xmm2/m128","66 0F ED /r","V","V","SSE2",""
+"PADDUSB mm, mm/m64","0F DC /r","V","V","MMX",""
+"PADDUSB xmm1, xmm2/m128","66 0F DC /r","V","V","SSE2",""
+"PADDUSW mm, mm/m64","0F DD /r","V","V","MMX",""
+"PADDUSW xmm1, xmm2/m128","66 0F DD /r","V","V","SSE2",""
+"PADDW mm, mm/m64","0F FD /r","V","V","MMX",""
+"PADDW xmm1, xmm2/m128","66 0F FD /r","V","V","SSE2",""
+"PALIGNR mm1, mm2/m64, imm8u","0F 3A 0F /r ib","V","V","SSSE3",""
+"PALIGNR xmm1, xmm2/m128, imm8u","66 0F 3A 0F /r ib","V","V","SSSE3",""
+"PAND mm, mm/m64","0F DB /r","V","V","MMX",""
+"PAND xmm1, xmm2/m128","66 0F DB /r","V","V","SSE2",""
+"PANDN mm, mm/m64","0F DF /r","V","V","MMX",""
+"PANDN xmm1, xmm2/m128","66 0F DF /r","V","V","SSE2",""
+"PAUSE","F3 90","V","V","","pseudo,keepop"
+"PAVGB mm1, mm2/m64","0F E0 /r","V","V","SSE",""
+"PAVGB xmm1, xmm2/m128","66 0F E0 /r","V","V","SSE2",""
+"PAVGW mm1, mm2/m64","0F E3 /r","V","V","SSE",""
+"PAVGW xmm1, xmm2/m128","66 0F E3 /r","V","V","SSE2",""
+"PBLENDVB xmm1, xmm2/m128, <XMM0>","66 0F 38 10 /r","V","V","SSE4_1",""
+"PBLENDW xmm1, xmm2/m128, imm8u","66 0F 3A 0E /r ib","V","V","SSE4_1",""
+"PCLMULQDQ xmm1, xmm2/m128, imm8u","66 0F 3A 44 /r ib","V","V","CLMUL",""
+"PCMPEQB mm, mm/m64","0F 74 /r","V","V","MMX",""
+"PCMPEQB xmm1, xmm2/m128","66 0F 74 /r","V","V","SSE2",""
+"PCMPEQD mm, mm/m64","0F 76 /r","V","V","MMX",""
+"PCMPEQD xmm1, xmm2/m128","66 0F 76 /r","V","V","SSE2",""
+"PCMPEQQ xmm1, xmm2/m128","66 0F 38 29 /r","V","V","SSE4_1",""
+"PCMPEQW mm, mm/m64","0F 75 /r","V","V","MMX",""
+"PCMPEQW xmm1, xmm2/m128","66 0F 75 /r","V","V","SSE2",""
+"PCMPESTRI xmm1, xmm2/m128, imm8u","66 0F 3A 61 /r ib","V","V","SSE4_2",""
+"PCMPESTRM xmm1, xmm2/m128, imm8u","66 0F 3A 60 /r ib","V","V","SSE4_2",""
+"PCMPGTB mm, mm/m64","0F 64 /r","V","V","MMX",""
+"PCMPGTB xmm1, xmm2/m128","66 0F 64 /r","V","V","SSE2",""
+"PCMPGTD mm, mm/m64","0F 66 /r","V","V","MMX",""
+"PCMPGTD xmm1, xmm2/m128","66 0F 66 /r","V","V","SSE2",""
+"PCMPGTQ xmm1, xmm2/m128","66 0F 38 37 /r","V","V","SSE4_2",""
+"PCMPGTW mm, mm/m64","0F 65 /r","V","V","MMX",""
+"PCMPGTW xmm1, xmm2/m128","66 0F 65 /r","V","V","SSE2",""
+"PCMPISTRI xmm1, xmm2/m128, imm8u","66 0F 3A 63 /r ib","V","V","SSE4_2",""
+"PCMPISTRM xmm1, xmm2/m128, imm8u","66 0F 3A 62 /r ib","V","V","SSE4_2",""
+"PDEP r32a, r32b, r/m32","VEX.NDS.LZ.F2.0F38.W0 F5 /r","V","V","BMI2",""
+"PDEP r64a, r64b, r/m64","VEX.NDS.LZ.F2.0F38.W1 F5 /r","N.E.","V","BMI2",""
+"PEXT r32a, r32b, r/m32","VEX.NDS.LZ.F3.0F38.W0 F5 /r","V","V","BMI2",""
+"PEXT r64a, r64b, r/m64","VEX.NDS.LZ.F3.0F38.W1 F5 /r","N.E.","V","BMI2",""
+"PEXTRB r32/m8, xmm1, imm8u","66 0F 3A 14 /r ib","V","V","SSE4_1",""
+"PEXTRD r/m32, xmm1, imm8u","66 0F 3A 16 /r ib","V","V","SSE4_1","operand16,operand32"
+"PEXTRQ r/m64, xmm1, imm8u","66 REX.W 0F 3A 16 /r ib","N.E.","V","SSE4_1",""
+"PEXTRW r32, mm2, imm8u","0F C5 /r ib","V","V","SSE",""
+"PEXTRW r32, xmm2, imm8u","66 0F C5 /r ib","V","V","SSE2",""
+"PEXTRW r32/m16, xmm1, imm8u","66 0F 3A 15 /r ib","V","V","SSE4_1",""
+"PHADDD mm1, mm2/m64","0F 38 02 /r","V","V","SSSE3",""
+"PHADDD xmm1, xmm2/m128","66 0F 38 02 /r","V","V","SSSE3",""
+"PHADDSW mm1, mm2/m64","0F 38 03 /r","V","V","SSSE3",""
+"PHADDSW xmm1, xmm2/m128","66 0F 38 03 /r","V","V","SSSE3",""
+"PHADDW mm1, mm2/m64","0F 38 01 /r","V","V","SSSE3",""
+"PHADDW xmm1, xmm2/m128","66 0F 38 01 /r","V","V","SSSE3",""
+"PHMINPOSUW xmm1, xmm2/m128","66 0F 38 41 /r","V","V","SSE4_1",""
+"PHSUBD mm1, mm2/m64","0F 38 06 /r","V","V","SSSE3",""
+"PHSUBD xmm1, xmm2/m128","66 0F 38 06 /r","V","V","SSSE3",""
+"PHSUBSW mm1, mm2/m64","0F 38 07 /r","V","V","SSSE3",""
+"PHSUBSW xmm1, xmm2/m128","66 0F 38 07 /r","V","V","SSSE3",""
+"PHSUBW mm1, mm2/m64","0F 38 05 /r","V","V","SSSE3",""
+"PHSUBW xmm1, xmm2/m128","66 0F 38 05 /r","V","V","SSSE3",""
+"PINSRB xmm1, r32/m8, imm8u","66 0F 3A 20 /r ib","V","V","SSE4_1",""
+"PINSRD xmm1, r/m32, imm8u","66 0F 3A 22 /r ib","V","V","SSE4_1","operand16,operand32"
+"PINSRQ xmm1, r/m64, imm8u","66 REX.W 0F 3A 22 /r ib","N.E.","V","SSE4_1",""
+"PINSRW mm, r32/m16, imm8u","0F C4 /r ib","V","V","SSE",""
+"PINSRW xmm, r32/m16, imm8u","66 0F C4 /r ib","V","V","SSE2",""
+"PMADDUBSW mm1, mm2/m64","0F 38 04 /r","V","V","SSSE3",""
+"PMADDUBSW xmm1, xmm2/m128","66 0F 38 04 /r","V","V","SSSE3",""
+"PMADDWD mm, mm/m64","0F F5 /r","V","V","MMX",""
+"PMADDWD xmm1, xmm2/m128","66 0F F5 /r","V","V","SSE2",""
+"PMAXSB xmm1, xmm2/m128","66 0F 38 3C /r","V","V","SSE4_1",""
+"PMAXSD xmm1, xmm2/m128","66 0F 38 3D /r","V","V","SSE4_1",""
+"PMAXSW mm1, mm2/m64","0F EE /r","V","V","SSE",""
+"PMAXSW xmm1, xmm2/m128","66 0F EE /r","V","V","SSE2",""
+"PMAXUB mm1, mm2/m64","0F DE /r","V","V","SSE",""
+"PMAXUB xmm1, xmm2/m128","66 0F DE /r","V","V","SSE2",""
+"PMAXUD xmm1, xmm2/m128","66 0F 38 3F /r","V","V","SSE4_1",""
+"PMAXUW xmm1, xmm2/m128","66 0F 38 3E /r","V","V","SSE4_1",""
+"PMINSB xmm1, xmm2/m128","66 0F 38 38 /r","V","V","SSE4_1",""
+"PMINSD xmm1, xmm2/m128","66 0F 38 39 /r","V","V","SSE4_1",""
+"PMINSW mm1, mm2/m64","0F EA /r","V","V","SSE",""
+"PMINSW xmm1, xmm2/m128","66 0F EA /r","V","V","SSE2",""
+"PMINUB mm1, mm2/m64","0F DA /r","V","V","SSE",""
+"PMINUB xmm1, xmm2/m128","66 0F DA /r","V","V","SSE2",""
+"PMINUD xmm1, xmm2/m128","66 0F 38 3B /r","V","V","SSE4_1",""
+"PMINUW xmm1, xmm2/m128","66 0F 38 3A /r","V","V","SSE4_1",""
+"PMOVMSKB r32, mm2","0F D7 /r","V","V","SSE",""
+"PMOVMSKB r32, xmm2","66 0F D7 /r","V","V","SSE2",""
+"PMOVSXBD xmm1, xmm2/m32","66 0F 38 21 /r","V","V","SSE4_1",""
+"PMOVSXBQ xmm1, xmm2/m16","66 0F 38 22 /r","V","V","SSE4_1",""
+"PMOVSXBW xmm1, xmm2/m64","66 0F 38 20 /r","V","V","SSE4_1",""
+"PMOVSXDQ xmm1, xmm2/m64","66 0F 38 25 /r","V","V","SSE4_1",""
+"PMOVSXWD xmm1, xmm2/m64","66 0F 38 23 /r","V","V","SSE4_1",""
+"PMOVSXWQ xmm1, xmm2/m32","66 0F 38 24 /r","V","V","SSE4_1",""
+"PMOVZXBD xmm1, xmm2/m32","66 0F 38 31 /r","V","V","SSE4_1",""
+"PMOVZXBQ xmm1, xmm2/m16","66 0F 38 32 /r","V","V","SSE4_1",""
+"PMOVZXBW xmm1, xmm2/m64","66 0F 38 30 /r","V","V","SSE4_1",""
+"PMOVZXDQ xmm1, xmm2/m64","66 0F 38 35 /r","V","V","SSE4_1",""
+"PMOVZXWD xmm1, xmm2/m64","66 0F 38 33 /r","V","V","SSE4_1",""
+"PMOVZXWQ xmm1, xmm2/m32","66 0F 38 34 /r","V","V","SSE4_1",""
+"PMULDQ xmm1, xmm2/m128","66 0F 38 28 /r","V","V","SSE4_1",""
+"PMULHRSW mm1, mm2/m64","0F 38 0B /r","V","V","SSSE3",""
+"PMULHRSW xmm1, xmm2/m128","66 0F 38 0B /r","V","V","SSSE3",""
+"PMULHUW mm1, mm2/m64","0F E4 /r","V","V","SSE",""
+"PMULHUW xmm1, xmm2/m128","66 0F E4 /r","V","V","SSE2",""
+"PMULHW mm, mm/m64","0F E5 /r","V","V","MMX",""
+"PMULHW xmm1, xmm2/m128","66 0F E5 /r","V","V","SSE2",""
+"PMULLD xmm1, xmm2/m128","66 0F 38 40 /r","V","V","SSE4_1",""
+"PMULLW mm, mm/m64","0F D5 /r","V","V","MMX",""
+"PMULLW xmm1, xmm2/m128","66 0F D5 /r","V","V","SSE2",""
+"PMULUDQ mm1, mm2/m64","0F F4 /r","V","V","SSE2",""
+"PMULUDQ xmm1, xmm2/m128","66 0F F4 /r","V","V","SSE2",""
+"POP DS","1F","V","I","",""
+"POP ES","07","V","I","",""
+"POP FS","0F A1","N.E.","V","","operand32,operand64"
+"POP FS","0F A1","V","N.E.","","operand32"
+"POP FS","0F A1","V","V","","operand16"
+"POP GS","0F A9","N.E.","V","","operand32,operand64"
+"POP GS","0F A9","V","N.E.","","operand32"
+"POP GS","0F A9","V","V","","operand16"
+"POP SS","17","V","I","",""
+"POP r/m16","8F /0","V","V","","operand16"
+"POP r/m32","8F /0","V","N.E.","","operand32"
+"POP r/m64","8F /0","N.E.","V","","operand32,operand64"
+"POP r16op","58+rw","V","V","","operand16"
+"POP r32op","58+rd","V","N.E.","","operand32"
+"POP r64op","58+rd","N.E.","V","","operand32,operand64"
+"POPA","61","V","I","","operand16"
+"POPAD","61","V","I","","operand32"
+"POPCNT r16, r/m16","F3 0F B8 /r","V","V","","operand16"
+"POPCNT r32, r/m32","F3 0F B8 /r","V","V","","operand32"
+"POPCNT r64, r/m64","F3 REX.W 0F B8 /r","N.E.","V","",""
+"POPF","9D","V","V","","operand16"
+"POPFD","9D","V","N.E.","","operand32"
+"POPFQ","9D","N.E.","V","","operand32,operand64"
+"POR mm, mm/m64","0F EB /r","V","V","MMX",""
+"POR xmm1, xmm2/m128","66 0F EB /r","V","V","SSE2",""
+"PREFETCHNTA m8","0F 18 /0","V","V","",""
+"PREFETCHT0 m8","0F 18 /1","V","V","",""
+"PREFETCHT1 m8","0F 18 /2","V","V","",""
+"PREFETCHT2 m8","0F 18 /3","V","V","",""
+"PREFETCHW m8","0F 0D /1","V","V","PRFCHW",""
+"PSADBW mm1, mm2/m64","0F F6 /r","V","V","SSE",""
+"PSADBW xmm1, xmm2/m128","66 0F F6 /r","V","V","SSE2",""
+"PSHUFB mm1, mm2/m64","0F 38 00 /r","V","V","SSSE3",""
+"PSHUFB xmm1, xmm2/m128","66 0F 38 00 /r","V","V","SSSE3",""
+"PSHUFD xmm1, xmm2/m128, imm8u","66 0F 70 /r ib","V","V","SSE2",""
+"PSHUFHW xmm1, xmm2/m128, imm8u","F3 0F 70 /r ib","V","V","SSE2",""
+"PSHUFLW xmm1, xmm2/m128, imm8u","F2 0F 70 /r ib","V","V","SSE2",""
+"PSHUFW mm1, mm2/m64, imm8u","0F 70 /r ib","V","V","",""
+"PSIGNB mm1, mm2/m64","0F 38 08 /r","V","V","SSSE3",""
+"PSIGNB xmm1, xmm2/m128","66 0F 38 08 /r","V","V","SSSE3",""
+"PSIGND mm1, mm2/m64","0F 38 0A /r","V","V","SSSE3",""
+"PSIGND xmm1, xmm2/m128","66 0F 38 0A /r","V","V","SSSE3",""
+"PSIGNW mm1, mm2/m64","0F 38 09 /r","V","V","SSSE3",""
+"PSIGNW xmm1, xmm2/m128","66 0F 38 09 /r","V","V","SSSE3",""
+"PSLLD mm, mm/m64","0F F2 /r","V","V","MMX",""
+"PSLLD mm2, imm8u","0F 72 /6 ib","V","V","MMX",""
+"PSLLD xmm1, xmm2/m128","66 0F F2 /r","V","V","SSE2",""
+"PSLLD xmm2, imm8u","66 0F 72 /6 ib","V","V","SSE2",""
+"PSLLDQ xmm2, imm8u","66 0F 73 /7 ib","V","V","SSE2",""
+"PSLLQ mm, mm/m64","0F F3 /r","V","V","MMX",""
+"PSLLQ mm2, imm8u","0F 73 /6 ib","V","V","MMX",""
+"PSLLQ xmm1, xmm2/m128","66 0F F3 /r","V","V","SSE2",""
+"PSLLQ xmm2, imm8u","66 0F 73 /6 ib","V","V","SSE2",""
+"PSLLW mm, mm/m64","0F F1 /r","V","V","MMX",""
+"PSLLW mm2, imm8u","0F 71 /6 ib","V","V","MMX",""
+"PSLLW xmm1, xmm2/m128","66 0F F1 /r","V","V","SSE2",""
+"PSLLW xmm2, imm8u","66 0F 71 /6 ib","V","V","SSE2",""
+"PSRAD mm, mm/m64","0F E2 /r","V","V","MMX",""
+"PSRAD mm2, imm8u","0F 72 /4 ib","V","V","MMX",""
+"PSRAD xmm1, xmm2/m128","66 0F E2 /r","V","V","SSE2",""
+"PSRAD xmm2, imm8u","66 0F 72 /4 ib","V","V","SSE2",""
+"PSRAW mm, mm/m64","0F E1 /r","V","V","MMX",""
+"PSRAW mm2, imm8u","0F 71 /4 ib","V","V","MMX",""
+"PSRAW xmm1, xmm2/m128","66 0F E1 /r","V","V","SSE2",""
+"PSRAW xmm2, imm8u","66 0F 71 /4 ib","V","V","SSE2",""
+"PSRLD mm, mm/m64","0F D2 /r","V","V","MMX",""
+"PSRLD mm2, imm8u","0F 72 /2 ib","V","V","MMX",""
+"PSRLD xmm1, xmm2/m128","66 0F D2 /r","V","V","SSE2",""
+"PSRLD xmm2, imm8u","66 0F 72 /2 ib","V","V","SSE2",""
+"PSRLDQ xmm2, imm8u","66 0F 73 /3 ib","V","V","SSE2",""
+"PSRLQ mm, mm/m64","0F D3 /r","V","V","MMX",""
+"PSRLQ mm2, imm8u","0F 73 /2 ib","V","V","MMX",""
+"PSRLQ xmm1, xmm2/m128","66 0F D3 /r","V","V","SSE2",""
+"PSRLQ xmm2, imm8u","66 0F 73 /2 ib","V","V","SSE2",""
+"PSRLW mm, mm/m64","0F D1 /r","V","V","MMX",""
+"PSRLW mm2, imm8u","0F 71 /2 ib","V","V","MMX",""
+"PSRLW xmm1, xmm2/m128","66 0F D1 /r","V","V","SSE2",""
+"PSRLW xmm2, imm8u","66 0F 71 /2 ib","V","V","SSE2",""
+"PSUBB mm, mm/m64","0F F8 /r","V","V","MMX",""
+"PSUBB xmm1, xmm2/m128","66 0F F8 /r","V","V","SSE2",""
+"PSUBD mm, mm/m64","0F FA /r","V","V","MMX",""
+"PSUBD xmm1, xmm2/m128","66 0F FA /r","V","V","SSE2",""
+"PSUBQ mm1, mm2/m64","0F FB /r","V","V","SSE2",""
+"PSUBQ xmm1, xmm2/m128","66 0F FB /r","V","V","SSE2",""
+"PSUBSB mm, mm/m64","0F E8 /r","V","V","MMX",""
+"PSUBSB xmm1, xmm2/m128","66 0F E8 /r","V","V","SSE2",""
+"PSUBSW mm, mm/m64","0F E9 /r","V","V","MMX",""
+"PSUBSW xmm1, xmm2/m128","66 0F E9 /r","V","V","SSE2",""
+"PSUBUSB mm, mm/m64","0F D8 /r","V","V","MMX",""
+"PSUBUSB xmm1, xmm2/m128","66 0F D8 /r","V","V","SSE2",""
+"PSUBUSW mm, mm/m64","0F D9 /r","V","V","MMX",""
+"PSUBUSW xmm1, xmm2/m128","66 0F D9 /r","V","V","SSE2",""
+"PSUBW mm, mm/m64","0F F9 /r","V","V","MMX",""
+"PSUBW xmm1, xmm2/m128","66 0F F9 /r","V","V","SSE2",""
+"PTEST xmm1, xmm2/m128","66 0F 38 17 /r","V","V","SSE4_1",""
+"PUNPCKHBW mm, mm/m64","0F 68 /r","V","V","MMX",""
+"PUNPCKHBW xmm1, xmm2/m128","66 0F 68 /r","V","V","SSE2",""
+"PUNPCKHDQ mm, mm/m64","0F 6A /r","V","V","MMX",""
+"PUNPCKHDQ xmm1, xmm2/m128","66 0F 6A /r","V","V","SSE2",""
+"PUNPCKHQDQ xmm1, xmm2/m128","66 0F 6D /r","V","V","SSE2",""
+"PUNPCKHWD mm, mm/m64","0F 69 /r","V","V","MMX",""
+"PUNPCKHWD xmm1, xmm2/m128","66 0F 69 /r","V","V","SSE2",""
+"PUNPCKLBW mm, mm/m32","0F 60 /r","V","V","MMX",""
+"PUNPCKLBW xmm1, xmm2/m128","66 0F 60 /r","V","V","SSE2",""
+"PUNPCKLDQ mm, mm/m32","0F 62 /r","V","V","MMX",""
+"PUNPCKLDQ xmm1, xmm2/m128","66 0F 62 /r","V","V","SSE2",""
+"PUNPCKLQDQ xmm1, xmm2/m128","66 0F 6C /r","V","V","SSE2",""
+"PUNPCKLWD mm, mm/m32","0F 61 /r","V","V","MMX",""
+"PUNPCKLWD xmm1, xmm2/m128","66 0F 61 /r","V","V","SSE2",""
+"PUSH CS","0E","V","I","",""
+"PUSH DS","1E","V","I","",""
+"PUSH ES","06","V","I","",""
+"PUSH FS","0F A0","V","V","",""
+"PUSH GS","0F A8","V","V","",""
+"PUSH SS","16","V","I","",""
+"PUSH imm16","68 iw","V","V","","operand16"
+"PUSH imm32","68 id","V","V","","operand32,operand64"
+"PUSH imm8","6A ib","V","V","",""
+"PUSH r/m16","FF /6","V","V","","operand16"
+"PUSH r/m32","FF /6","V","N.E.","","operand32"
+"PUSH r/m64","FF /6","N.E.","V","","operand32,operand64"
+"PUSH r16op","50+rw","V","V","","operand16"
+"PUSH r32op","50+rd","V","N.E.","","operand32"
+"PUSH r64op","50+rd","N.E.","V","","operand32,operand64"
+"PUSHA","60","V","I","","operand16"
+"PUSHAD","60","V","I","","operand32"
+"PUSHF","9C","V","V","","operand16"
+"PUSHFD","9C","V","N.E.","","operand32"
+"PUSHFQ","9C","N.E.","V","","operand32,operand64"
+"PXOR mm, mm/m64","0F EF /r","V","V","MMX",""
+"PXOR xmm1, xmm2/m128","66 0F EF /r","V","V","SSE2",""
+"RCL r/m16, 1","D1 /2","V","V","","operand16"
+"RCL r/m16, CL","D3 /2","V","V","","operand16"
+"RCL r/m16, imm8u","C1 /2 ib","V","V","","operand16"
+"RCL r/m32, 1","D1 /2","V","V","","operand32"
+"RCL r/m32, CL","D3 /2","V","V","","operand32"
+"RCL r/m32, imm8u","C1 /2 ib","V","V","","operand32"
+"RCL r/m64, 1","REX.W + D1 /2","N.E.","V","",""
+"RCL r/m64, CL","REX.W + D3 /2","N.E.","V","",""
+"RCL r/m64, imm8u","REX.W + C1 /2 ib","N.E.","V","",""
+"RCL r/m8, 1","D0 /2","V","V","",""
+"RCL r/m8, 1","REX + D0 /2","N.E.","V","","pseudo64"
+"RCL r/m8, CL","D2 /2","V","V","",""
+"RCL r/m8, CL","REX + D2 /2","N.E.","V","","pseudo64"
+"RCL r/m8, imm8u","C0 /2 ib","V","V","",""
+"RCL r/m8, imm8u","REX + C0 /2 ib","N.E.","V","","pseudo64"
+"RCPPS xmm1, xmm2/m128","0F 53 /r","V","V","SSE",""
+"RCPSS xmm1, xmm2/m32","F3 0F 53 /r","V","V","SSE",""
+"RCR r/m16, 1","D1 /3","V","V","","operand16"
+"RCR r/m16, CL","D3 /3","V","V","","operand16"
+"RCR r/m16, imm8u","C1 /3 ib","V","V","","operand16"
+"RCR r/m32, 1","D1 /3","V","V","","operand32"
+"RCR r/m32, CL","D3 /3","V","V","","operand32"
+"RCR r/m32, imm8u","C1 /3 ib","V","V","","operand32"
+"RCR r/m64, 1","REX.W + D1 /3","N.E.","V","",""
+"RCR r/m64, CL","REX.W + D3 /3","N.E.","V","",""
+"RCR r/m64, imm8u","REX.W + C1 /3 ib","N.E.","V","",""
+"RCR r/m8, 1","D0 /3","V","V","",""
+"RCR r/m8, 1","REX + D0 /3","N.E.","V","","pseudo64"
+"RCR r/m8, CL","D2 /3","V","V","",""
+"RCR r/m8, CL","REX + D2 /3","N.E.","V","","pseudo64"
+"RCR r/m8, imm8u","C0 /3 ib","V","V","",""
+"RCR r/m8, imm8u","REX + C0 /3 ib","N.E.","V","","pseudo64"
+"RDFSBASE r/m32","F3 0F AE /0","I","V","FSGSBASE","modrm_regonly,operand16,operand32"
+"RDFSBASE r/m64","REX.W + F3 0F AE /0","I","V","FSGSBASE","modrm_regonly"
+"RDGSBASE r/m32","F3 0F AE /1","I","V","FSGSBASE","modrm_regonly,operand16,operand32"
+"RDGSBASE r/m64","REX.W + F3 0F AE /1","I","V","FSGSBASE","modrm_regonly"
+"RDMSR","0F 32","V","V","",""
+"RDPMC","0F 33","V","V","",""
+"RDRAND r64","REX.W + 0F C7 /6","I","V","RDRAND",""
+"RDRAND rmf16","0F C7 /6","V","V","RDRAND","operand16,modrm_regonly"
+"RDRAND rmf32","0F C7 /6","V","V","RDRAND","operand32,modrm_regonly"
+"RDTSC","0F 31","V","V","",""
+"RDTSCP","0F 01 F9","V","V","",""
+"REP INS m16, DX","F3 6D","V","V","","pseudo"
+"REP INS m32, DX","F3 6D","V","V","","pseudo"
+"REP INS m8, DX","F3 6C","N.E.","V","","pseudo"
+"REP INS m8, DX","F3 6C","V","V","","pseudo"
+"REP INS r/m32, DX","F3 6D","N.E.","V","","pseudo"
+"REP LODS AL","F3 AC","V","V","","pseudo"
+"REP LODS AL","F3 REX.W AC","N.E.","V","","pseudo"
+"REP LODS AX","F3 AD","V","V","","pseudo"
+"REP LODS EAX","F3 AD","V","V","","pseudo"
+"REP LODS RAX","F3 REX.W AD","N.E.","V","","pseudo"
+"REP MOVS m16, m16","F3 A5","V","V","","pseudo"
+"REP MOVS m32, m32","F3 A5","V","V","","pseudo"
+"REP MOVS m64, m64","F3 REX.W A5","N.E.","V","","pseudo"
+"REP MOVS m8, m8","F3 A4","V","V","","pseudo"
+"REP MOVS m8, m8","F3 REX.W A4","N.E.","V","","pseudo"
+"REP OUTS DX, r/m16","F3 6F","V","V","","pseudo"
+"REP OUTS DX, r/m32","F3 6F","V","V","","pseudo"
+"REP OUTS DX, r/m32","F3 REX.W 6F","N.E.","V","","pseudo"
+"REP OUTS DX, r/m8","F3 6E","V","V","","pseudo"
+"REP OUTS DX, r/m8","F3 REX.W 6E","N.E.","V","","pseudo"
+"REP STOS m16","F3 AB","V","V","","pseudo"
+"REP STOS m32","F3 AB","V","V","","pseudo"
+"REP STOS m64","F3 REX.W AB","N.E.","V","","pseudo"
+"REP STOS m8","F3 AA","V","V","","pseudo"
+"REP STOS m8","F3 REX.W AA","N.E.","V","","pseudo"
+"REPE CMPS m16, m16","F3 A7","V","V","","pseudo"
+"REPE CMPS m32, m32","F3 A7","V","V","","pseudo"
+"REPE CMPS m64, m64","F3 REX.W A7","N.E.","V","","pseudo"
+"REPE CMPS m8, m8","F3 A6","V","V","","pseudo"
+"REPE CMPS m8, m8","F3 REX.W A6","N.E.","V","","pseudo"
+"REPE SCAS m16","F3 AF","V","V","","pseudo"
+"REPE SCAS m32","F3 AF","V","V","","pseudo"
+"REPE SCAS m8","F3 AE","V","V","","pseudo"
+"REPE SCAS m8","F3 REX.W AE","N.E.","V","","pseudo"
+"RET imm16u","C2 iw","V","V","",""
+"RET","C3","V","V","",""
+"ROL r/m16, 1","D1 /0","V","V","","operand16"
+"ROL r/m16, CL","D3 /0","V","V","","operand16"
+"ROL r/m16, imm8u","C1 /0 ib","V","V","","operand16"
+"ROL r/m32, 1","D1 /0","V","V","","operand32"
+"ROL r/m32, CL","D3 /0","V","V","","operand32"
+"ROL r/m32, imm8u","C1 /0 ib","V","V","","operand32"
+"ROL r/m64, 1","REX.W + D1 /0","N.E.","V","",""
+"ROL r/m64, CL","REX.W + D3 /0","N.E.","V","",""
+"ROL r/m64, imm8u","REX.W + C1 /0 ib","N.E.","V","",""
+"ROL r/m8, 1","D0 /0","V","V","",""
+"ROL r/m8, 1","REX + D0 /0","N.E.","V","","pseudo64"
+"ROL r/m8, CL","D2 /0","V","V","",""
+"ROL r/m8, CL","REX + D2 /0","N.E.","V","","pseudo64"
+"ROL r/m8, imm8u","C0 /0 ib","V","V","",""
+"ROL r/m8, imm8u","REX + C0 /0 ib","N.E.","V","","pseudo64"
+"ROR r/m16, 1","D1 /1","V","V","","operand16"
+"ROR r/m16, CL","D3 /1","V","V","","operand16"
+"ROR r/m16, imm8u","C1 /1 ib","V","V","","operand16"
+"ROR r/m32, 1","D1 /1","V","V","","operand32"
+"ROR r/m32, CL","D3 /1","V","V","","operand32"
+"ROR r/m32, imm8u","C1 /1 ib","V","V","","operand32"
+"ROR r/m64, 1","REX.W + D1 /1","N.E.","V","",""
+"ROR r/m64, CL","REX.W + D3 /1","N.E.","V","",""
+"ROR r/m64, imm8u","REX.W + C1 /1 ib","N.E.","V","",""
+"ROR r/m8, 1","D0 /1","V","V","",""
+"ROR r/m8, 1","REX + D0 /1","N.E.","V","","pseudo64"
+"ROR r/m8, CL","D2 /1","V","V","",""
+"ROR r/m8, CL","REX + D2 /1","N.E.","V","","pseudo64"
+"ROR r/m8, imm8u","C0 /1 ib","V","V","",""
+"ROR r/m8, imm8u","REX + C0 /1 ib","N.E.","V","","pseudo64"
+"RORX r32, r/m32, imm8u","VEX.LZ.F2.0F3A.W0 F0 /r ib","V","V","BMI2",""
+"RORX r64, r/m64, imm8u","VEX.LZ.F2.0F3A.W1 F0 /r ib","N.E.","V","BMI2",""
+"ROUNDPD xmm1, xmm2/m128, imm8u","66 0F 3A 09 /r ib","V","V","SSE4_1",""
+"ROUNDPS xmm1, xmm2/m128, imm8u","66 0F 3A 08 /r ib","V","V","SSE4_1",""
+"ROUNDSD xmm1, xmm2/m64, imm8u","66 0F 3A 0B /r ib","V","V","SSE4_1",""
+"ROUNDSS xmm1, xmm2/m32, imm8u","66 0F 3A 0A /r ib","V","V","SSE4_1",""
+"RSM","0F AA","V","V","",""
+"RSQRTPS xmm1, xmm2/m128","0F 52 /r","V","V","SSE",""
+"RSQRTSS xmm1, xmm2/m32","F3 0F 52 /r","V","V","SSE",""
+"SAHF","9E","V","V","",""
+"SAL r/m16, 1","D1 /4","V","V","","pseudo"
+"SAL r/m16, CL","D3 /4","V","V","","pseudo"
+"SAL r/m16, imm8","C1 /4 ib","V","V","","pseudo"
+"SAL r/m32, 1","D1 /4","V","V","","pseudo"
+"SAL r/m32, CL","D3 /4","V","V","","pseudo"
+"SAL r/m32, imm8","C1 /4 ib","V","V","","pseudo"
+"SAL r/m64, 1","REX.W + D1 /4","N.E.","V","","pseudo"
+"SAL r/m64, CL","REX.W + D3 /4","N.E.","V","","pseudo"
+"SAL r/m64, imm8","REX.W + C1 /4 ib","N.E.","V","","pseudo"
+"SAL r/m8, 1","D0 /4","V","V","","pseudo"
+"SAL r/m8, 1","REX + D0 /4","N.E.","V","","pseudo"
+"SAL r/m8, CL","D2 /4","V","V","","pseudo"
+"SAL r/m8, CL","REX + D2 /4","N.E.","V","","pseudo"
+"SAL r/m8, imm8u","C0 /4 ib","V","V","","pseudo"
+"SAL r/m8, imm8u","REX + C0 /4 ib","N.E.","V","","pseudo"
+"SAR r/m16, 1","D1 /7","V","V","","operand16"
+"SAR r/m16, CL","D3 /7","V","V","","operand16"
+"SAR r/m16, imm8u","C1 /7 ib","V","V","","operand16"
+"SAR r/m32, 1","D1 /7","V","V","","operand32"
+"SAR r/m32, CL","D3 /7","V","V","","operand32"
+"SAR r/m32, imm8u","C1 /7 ib","V","V","","operand32"
+"SAR r/m64, 1","REX.W + D1 /7","N.E.","V","",""
+"SAR r/m64, CL","REX.W + D3 /7","N.E.","V","",""
+"SAR r/m64, imm8u","REX.W + C1 /7 ib","N.E.","V","",""
+"SAR r/m8, 1","D0 /7","V","V","",""
+"SAR r/m8, 1","REX + D0 /7","N.E.","V","","pseudo64"
+"SAR r/m8, CL","D2 /7","V","V","",""
+"SAR r/m8, CL","REX + D2 /7","N.E.","V","","pseudo64"
+"SAR r/m8, imm8u","C0 /7 ib","V","V","",""
+"SAR r/m8, imm8u","REX + C0 /7 ib","N.E.","V","","pseudo64"
+"SARX r32a, r/m32, r32b","VEX.NDS.LZ.F3.0F38.W0 F7 /r","V","V","BMI2",""
+"SARX r64a, r/m64, r64b","VEX.NDS.LZ.F3.0F38.W1 F7 /r","N.E.","V","BMI2",""
+"SBB AL, imm8u","1C ib","V","V","",""
+"SBB AX, imm16","1D iw","V","V","","operand16"
+"SBB EAX, imm32","1D id","V","V","","operand32"
+"SBB RAX, imm32","REX.W + 1D id","N.E.","V","",""
+"SBB r/m16, imm16","81 /3 iw","V","V","","operand16"
+"SBB r/m16, imm8","83 /3 ib","V","V","","operand16"
+"SBB r/m16, r16","19 /r","V","V","","operand16"
+"SBB r/m32, imm32","81 /3 id","V","V","","operand32"
+"SBB r/m32, imm8","83 /3 ib","V","V","","operand32"
+"SBB r/m32, r32","19 /r","V","V","","operand32"
+"SBB r/m64, imm32","REX.W + 81 /3 id","N.E.","V","",""
+"SBB r/m64, imm8","REX.W + 83 /3 ib","N.E.","V","",""
+"SBB r/m64, r64","REX.W + 19 /r","N.E.","V","",""
+"SBB r/m8, imm8u","80 /3 ib","V","V","",""
+"SBB r/m8, imm8u","REX + 80 /3 ib","N.E.","V","","pseudo64"
+"SBB r/m8, r8","18 /r","V","V","",""
+"SBB r/m8, r8","REX + 18 /r","N.E.","V","","pseudo64"
+"SBB r16, r/m16","1B /r","V","V","","operand16"
+"SBB r32, r/m32","1B /r","V","V","","operand32"
+"SBB r64, r/m64","REX.W + 1B /r","N.E.","V","",""
+"SBB r8, r/m8","1A /r","V","V","",""
+"SBB r8, r/m8","REX + 1A /r","N.E.","V","","pseudo64"
+"SCAS m16","AF","V","V","","pseudo"
+"SCAS m32","AF","V","V","","pseudo"
+"SCAS m64","REX.W + AF","N.E.","V","","pseudo"
+"SCAS m8","AE","V","V","","pseudo"
+"SCASB","AE","V","V","",""
+"SCASD","AF","V","V","","operand32"
+"SCASQ","REX.W + AF","N.E.","V","",""
+"SCASW","AF","V","V","","operand16"
+"SETA r/m8","0F 97 /r","V","V","",""
+"SETA r/m8","REX + 0F 97 /r","N.E.","V","","pseudo64"
+"SETAE r/m8","0F 93 /r","V","V","",""
+"SETAE r/m8","REX + 0F 93 /r","N.E.","V","","pseudo64"
+"SETB r/m8","0F 92 /r","V","V","",""
+"SETB r/m8","REX + 0F 92 /r","N.E.","V","","pseudo64"
+"SETBE r/m8","0F 96 /r","V","V","",""
+"SETBE r/m8","REX + 0F 96 /r","N.E.","V","","pseudo64"
+"SETC r/m8","0F 92 /r","V","V","","pseudo"
+"SETC r/m8","REX + 0F 92 /r","N.E.","V","","pseudo"
+"SETE r/m8","0F 94 /r","V","V","",""
+"SETE r/m8","REX + 0F 94 /r","N.E.","V","","pseudo64"
+"SETG r/m8","0F 9F /r","V","V","",""
+"SETG r/m8","REX + 0F 9F /r","N.E.","V","","pseudo64"
+"SETGE r/m8","0F 9D /r","V","V","",""
+"SETGE r/m8","REX + 0F 9D /r","N.E.","V","","pseudo64"
+"SETL r/m8","0F 9C /r","V","V","",""
+"SETL r/m8","REX + 0F 9C /r","N.E.","V","","pseudo64"
+"SETLE r/m8","0F 9E /r","V","V","",""
+"SETLE r/m8","REX + 0F 9E /r","N.E.","V","","pseudo64"
+"SETNA r/m8","0F 96 /r","V","V","","pseudo"
+"SETNA r/m8","REX + 0F 96 /r","N.E.","V","","pseudo"
+"SETNAE r/m8","0F 92 /r","V","V","","pseudo"
+"SETNAE r/m8","REX + 0F 92 /r","N.E.","V","","pseudo"
+"SETNB r/m8","0F 93 /r","V","V","","pseudo"
+"SETNB r/m8","REX + 0F 93 /r","N.E.","V","","pseudo"
+"SETNBE r/m8","0F 97 /r","V","V","","pseudo"
+"SETNBE r/m8","REX + 0F 97 /r","N.E.","V","","pseudo"
+"SETNC r/m8","0F 93 /r","V","V","","pseudo"
+"SETNC r/m8","REX + 0F 93 /r","N.E.","V","","pseudo"
+"SETNE r/m8","0F 95 /r","V","V","",""
+"SETNE r/m8","REX + 0F 95 /r","N.E.","V","","pseudo64"
+"SETNG r/m8","0F 9E /r","V","V","","pseudo"
+"SETNG r/m8","REX + 0F 9E /r","N.E.","V","","pseudo"
+"SETNGE r/m8","0F 9C /r","V","V","","pseudo"
+"SETNGE r/m8","REX + 0F 9C /r","N.E.","V","","pseudo"
+"SETNL r/m8","0F 9D /r","V","V","","pseudo"
+"SETNL r/m8","REX + 0F 9D /r","N.E.","V","","pseudo"
+"SETNLE r/m8","0F 9F /r","V","V","","pseudo"
+"SETNLE r/m8","REX + 0F 9F /r","N.E.","V","","pseudo"
+"SETNO r/m8","0F 91 /r","V","V","",""
+"SETNO r/m8","REX + 0F 91 /r","N.E.","V","","pseudo64"
+"SETNP r/m8","0F 9B /r","V","V","",""
+"SETNP r/m8","REX + 0F 9B /r","N.E.","V","","pseudo64"
+"SETNS r/m8","0F 99 /r","V","V","",""
+"SETNS r/m8","REX + 0F 99 /r","N.E.","V","","pseudo64"
+"SETNZ r/m8","0F 95 /r","V","V","","pseudo"
+"SETNZ r/m8","REX + 0F 95 /r","N.E.","V","","pseudo"
+"SETO r/m8","0F 90 /r","V","V","",""
+"SETO r/m8","REX + 0F 90 /r","N.E.","V","","pseudo64"
+"SETP r/m8","0F 9A /r","V","V","",""
+"SETP r/m8","REX + 0F 9A /r","N.E.","V","","pseudo64"
+"SETPE r/m8","0F 9A /r","V","V","","pseudo"
+"SETPE r/m8","REX + 0F 9A /r","N.E.","V","","pseudo"
+"SETPO r/m8","0F 9B /r","V","V","","pseudo"
+"SETPO r/m8","REX + 0F 9B /r","N.E.","V","","pseudo"
+"SETS r/m8","0F 98 /r","V","V","",""
+"SETS r/m8","REX + 0F 98 /r","N.E.","V","","pseudo64"
+"SETZ r/m8","0F 94 /r","V","V","","pseudo"
+"SETZ r/m8","REX + 0F 94 /r","N.E.","V","","pseudo"
+"SFENCE","0F AE F8","V","V","",""
+"SGDT m","0F 01 /0","V","V","",""
+"SHL r/m16, 1","D1 /4","V","V","","operand16"
+"SHL r/m16, CL","D3 /4","V","V","","operand16"
+"SHL r/m16, imm8u","C1 /4 ib","V","V","","operand16"
+"SHL r/m32, 1","D1 /4","V","V","","operand32"
+"SHL r/m32, CL","D3 /4","V","V","","operand32"
+"SHL r/m32, imm8u","C1 /4 ib","V","V","","operand32"
+"SHL r/m64, 1","REX.W + D1 /4","N.E.","V","",""
+"SHL r/m64, CL","REX.W + D3 /4","N.E.","V","",""
+"SHL r/m64, imm8u","REX.W + C1 /4 ib","N.E.","V","",""
+"SHL r/m8, 1","D0 /4","V","V","",""
+"SHL r/m8, 1","REX + D0 /4","N.E.","V","","pseudo64"
+"SHL r/m8, CL","D2 /4","V","V","",""
+"SHL r/m8, CL","REX + D2 /4","N.E.","V","","pseudo64"
+"SHL r/m8, imm8u","C0 /4 ib","V","V","",""
+"SHL r/m8, imm8u","REX + C0 /4 ib","N.E.","V","","pseudo64"
+"SHLD r/m16, r16, CL","0F A5 /r","V","V","","operand16"
+"SHLD r/m16, r16, imm8u","0F A4 /r ib","V","V","","operand16"
+"SHLD r/m32, r32, CL","0F A5 /r","V","V","","operand32"
+"SHLD r/m32, r32, imm8u","0F A4 /r ib","V","V","","operand32"
+"SHLD r/m64, r64, CL","REX.W + 0F A5 /r","N.E.","V","",""
+"SHLD r/m64, r64, imm8u","REX.W + 0F A4 /r ib","N.E.","V","",""
+"SHLX r32a, r/m32, r32b","VEX.NDS.LZ.66.0F38.W0 F7 /r","V","V","BMI2",""
+"SHLX r64a, r/m64, r64b","VEX.NDS.LZ.66.0F38.W1 F7 /r","N.E.","V","BMI2",""
+"SHR r/m16, 1","D1 /5","V","V","","operand16"
+"SHR r/m16, CL","D3 /5","V","V","","operand16"
+"SHR r/m16, imm8u","C1 /5 ib","V","V","","operand16"
+"SHR r/m32, 1","D1 /5","V","V","","operand32"
+"SHR r/m32, CL","D3 /5","V","V","","operand32"
+"SHR r/m32, imm8u","C1 /5 ib","V","V","","operand32"
+"SHR r/m64, 1","REX.W + D1 /5","N.E.","V","",""
+"SHR r/m64, CL","REX.W + D3 /5","N.E.","V","",""
+"SHR r/m64, imm8u","REX.W + C1 /5 ib","N.E.","V","",""
+"SHR r/m8, 1","D0 /5","V","V","",""
+"SHR r/m8, 1","REX + D0 /5","N.E.","V","","pseudo64"
+"SHR r/m8, CL","D2 /5","V","V","",""
+"SHR r/m8, CL","REX + D2 /5","N.E.","V","","pseudo64"
+"SHR r/m8, imm8u","C0 /5 ib","V","V","",""
+"SHR r/m8, imm8u","REX + C0 /5 ib","N.E.","V","","pseudo64"
+"SHRD r/m16, r16, CL","0F AD /r","V","V","","operand16"
+"SHRD r/m16, r16, imm8u","0F AC /r ib","V","V","","operand16"
+"SHRD r/m32, r32, CL","0F AD /r","V","V","","operand32"
+"SHRD r/m32, r32, imm8u","0F AC /r ib","V","V","","operand32"
+"SHRD r/m64, r64, CL","REX.W + 0F AD /r","N.E.","V","",""
+"SHRD r/m64, r64, imm8u","REX.W + 0F AC /r ib","N.E.","V","",""
+"SHRX r32a, r/m32, r32b","VEX.NDS.LZ.F2.0F38.W0 F7 /r","V","V","BMI2",""
+"SHRX r64a, r/m64, r64b","VEX.NDS.LZ.F2.0F38.W1 F7 /r","N.E.","V","BMI2",""
+"SHUFPD xmm1, xmm2/m128, imm8u","66 0F C6 /r ib","V","V","SSE2",""
+"SHUFPS xmm1, xmm2/m128, imm8u","0F C6 /r ib","V","V","SSE",""
+"SIDT m","0F 01 /1","V","V","",""
+"SLDT r/m16","0F 00 /0","V","V","","operand16"
+"SLDT r32/m16","0F 00 /0","V","V","","operand32"
+"SLDT r64/m16","REX.W + 0F 00 /0","V","V","",""
+"SMSW r/m16","0F 01 /4","V","V","","operand16"
+"SMSW r32/m16","0F 01 /4","V","V","","operand32"
+"SMSW r64/m16","REX.W + 0F 01 /4","V","V","",""
+"SQRTPD xmm1, xmm2/m128","66 0F 51 /r","V","V","SSE2",""
+"SQRTPS xmm1, xmm2/m128","0F 51 /r","V","V","SSE",""
+"SQRTSD xmm1, xmm2/m64","F2 0F 51 /r","V","V","SSE2",""
+"SQRTSS xmm1, xmm2/m32","F3 0F 51 /r","V","V","SSE",""
+"STC","F9","V","V","",""
+"STD","FD","V","V","",""
+"STI","FB","V","V","",""
+"STMXCSR m32","0F AE /3","V","V","SSE",""
+"STOS m16","AB","V","V","","pseudo"
+"STOS m32","AB","V","V","","pseudo"
+"STOS m64","REX.W + AB","N.E.","V","","pseudo"
+"STOS m8","AA","V","V","","pseudo"
+"STOSB","AA","V","V","",""
+"STOSD","AB","V","V","","operand32"
+"STOSQ","REX.W + AB","N.E.","V","",""
+"STOSW","AB","V","V","","operand16"
+"STR r/m16","0F 00 /1","V","V","","operand16"
+"STR r32/m16","0F 00 /1","V","V","","operand32"
+"STR r64/m16","0F 00 /1","V","V","","operand64"
+"SUB AL, imm8u","2C ib","V","V","",""
+"SUB AX, imm16","2D iw","V","V","","operand16"
+"SUB EAX, imm32","2D id","V","V","","operand32"
+"SUB RAX, imm32","REX.W + 2D id","N.E.","V","",""
+"SUB r/m16, imm16","81 /5 iw","V","V","","operand16"
+"SUB r/m16, imm8","83 /5 ib","V","V","","operand16"
+"SUB r/m16, r16","29 /r","V","V","","operand16"
+"SUB r/m32, imm32","81 /5 id","V","V","","operand32"
+"SUB r/m32, imm8","83 /5 ib","V","V","","operand32"
+"SUB r/m32, r32","29 /r","V","V","","operand32"
+"SUB r/m64, imm32","REX.W + 81 /5 id","N.E.","V","",""
+"SUB r/m64, imm8","REX.W + 83 /5 ib","N.E.","V","",""
+"SUB r/m64, r64","REX.W + 29 /r","N.E.","V","",""
+"SUB r/m8, imm8u","80 /5 ib","V","V","",""
+"SUB r/m8, imm8u","REX + 80 /5 ib","N.E.","V","","pseudo64"
+"SUB r/m8, r8","28 /r","V","V","",""
+"SUB r/m8, r8","REX + 28 /r","N.E.","V","","pseudo64"
+"SUB r16, r/m16","2B /r","V","V","","operand16"
+"SUB r32, r/m32","2B /r","V","V","","operand32"
+"SUB r64, r/m64","REX.W + 2B /r","N.E.","V","",""
+"SUB r8, r/m8","2A /r","V","V","",""
+"SUB r8, r/m8","REX + 2A /r","N.E.","V","","pseudo64"
+"SUBPD xmm1, xmm2/m128","66 0F 5C /r","V","V","SSE2",""
+"SUBPS xmm1, xmm2/m128","0F 5C /r","V","V","SSE",""
+"SUBSD xmm1, xmm2/m64","F2 0F 5C /r","V","V","SSE2",""
+"SUBSS xmm1, xmm2/m32","F3 0F 5C /r","V","V","SSE",""
+"SWAPGS","0F 01 F8","I","V","",""
+"SYSCALL","0F 05","I","V","",""
+"SYSENTER","0F 34","V","V","",""
+"SYSEXIT","0F 35","V","V","",""
+"SYSEXIT","REX.W + 0F 35","V","V","",""
+"SYSRET","0F 07","I","V","",""
+"SYSRET","REX.W + 0F 07","I","V","","pseudo"
+"TEST AL, imm8u","A8 ib","V","V","",""
+"TEST AX, imm16","A9 iw","V","V","","operand16"
+"TEST EAX, imm32","A9 id","V","V","","operand32"
+"TEST RAX, imm32","REX.W + A9 id","N.E.","V","",""
+"TEST r/m16, imm16","F7 /0 iw","V","V","","operand16"
+"TEST r/m16, r16","85 /r","V","V","","operand16"
+"TEST r/m32, imm32","F7 /0 id","V","V","","operand32"
+"TEST r/m32, r32","85 /r","V","V","","operand32"
+"TEST r/m64, imm32","REX.W + F7 /0 id","N.E.","V","",""
+"TEST r/m64, r64","REX.W + 85 /r","N.E.","V","",""
+"TEST r/m8, imm8u","F6 /0 ib","V","V","",""
+"TEST r/m8, imm8u","REX + F6 /0 ib","N.E.","V","","pseudo64"
+"TEST r/m8, r8","84 /r","V","V","",""
+"TEST r/m8, r8","REX + 84 /r","N.E.","V","","pseudo64"
+"TZCNT r16, r/m16","F3 0F BC /r","V","V","BMI1","operand16"
+"TZCNT r32, r/m32","F3 0F BC /r","V","V","BMI1","operand32"
+"TZCNT r64, r/m64","REX.W + F3 0F BC /r","N.E.","V","BMI1",""
+"UCOMISD xmm1, xmm2/m64","66 0F 2E /r","V","V","SSE2",""
+"UCOMISS xmm1, xmm2/m32","0F 2E /r","V","V","SSE",""
+"UD1","0F B9","V","V","",""
+"UD2","0F 0B","V","V","",""
+"UNPCKHPD xmm1, xmm2/m128","66 0F 15 /r","V","V","SSE2",""
+"UNPCKHPS xmm1, xmm2/m128","0F 15 /r","V","V","SSE",""
+"UNPCKLPD xmm1, xmm2/m128","66 0F 14 /r","V","V","SSE2",""
+"UNPCKLPS xmm1, xmm2/m128","0F 14 /r","V","V","SSE",""
+"VADDPD xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F.WIG 58 /r","V","V","AVX",""
+"VADDPD ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F.WIG 58 /r","V","V","AVX",""
+"VADDPS xmm1, xmm2, xmm3/m128","VEX.NDS.128.0F.WIG 58 /r","V","V","AVX",""
+"VADDPS ymm1, ymm2, ymm3/m256","VEX.NDS.256.0F.WIG 58 /r","V","V","AVX",""
+"VADDSD xmm1, xmm2, xmm3/m64","VEX.NDS.LIG.F2.0F.WIG 58 /r","V","V","AVX",""
+"VADDSS xmm1, xmm2, xmm3/m32","VEX.NDS.LIG.F3.0F.WIG 58 /r","V","V","AVX",""
+"VADDSUBPD xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F.WIG D0 /r","V","V","AVX",""
+"VADDSUBPD ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F.WIG D0 /r","V","V","AVX",""
+"VADDSUBPS xmm1, xmm2, xmm3/m128","VEX.NDS.128.F2.0F.WIG D0 /r","V","V","AVX",""
+"VADDSUBPS ymm1, ymm2, ymm3/m256","VEX.NDS.256.F2.0F.WIG D0 /r","V","V","AVX",""
+"VAESDEC xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F38.WIG DE /r","V","V","Both AES and AVX flags",""
+"VAESDECLAST xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F38.WIG DF /r","V","V","Both AES and AVX flags",""
+"VAESENC xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F38.WIG DC /r","V","V","Both AES and AVX flags",""
+"VAESENCLAST xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F38.WIG DD /r","V","V","Both AES and AVX flags",""
+"VAESIMC xmm1, xmm2/m128","VEX.128.66.0F38.WIG DB /r","V","V","Both AES and AVX flags",""
+"VAESKEYGENASSIST xmm1, xmm2/m128, imm8","VEX.128.66.0F3A.WIG DF /r ib","V","V","Both AES and AVX flags",""
+"VANDNPD xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F.WIG 55 /r","V","V","AVX",""
+"VANDNPD ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F.WIG 55 /r","V","V","AVX",""
+"VANDNPS xmm1, xmm2, xmm3/m128","VEX.NDS.128.0F.WIG 55 /r","V","V","AVX",""
+"VANDNPS ymm1, ymm2, ymm3/m256","VEX.NDS.256.0F.WIG 55 /r","V","V","AVX",""
+"VANDPD xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F.WIG 54 /r","V","V","AVX",""
+"VANDPD ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F.WIG 54 /r","V","V","AVX",""
+"VANDPS xmm1, xmm2, xmm3/m128","VEX.NDS.128.0F.WIG 54 /r","V","V","AVX",""
+"VANDPS ymm1, ymm2, ymm3/m256","VEX.NDS.256.0F.WIG 54 /r","V","V","AVX",""
+"VBLENDPD xmm1, xmm2, xmm3/m128, imm8","VEX.NDS.128.66.0F3A.WIG 0D /r ib","V","V","AVX",""
+"VBLENDPD ymm1, ymm2, ymm3/m256, imm8","VEX.NDS.256.66.0F3A.WIG 0D /r ib","V","V","AVX",""
+"VBLENDPS xmm1, xmm2, xmm3/m128, imm8","VEX.NDS.128.66.0F3A.WIG 0C /r ib","V","V","AVX",""
+"VBLENDPS ymm1, ymm2, ymm3/m256, imm8","VEX.NDS.256.66.0F3A.WIG 0C /r ib","V","V","AVX",""
+"VBLENDVPD xmm1, xmm2, xmm3/m128, xmm4","VEX.NDS.128.66.0F3A.W0 4B /r /is4","V","V","AVX",""
+"VBLENDVPD ymm1, ymm2, ymm3/m256, ymm4","VEX.NDS.256.66.0F3A.W0 4B /r /is4","V","V","AVX",""
+"VBLENDVPS xmm1, xmm2, xmm3/m128, xmm4","VEX.NDS.128.66.0F3A.W0 4A /r /is4","V","V","AVX",""
+"VBLENDVPS ymm1, ymm2, ymm3/m256, ymm4","VEX.NDS.256.66.0F3A.W0 4A /r /is4","V","V","AVX",""
+"VBROADCASTF128 ymm1, m128","VEX.256.66.0F38.W0 1A /r","V","V","AVX",""
+"VBROADCASTI128 ymm1, m128","VEX.256.66.0F38.W0 5A /r","V","V","AVX2",""
+"VBROADCASTSD ymm1, m64","VEX.256.66.0F38.W0 19 /r","V","V","AVX",""
+"VBROADCASTSD ymm1, xmm2","VEX.256.66.0F38.W0 19 /r","V","V","AVX2",""
+"VBROADCASTSS xmm1, m32","VEX.128.66.0F38.W0 18 /r","V","V","AVX",""
+"VBROADCASTSS xmm1, xmm2","VEX.128.66.0F38.W0 18 /r","V","V","AVX2",""
+"VBROADCASTSS ymm1, m32","VEX.256.66.0F38.W0 18 /r","V","V","AVX",""
+"VBROADCASTSS ymm1, xmm2","VEX.256.66.0F38.W0 18 /r","V","V","AVX2",""
+"VCMPPD xmm1, xmm2, xmm3/m128, imm8","VEX.NDS.128.66.0F.WIG C2 /r ib","V","V","AVX",""
+"VCMPPD ymm1, ymm2, ymm3/m256, imm8","VEX.NDS.256.66.0F.WIG C2 /r ib","V","V","AVX",""
+"VCMPPS xmm1, xmm2, xmm3/m128, imm8","VEX.NDS.128.0F.WIG C2 /r ib","V","V","AVX",""
+"VCMPPS ymm1, ymm2, ymm3/m256, imm8","VEX.NDS.256.0F.WIG C2 /r ib","V","V","AVX",""
+"VCMPSD xmm1, xmm2, xmm3/m64, imm8","VEX.NDS.LIG.F2.0F.WIG C2 /r ib","V","V","AVX",""
+"VCMPSS xmm1, xmm2, xmm3/m32, imm8","VEX.NDS.LIG.F3.0F.WIG C2 /r ib","V","V","AVX",""
+"VCOMISD xmm1, xmm2/m64","VEX.LIG.66.0F.WIG 2F /r","V","V","AVX",""
+"VCOMISS xmm1, xmm2/m32","VEX.LIG.0F.WIG 2F /r","V","V","AVX",""
+"VCVTDQ2PD xmm1, xmm2/m64","VEX.128.F3.0F.WIG E6 /r","V","V","AVX",""
+"VCVTDQ2PD ymm1, xmm2/m128","VEX.256.F3.0F.WIG E6 /r","V","V","AVX",""
+"VCVTDQ2PS xmm1, xmm2/m128","VEX.128.0F.WIG 5B /r","V","V","AVX",""
+"VCVTDQ2PS ymm1, ymm2/m256","VEX.256.0F.WIG 5B /r","V","V","AVX",""
+"VCVTPD2DQ xmm1, xmm2/m128","VEX.128.F2.0F.WIG E6 /r","V","V","AVX",""
+"VCVTPD2DQ xmm1, ymm2/m256","VEX.256.F2.0F.WIG E6 /r","V","V","AVX",""
+"VCVTPD2PS xmm1, xmm2/m128","VEX.128.66.0F.WIG 5A /r","V","V","AVX",""
+"VCVTPD2PS xmm1, ymm2/m256","VEX.256.66.0F.WIG 5A /r","V","V","AVX",""
+"VCVTPH2PS xmm1, xmm2/m64","VEX.128.66.0F38.W0 13 /r","V","V","F16C",""
+"VCVTPH2PS ymm1, xmm2/m128","VEX.256.66.0F38.W0 13 /r","V","V","F16C",""
+"VCVTPS2DQ xmm1, xmm2/m128","VEX.128.66.0F.WIG 5B /r","V","V","AVX",""
+"VCVTPS2DQ ymm1, ymm2/m256","VEX.256.66.0F.WIG 5B /r","V","V","AVX",""
+"VCVTPS2PD xmm1, xmm2/m64","VEX.128.0F.WIG 5A /r","V","V","AVX",""
+"VCVTPS2PD ymm1, xmm2/m128","VEX.256.0F.WIG 5A /r","V","V","AVX",""
+"VCVTPS2PH xmm1/m128, ymm2, imm8","VEX.256.66.0F3A.W0 1D /r ib","V","V","F16C",""
+"VCVTPS2PH xmm1/m64, xmm2, imm8","VEX.128.66.0F3A.W0 1D /r ib","V","V","F16C",""
+"VCVTSD2SI r32, xmm1/m64","VEX.LIG.F2.0F.W0 2D /r","V","V","AVX",""
+"VCVTSD2SI r64, xmm1/m64","VEX.LIG.F2.0F.W1 2D /r","N.E.","V","AVX",""
+"VCVTSD2SS xmm1, xmm2, xmm3/m64","VEX.NDS.LIG.F2.0F.WIG 5A /r","V","V","AVX",""
+"VCVTSI2SD xmm1, xmm2, r/m32","VEX.NDS.LIG.F2.0F.W0 2A /r","V","V","AVX",""
+"VCVTSI2SD xmm1, xmm2, r/m64","VEX.NDS.LIG.F2.0F.W1 2A /r","N.E.","V","AVX",""
+"VCVTSI2SS xmm1, xmm2, r/m32","VEX.NDS.LIG.F3.0F.W0 2A /r","V","V","AVX",""
+"VCVTSI2SS xmm1, xmm2, r/m64","VEX.NDS.LIG.F3.0F.W1 2A /r","N.E.","V","AVX",""
+"VCVTSS2SD xmm1, xmm2, xmm3/m32","VEX.NDS.LIG.F3.0F.WIG 5A /r","V","V","AVX",""
+"VCVTSS2SI r32, xmm1/m32","VEX.LIG.F3.0F.W0 2D /r","V","V","AVX",""
+"VCVTSS2SI r64, xmm1/m32","VEX.LIG.F3.0F.W1 2D /r","N.E.","V","AVX",""
+"VCVTTPD2DQ xmm1, xmm2/m128","VEX.128.66.0F.WIG E6 /r","V","V","AVX",""
+"VCVTTPD2DQ xmm1, ymm2/m256","VEX.256.66.0F.WIG E6 /r","V","V","AVX",""
+"VCVTTPS2DQ xmm1, xmm2/m128","VEX.128.F3.0F.WIG 5B /r","V","V","AVX",""
+"VCVTTPS2DQ ymm1, ymm2/m256","VEX.256.F3.0F.WIG 5B /r","V","V","AVX",""
+"VCVTTSD2SI r32, xmm1/m64","VEX.LIG.F2.0F.W0 2C /r","V","V","AVX",""
+"VCVTTSD2SI r64, xmm1/m64","VEX.LIG.F2.0F.W1 2C /r","N.E.","V","AVX",""
+"VCVTTSS2SI r32, xmm1/m32","VEX.LIG.F3.0F.W0 2C /r","V","V","AVX",""
+"VCVTTSS2SI r64, xmm1/m32","VEX.LIG.F3.0F.W1 2C /r","N.E.","V","AVX",""
+"VDIVPD xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F.WIG 5E /r","V","V","AVX",""
+"VDIVPD ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F.WIG 5E /r","V","V","AVX",""
+"VDIVPS xmm1, xmm2, xmm3/m128","VEX.NDS.128.0F.WIG 5E /r","V","V","AVX",""
+"VDIVPS ymm1, ymm2, ymm3/m256","VEX.NDS.256.0F.WIG 5E /r","V","V","AVX",""
+"VDIVSD xmm1, xmm2, xmm3/m64","VEX.NDS.LIG.F2.0F.WIG 5E /r","V","V","AVX",""
+"VDIVSS xmm1, xmm2, xmm3/m32","VEX.NDS.LIG.F3.0F.WIG 5E /r","V","V","AVX",""
+"VDPPD xmm1, xmm2, xmm3/m128, imm8","VEX.NDS.128.66.0F3A.WIG 41 /r ib","V","V","AVX",""
+"VDPPS xmm1, xmm2, xmm3/m128, imm8","VEX.NDS.128.66.0F3A.WIG 40 /r ib","V","V","AVX",""
+"VDPPS ymm1, ymm2, ymm3/m256, imm8","VEX.NDS.256.66.0F3A.WIG 40 /r ib","V","V","AVX",""
+"VERR r/m16","0F 00 /4","V","V","",""
+"VERW r/m16","0F 00 /5","V","V","",""
+"VEXTRACTF128 xmm1/m128, ymm2, imm8","VEX.256.66.0F3A.W0 19 /r ib","V","V","AVX",""
+"VEXTRACTI128 xmm1/m128, ymm2, imm8","VEX.256.66.0F3A.W0 39 /r ib","V","V","AVX2",""
+"VEXTRACTPS r/m32, xmm1, imm8","VEX.128.66.0F3A.WIG 17 /r ib","V","V","AVX",""
+"VFMADD132PD xmm0, xmm1, xmm2/m128","VEX.DDS.128.66.0F38.W1 98 /r","V","V","FMA",""
+"VFMADD132PD ymm0, ymm1, ymm2/m256","VEX.DDS.256.66.0F38.W1 98 /r","V","V","FMA",""
+"VFMADD132PS xmm0, xmm1, xmm2/m128","VEX.DDS.128.66.0F38.W0 98 /r","V","V","FMA",""
+"VFMADD132PS ymm0, ymm1, ymm2/m256","VEX.DDS.256.66.0F38.W0 98 /r","V","V","FMA",""
+"VFMADD132SD xmm0, xmm1, xmm2/m64","VEX.DDS.LIG.128.66.0F38.W1 99 /r","V","V","FMA",""
+"VFMADD132SS xmm0, xmm1, xmm2/m32","VEX.DDS.LIG.128.66.0F38.W0 99 /r","V","V","FMA",""
+"VFMADD213PD xmm0, xmm1, xmm2/m128","VEX.DDS.128.66.0F38.W1 A8 /r","V","V","FMA",""
+"VFMADD213PD ymm0, ymm1, ymm2/m256","VEX.DDS.256.66.0F38.W1 A8 /r","V","V","FMA",""
+"VFMADD213PS xmm0, xmm1, xmm2/m128","VEX.DDS.128.66.0F38.W0 A8 /r","V","V","FMA",""
+"VFMADD213PS ymm0, ymm1, ymm2/m256","VEX.DDS.256.66.0F38.W0 A8 /r","V","V","FMA",""
+"VFMADD213SD xmm0, xmm1, xmm2/m64","VEX.DDS.LIG.128.66.0F38.W1 A9 /r","V","V","FMA",""
+"VFMADD213SS xmm0, xmm1, xmm2/m32","VEX.DDS.LIG.128.66.0F38.W0 A9 /r","V","V","FMA",""
+"VFMADD231PD xmm0, xmm1, xmm2/m128","VEX.DDS.128.66.0F38.W1 B8 /r","V","V","FMA",""
+"VFMADD231PD ymm0, ymm1, ymm2/m256","VEX.DDS.256.66.0F38.W1 B8 /r","V","V","FMA",""
+"VFMADD231PS xmm0, xmm1, xmm2/m128","VEX.DDS.128.66.0F38.W0 B8 /r","V","V","FMA",""
+"VFMADD231PS ymm0, ymm1, ymm2/m256","VEX.DDS.256.66.0F38.W0 B8 /r","V","V","FMA",""
+"VFMADD231SD xmm0, xmm1, xmm2/m64","VEX.DDS.LIG.128.66.0F38.W1 B9 /r","V","V","FMA",""
+"VFMADD231SS xmm0, xmm1, xmm2/m32","VEX.DDS.LIG.128.66.0F38.W0 B9 /r","V","V","FMA",""
+"VFMADDSUB132PD xmm0, xmm1, xmm2/m128","VEX.DDS.128.66.0F38.W1 96 /r","V","V","FMA",""
+"VFMADDSUB132PD ymm0, ymm1, ymm2/m256","VEX.DDS.256.66.0F38.W1 96 /r","V","V","FMA",""
+"VFMADDSUB132PS xmm0, xmm1, xmm2/m128","VEX.DDS.128.66.0F38.W0 96 /r","V","V","FMA",""
+"VFMADDSUB132PS ymm0, ymm1, ymm2/m256","VEX.DDS.256.66.0F38.W0 96 /r","V","V","FMA",""
+"VFMADDSUB213PD xmm0, xmm1, xmm2/m128","VEX.DDS.128.66.0F38.W1 A6 /r","V","V","FMA",""
+"VFMADDSUB213PD ymm0, ymm1, ymm2/m256","VEX.DDS.256.66.0F38.W1 A6 /r","V","V","FMA",""
+"VFMADDSUB213PS xmm0, xmm1, xmm2/m128","VEX.DDS.128.66.0F38.W0 A6 /r","V","V","FMA",""
+"VFMADDSUB213PS ymm0, ymm1, ymm2/m256","VEX.DDS.256.66.0F38.W0 A6 /r","V","V","FMA",""
+"VFMADDSUB231PD xmm0, xmm1, xmm2/m128","VEX.DDS.128.66.0F38.W1 B6 /r","V","V","FMA",""
+"VFMADDSUB231PD ymm0, ymm1, ymm2/m256","VEX.DDS.256.66.0F38.W1 B6 /r","V","V","FMA",""
+"VFMADDSUB231PS xmm0, xmm1, xmm2/m128","VEX.DDS.128.66.0F38.W0 B6 /r","V","V","FMA",""
+"VFMADDSUB231PS ymm0, ymm1, ymm2/m256","VEX.DDS.256.66.0F38.W0 B6 /r","V","V","FMA",""
+"VFMSUB132PD xmm0, xmm1, xmm2/m128","VEX.DDS.128.66.0F38.W1 9A /r","V","V","FMA",""
+"VFMSUB132PD ymm0, ymm1, ymm2/m256","VEX.DDS.256.66.0F38.W1 9A /r","V","V","FMA",""
+"VFMSUB132PS xmm0, xmm1, xmm2/m128","VEX.DDS.128.66.0F38.W0 9A /r","V","V","FMA",""
+"VFMSUB132PS ymm0, ymm1, ymm2/m256","VEX.DDS.256.66.0F38.W0 9A /r","V","V","FMA",""
+"VFMSUB132SD xmm0, xmm1, xmm2/m64","VEX.DDS.LIG.128.66.0F38.W1 9B /r","V","V","FMA",""
+"VFMSUB132SS xmm0, xmm1, xmm2/m32","VEX.DDS.LIG.128.66.0F38.W0 9B /r","V","V","FMA",""
+"VFMSUB213PD xmm0, xmm1, xmm2/m128","VEX.DDS.128.66.0F38.W1 AA /r","V","V","FMA",""
+"VFMSUB213PD ymm0, ymm1, ymm2/m256","VEX.DDS.256.66.0F38.W1 AA /r","V","V","FMA",""
+"VFMSUB213PS xmm0, xmm1, xmm2/m128","VEX.DDS.128.66.0F38.W0 AA /r","V","V","FMA",""
+"VFMSUB213PS ymm0, ymm1, ymm2/m256","VEX.DDS.256.66.0F38.W0 AA /r","V","V","FMA",""
+"VFMSUB213SD xmm0, xmm1, xmm2/m64","VEX.DDS.LIG.128.66.0F38.W1 AB /r","V","V","FMA",""
+"VFMSUB213SS xmm0, xmm1, xmm2/m32","VEX.DDS.LIG.128.66.0F38.W0 AB /r","V","V","FMA",""
+"VFMSUB231PD xmm0, xmm1, xmm2/m128","VEX.DDS.128.66.0F38.W1 BA /r","V","V","FMA",""
+"VFMSUB231PD ymm0, ymm1, ymm2/m256","VEX.DDS.256.66.0F38.W1 BA /r","V","V","FMA",""
+"VFMSUB231PS xmm0, xmm1, xmm2/m128","VEX.DDS.128.66.0F38.W0 BA /r","V","V","FMA",""
+"VFMSUB231PS ymm0, ymm1, ymm2/m256","VEX.DDS.256.66.0F38.W0 BA /r","V","V","FMA",""
+"VFMSUB231SD xmm0, xmm1, xmm2/m64","VEX.DDS.LIG.128.66.0F38.W1 BB /r","V","V","FMA",""
+"VFMSUB231SS xmm0, xmm1, xmm2/m32","VEX.DDS.LIG.128.66.0F38.W0 BB /r","V","V","FMA",""
+"VFMSUBADD132PD xmm0, xmm1, xmm2/m128","VEX.DDS.128.66.0F38.W1 97 /r","V","V","FMA",""
+"VFMSUBADD132PD ymm0, ymm1, ymm2/m256","VEX.DDS.256.66.0F38.W1 97 /r","V","V","FMA",""
+"VFMSUBADD132PS xmm0, xmm1, xmm2/m128","VEX.DDS.128.66.0F38.W0 97 /r","V","V","FMA",""
+"VFMSUBADD132PS ymm0, ymm1, ymm2/m256","VEX.DDS.256.66.0F38.W0 97 /r","V","V","FMA",""
+"VFMSUBADD213PD xmm0, xmm1, xmm2/m128","VEX.DDS.128.66.0F38.W1 A7 /r","V","V","FMA",""
+"VFMSUBADD213PD ymm0, ymm1, ymm2/m256","VEX.DDS.256.66.0F38.W1 A7 /r","V","V","FMA",""
+"VFMSUBADD213PS xmm0, xmm1, xmm2/m128","VEX.DDS.128.66.0F38.W0 A7 /r","V","V","FMA",""
+"VFMSUBADD213PS ymm0, ymm1, ymm2/m256","VEX.DDS.256.66.0F38.W0 A7 /r","V","V","FMA",""
+"VFMSUBADD231PD xmm0, xmm1, xmm2/m128","VEX.DDS.128.66.0F38.W1 B7 /r","V","V","FMA",""
+"VFMSUBADD231PD ymm0, ymm1, ymm2/m256","VEX.DDS.256.66.0F38.W1 B7 /r","V","V","FMA",""
+"VFMSUBADD231PS xmm0, xmm1, xmm2/m128","VEX.DDS.128.66.0F38.W0 B7 /r","V","V","FMA",""
+"VFMSUBADD231PS ymm0, ymm1, ymm2/m256","VEX.DDS.256.66.0F38.W0 B7 /r","V","V","FMA",""
+"VFNMADD132PD xmm0, xmm1, xmm2/m128","VEX.DDS.128.66.0F38.W1 9C /r","V","V","FMA",""
+"VFNMADD132PD ymm0, ymm1, ymm2/m256","VEX.DDS.256.66.0F38.W1 9C /r","V","V","FMA",""
+"VFNMADD132PS xmm0, xmm1, xmm2/m128","VEX.DDS.128.66.0F38.W0 9C /r","V","V","FMA",""
+"VFNMADD132PS ymm0, ymm1, ymm2/m256","VEX.DDS.256.66.0F38.W0 9C /r","V","V","FMA",""
+"VFNMADD132SD xmm0, xmm1, xmm2/m64","VEX.DDS.LIG.128.66.0F38.W1 9D /r","V","V","FMA",""
+"VFNMADD132SS xmm0, xmm1, xmm2/m32","VEX.DDS.LIG.128.66.0F38.W0 9D /r","V","V","FMA",""
+"VFNMADD213PD xmm0, xmm1, xmm2/m128","VEX.DDS.128.66.0F38.W1 AC /r","V","V","FMA",""
+"VFNMADD213PD ymm0, ymm1, ymm2/m256","VEX.DDS.256.66.0F38.W1 AC /r","V","V","FMA",""
+"VFNMADD213PS xmm0, xmm1, xmm2/m128","VEX.DDS.128.66.0F38.W0 AC /r","V","V","FMA",""
+"VFNMADD213PS ymm0, ymm1, ymm2/m256","VEX.DDS.256.66.0F38.W0 AC /r","V","V","FMA",""
+"VFNMADD213SD xmm0, xmm1, xmm2/m64","VEX.DDS.LIG.128.66.0F38.W1 AD /r","V","V","FMA",""
+"VFNMADD213SS xmm0, xmm1, xmm2/m32","VEX.DDS.LIG.128.66.0F38.W0 AD /r","V","V","FMA",""
+"VFNMADD231PD xmm0, xmm1, xmm2/m128","VEX.DDS.128.66.0F38.W1 BC /r","V","V","FMA",""
+"VFNMADD231PD ymm0, ymm1, ymm2/m256","VEX.DDS.256.66.0F38.W1 BC /r","V","V","FMA",""
+"VFNMADD231PS xmm0, xmm1, xmm2/m128","VEX.DDS.128.66.0F38.W0 BC /r","V","V","FMA",""
+"VFNMADD231PS ymm0, ymm1, ymm2/m256","VEX.DDS.256.66.0F38.W0 BC /r","V","V","FMA",""
+"VFNMADD231SD xmm0, xmm1, xmm2/m64","VEX.DDS.LIG.128.66.0F38.W1 BD /r","V","V","FMA",""
+"VFNMADD231SS xmm0, xmm1, xmm2/m32","VEX.DDS.LIG.128.66.0F38.W0 BD /r","V","V","FMA",""
+"VFNMSUB132PD xmm0, xmm1, xmm2/m128","VEX.DDS.128.66.0F38.W1 9E /r","V","V","FMA",""
+"VFNMSUB132PD ymm0, ymm1, ymm2/m256","VEX.DDS.256.66.0F38.W1 9E /r","V","V","FMA",""
+"VFNMSUB132PS xmm0, xmm1, xmm2/m128","VEX.DDS.128.66.0F38.W0 9E /r","V","V","FMA",""
+"VFNMSUB132PS ymm0, ymm1, ymm2/m256","VEX.DDS.256.66.0F38.W0 9E /r","V","V","FMA",""
+"VFNMSUB132SD xmm0, xmm1, xmm2/m64","VEX.DDS.LIG.128.66.0F38.W1 9F /r","V","V","FMA",""
+"VFNMSUB132SS xmm0, xmm1, xmm2/m32","VEX.DDS.LIG.128.66.0F38.W0 9F /r","V","V","FMA",""
+"VFNMSUB213PD xmm0, xmm1, xmm2/m128","VEX.DDS.128.66.0F38.W1 AE /r","V","V","FMA",""
+"VFNMSUB213PD ymm0, ymm1, ymm2/m256","VEX.DDS.256.66.0F38.W1 AE /r","V","V","FMA",""
+"VFNMSUB213PS xmm0, xmm1, xmm2/m128","VEX.DDS.128.66.0F38.W0 AE /r","V","V","FMA",""
+"VFNMSUB213PS ymm0, ymm1, ymm2/m256","VEX.DDS.256.66.0F38.W0 AE /r","V","V","FMA",""
+"VFNMSUB213SD xmm0, xmm1, xmm2/m64","VEX.DDS.LIG.128.66.0F38.W1 AF /r","V","V","FMA",""
+"VFNMSUB213SS xmm0, xmm1, xmm2/m32","VEX.DDS.LIG.128.66.0F38.W0 AF /r","V","V","FMA",""
+"VFNMSUB231PD xmm0, xmm1, xmm2/m128","VEX.DDS.128.66.0F38.W1 BE /r","V","V","FMA",""
+"VFNMSUB231PD ymm0, ymm1, ymm2/m256","VEX.DDS.256.66.0F38.W1 BE /r","V","V","FMA",""
+"VFNMSUB231PS xmm0, xmm1, xmm2/m128","VEX.DDS.128.66.0F38.W0 BE /r","V","V","FMA",""
+"VFNMSUB231PS ymm0, ymm1, ymm2/m256","VEX.DDS.256.66.0F38.W0 BE /r","V","V","FMA",""
+"VFNMSUB231SD xmm0, xmm1, xmm2/m64","VEX.DDS.LIG.128.66.0F38.W1 BF /r","V","V","FMA",""
+"VFNMSUB231SS xmm0, xmm1, xmm2/m32","VEX.DDS.LIG.128.66.0F38.W0 BF /r","V","V","FMA",""
+"VGATHERDPD xmm1, vm32x, xmm2","VEX.DDS.128.66.0F38.W1 92 /r","V","V","AVX2",""
+"VGATHERDPD ymm1, vm32x, ymm2","VEX.DDS.256.66.0F38.W1 92 /r","V","V","AVX2",""
+"VGATHERDPS xmm1, vm32x, xmm2","VEX.DDS.128.66.0F38.W0 92 /r","V","V","AVX2",""
+"VGATHERDPS ymm1, vm32y, ymm2","VEX.DDS.256.66.0F38.W0 92 /r","V","V","AVX2",""
+"VGATHERQPD xmm1, vm64x, xmm2","VEX.DDS.128.66.0F38.W1 93 /r","V","V","AVX2",""
+"VGATHERQPD ymm1, vm64y, ymm2","VEX.DDS.256.66.0F38.W1 93 /r","V","V","AVX2",""
+"VGATHERQPS xmm1, vm64x, xmm2","VEX.DDS.128.66.0F38.W0 93 /r","V","V","AVX2",""
+"VGATHERQPS xmm1, vm64y, xmm2","VEX.DDS.256.66.0F38.W0 93 /r","V","V","AVX2",""
+"VHADDPD xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F.WIG 7C /r","V","V","AVX",""
+"VHADDPD ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F.WIG 7C /r","V","V","AVX",""
+"VHADDPS xmm1, xmm2, xmm3/m128","VEX.NDS.128.F2.0F.WIG 7C /r","V","V","AVX",""
+"VHADDPS ymm1, ymm2, ymm3/m256","VEX.NDS.256.F2.0F.WIG 7C /r","V","V","AVX",""
+"VHSUBPD xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F.WIG 7D /r","V","V","AVX",""
+"VHSUBPD ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F.WIG 7D /r","V","V","AVX",""
+"VHSUBPS xmm1, xmm2, xmm3/m128","VEX.NDS.128.F2.0F.WIG 7D /r","V","V","AVX",""
+"VHSUBPS ymm1, ymm2, ymm3/m256","VEX.NDS.256.F2.0F.WIG 7D /r","V","V","AVX",""
+"VINSERTF128 ymm1, ymm2, xmm3/m128, imm8","VEX.NDS.256.66.0F3A.W0 18 /r ib","V","V","AVX",""
+"VINSERTI128 ymm1, ymm2, xmm3/m128, imm8","VEX.NDS.256.66.0F3A.W0 38 /r ib","V","V","AVX2",""
+"VINSERTPS xmm1, xmm2, xmm3/m32, imm8","VEX.NDS.128.66.0F3A.WIG 21 /r ib","V","V","AVX",""
+"VLDDQU xmm1, m128","VEX.128.F2.0F.WIG F0 /r","V","V","AVX",""
+"VLDDQU ymm1, m256","VEX.256.F2.0F.WIG F0 /r","V","V","AVX",""
+"VLDMXCSR m32","VEX.LZ.0F.WIG AE /2","V","V","AVX",""
+"VMASKMOVDQU xmm1, xmm2","VEX.128.66.0F.WIG F7 /r","V","V","AVX",""
+"VMASKMOVPD m128, xmm1, xmm2","VEX.NDS.128.66.0F38.W0 2F /r","V","V","AVX",""
+"VMASKMOVPD m256, ymm1, ymm2","VEX.NDS.256.66.0F38.W0 2F /r","V","V","AVX",""
+"VMASKMOVPD xmm1, xmm2, m128","VEX.NDS.128.66.0F38.W0 2D /r","V","V","AVX",""
+"VMASKMOVPD ymm1, ymm2, m256","VEX.NDS.256.66.0F38.W0 2D /r","V","V","AVX",""
+"VMASKMOVPS m128, xmm1, xmm2","VEX.NDS.128.66.0F38.W0 2E /r","V","V","AVX",""
+"VMASKMOVPS m256, ymm1, ymm2","VEX.NDS.256.66.0F38.W0 2E /r","V","V","AVX",""
+"VMASKMOVPS xmm1, xmm2, m128","VEX.NDS.128.66.0F38.W0 2C /r","V","V","AVX",""
+"VMASKMOVPS ymm1, ymm2, m256","VEX.NDS.256.66.0F38.W0 2C /r","V","V","AVX",""
+"VMAXPD xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F.WIG 5F /r","V","V","AVX",""
+"VMAXPD ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F.WIG 5F /r","V","V","AVX",""
+"VMAXPS xmm1, xmm2, xmm3/m128","VEX.NDS.128.0F.WIG 5F /r","V","V","AVX",""
+"VMAXPS ymm1, ymm2, ymm3/m256","VEX.NDS.256.0F.WIG 5F /r","V","V","AVX",""
+"VMAXSD xmm1, xmm2, xmm3/m64","VEX.NDS.LIG.F2.0F.WIG 5F /r","V","V","AVX",""
+"VMAXSS xmm1, xmm2, xmm3/m32","VEX.NDS.LIG.F3.0F.WIG 5F /r","V","V","AVX",""
+"VMINPD xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F.WIG 5D /r","V","V","AVX",""
+"VMINPD ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F.WIG 5D /r","V","V","AVX",""
+"VMINPS xmm1, xmm2, xmm3/m128","VEX.NDS.128.0F.WIG 5D /r","V","V","AVX",""
+"VMINPS ymm1, ymm2, ymm3/m256","VEX.NDS.256.0F.WIG 5D /r","V","V","AVX",""
+"VMINSD xmm1, xmm2, xmm3/m64","VEX.NDS.LIG.F2.0F.WIG 5D /r","V","V","AVX",""
+"VMINSS xmm1, xmm2, xmm3/m32","VEX.NDS.LIG.F3.0F.WIG 5D /r","V","V","AVX",""
+"VMOVAPD xmm1, xmm2/m128","VEX.128.66.0F.WIG 28 /r","V","V","AVX",""
+"VMOVAPD xmm2/m128, xmm1","VEX.128.66.0F.WIG 29 /r","V","V","AVX",""
+"VMOVAPD ymm1, ymm2/m256","VEX.256.66.0F.WIG 28 /r","V","V","AVX",""
+"VMOVAPD ymm2/m256, ymm1","VEX.256.66.0F.WIG 29 /r","V","V","AVX",""
+"VMOVAPS xmm1, xmm2/m128","VEX.128.0F.WIG 28 /r","V","V","AVX",""
+"VMOVAPS xmm2/m128, xmm1","VEX.128.0F.WIG 29 /r","V","V","AVX",""
+"VMOVAPS ymm1, ymm2/m256","VEX.256.0F.WIG 28 /r","V","V","AVX",""
+"VMOVAPS ymm2/m256, ymm1","VEX.256.0F.WIG 29 /r","V","V","AVX",""
+"VMOVD r32/m32, xmm1","VEX.128.66.0F.W0 7E /r","V","V","AVX",""
+"VMOVD xmm1, r32/m32","VEX.128.66.0F.W0 6E /r","V","V","AVX",""
+"VMOVDDUP xmm1, xmm2/m64","VEX.128.F2.0F.WIG 12 /r","V","V","AVX",""
+"VMOVDDUP ymm1, ymm2/m256","VEX.256.F2.0F.WIG 12 /r","V","V","AVX",""
+"VMOVDQA xmm1, xmm2/m128","VEX.128.66.0F.WIG 6F /r","V","V","AVX",""
+"VMOVDQA xmm2/m128, xmm1","VEX.128.66.0F.WIG 7F /r","V","V","AVX",""
+"VMOVDQA ymm1, ymm2/m256","VEX.256.66.0F.WIG 6F /r","V","V","AVX",""
+"VMOVDQA ymm2/m256, ymm1","VEX.256.66.0F.WIG 7F /r","V","V","AVX",""
+"VMOVDQU xmm1, xmm2/m128","VEX.128.F3.0F.WIG 6F /r","V","V","AVX",""
+"VMOVDQU xmm2/m128, xmm1","VEX.128.F3.0F.WIG 7F /r","V","V","AVX",""
+"VMOVDQU ymm1, ymm2/m256","VEX.256.F3.0F.WIG 6F /r","V","V","AVX",""
+"VMOVDQU ymm2/m256, ymm1","VEX.256.F3.0F.WIG 7F /r","V","V","AVX",""
+"VMOVHLPS xmm1, xmm2, xmm3","VEX.NDS.128.0F.WIG 12 /r","V","V","AVX",""
+"VMOVHPD m64, xmm1","VEX.128.66.0F.WIG 17 /r","V","V","AVX",""
+"VMOVHPD xmm2, xmm1, m64","VEX.NDS.128.66.0F.WIG 16 /r","V","V","AVX",""
+"VMOVHPS m64, xmm1","VEX.128.0F.WIG 17 /r","V","V","AVX",""
+"VMOVHPS xmm2, xmm1, m64","VEX.NDS.128.0F.WIG 16 /r","V","V","AVX",""
+"VMOVLHPS xmm1, xmm2, xmm3","VEX.NDS.128.0F.WIG 16 /r","V","V","AVX",""
+"VMOVLPD m64, xmm1","VEX.128.66.0F.WIG 13 /r","V","V","AVX",""
+"VMOVLPD xmm2, xmm1, m64","VEX.NDS.128.66.0F.WIG 12 /r","V","V","AVX",""
+"VMOVLPS m64, xmm1","VEX.128.0F.WIG 13 /r","V","V","AVX",""
+"VMOVLPS xmm2, xmm1, m64","VEX.NDS.128.0F.WIG 12 /r","V","V","AVX",""
+"VMOVMSKPD r32, xmm2","VEX.128.66.0F.WIG 50 /r","V","V","AVX",""
+"VMOVMSKPD r32, ymm2","VEX.256.66.0F.WIG 50 /r","V","V","AVX",""
+"VMOVMSKPS r32, xmm2","VEX.128.0F.WIG 50 /r","V","V","AVX",""
+"VMOVMSKPS r32, ymm2","VEX.256.0F.WIG 50 /r","V","V","AVX",""
+"VMOVNTDQ m128, xmm1","VEX.128.66.0F.WIG E7 /r","V","V","AVX",""
+"VMOVNTDQ m256, ymm1","VEX.256.66.0F.WIG E7 /r","V","V","AVX",""
+"VMOVNTDQA xmm1, m128","VEX.128.66.0F38.WIG 2A /r","V","V","AVX",""
+"VMOVNTDQA ymm1, m256","VEX.256.66.0F38.WIG 2A /r","V","V","AVX2",""
+"VMOVNTPD m128, xmm1","VEX.128.66.0F.WIG 2B /r","V","V","AVX",""
+"VMOVNTPD m256, ymm1","VEX.256.66.0F.WIG 2B /r","V","V","AVX",""
+"VMOVNTPS m128, xmm1","VEX.128.0F.WIG 2B /r","V","V","AVX",""
+"VMOVNTPS m256, ymm1","VEX.256.0F.WIG 2B /r","V","V","AVX",""
+"VMOVQ r64/m64, xmm1","VEX.128.66.0F.W1 7E /r","N.E.","V","AVX",""
+"VMOVQ xmm1, m64","VEX.128.F3.0F.WIG 7E /r","V","V","AVX",""
+"VMOVQ xmm1, r64/m64","VEX.128.66.0F.W1 6E /r","N.E.","V","AVX",""
+"VMOVQ xmm1, xmm2","VEX.128.F3.0F.WIG 7E /r","V","V","AVX",""
+"VMOVQ xmm1/m64, xmm2","VEX.128.66.0F.WIG D6 /r","V","V","AVX",""
+"VMOVSD m64, xmm1","VEX.LIG.F2.0F.WIG 11 /r","V","V","AVX",""
+"VMOVSD xmm1, m64","VEX.LIG.F2.0F.WIG 10 /r","V","V","AVX",""
+"VMOVSD xmm1, xmm2, xmm3","VEX.NDS.LIG.F2.0F.WIG 10 /r","V","V","AVX",""
+"VMOVSD xmm1, xmm2, xmm3","VEX.NDS.LIG.F2.0F.WIG 11 /r","V","V","AVX",""
+"VMOVSHDUP xmm1, xmm2/m128","VEX.128.F3.0F.WIG 16 /r","V","V","AVX",""
+"VMOVSHDUP ymm1, ymm2/m256","VEX.256.F3.0F.WIG 16 /r","V","V","AVX",""
+"VMOVSLDUP xmm1, xmm2/m128","VEX.128.F3.0F.WIG 12 /r","V","V","AVX",""
+"VMOVSLDUP ymm1, ymm2/m256","VEX.256.F3.0F.WIG 12 /r","V","V","AVX",""
+"VMOVSS m32, xmm1","VEX.LIG.F3.0F.WIG 11 /r","V","V","AVX",""
+"VMOVSS xmm1, m32","VEX.LIG.F3.0F.WIG 10 /r","V","V","AVX",""
+"VMOVSS xmm1, xmm2, xmm3","VEX.NDS.LIG.F3.0F.WIG 10 /r","V","V","AVX",""
+"VMOVSS xmm1, xmm2, xmm3","VEX.NDS.LIG.F3.0F.WIG 11 /r","V","V","AVX",""
+"VMOVUPD xmm1, xmm2/m128","VEX.128.66.0F.WIG 10 /r","V","V","AVX",""
+"VMOVUPD xmm2/m128, xmm1","VEX.128.66.0F.WIG 11 /r","V","V","AVX",""
+"VMOVUPD ymm1, ymm2/m256","VEX.256.66.0F.WIG 10 /r","V","V","AVX",""
+"VMOVUPD ymm2/m256, ymm1","VEX.256.66.0F.WIG 11 /r","V","V","AVX",""
+"VMOVUPS xmm1, xmm2/m128","VEX.128.0F.WIG 10 /r","V","V","AVX",""
+"VMOVUPS xmm2/m128, xmm1","VEX.128.0F.WIG 11 /r","V","V","AVX",""
+"VMOVUPS ymm1, ymm2/m256","VEX.256.0F.WIG 10 /r","V","V","AVX",""
+"VMOVUPS ymm2/m256, ymm1","VEX.256.0F.WIG 11 /r","V","V","AVX",""
+"VMPSADBW xmm1, xmm2, xmm3/m128, imm8","VEX.NDS.128.66.0F3A.WIG 42 /r ib","V","V","AVX",""
+"VMPSADBW ymm1, ymm2, ymm3/m256, imm8","VEX.NDS.256.66.0F3A.WIG 42 /r ib","V","V","AVX2",""
+"VMULPD xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F.WIG 59 /r","V","V","AVX",""
+"VMULPD ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F.WIG 59 /r","V","V","AVX",""
+"VMULPS xmm1, xmm2, xmm3/m128","VEX.NDS.128.0F.WIG 59 /r","V","V","AVX",""
+"VMULPS ymm1, ymm2, ymm3/m256","VEX.NDS.256.0F.WIG 59 /r","V","V","AVX",""
+"VMULSD xmm1, xmm2, xmm3/m64","VEX.NDS.LIG.F2.0F.WIG 59 /r","V","V","AVX",""
+"VMULSS xmm1, xmm2, xmm3/m32","VEX.NDS.LIG.F3.0F.WIG 59 /r","V","V","AVX",""
+"VORPD xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F.WIG 56 /r","V","V","AVX",""
+"VORPD ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F.WIG 56 /r","V","V","AVX",""
+"VORPS xmm1, xmm2, xmm3/m128","VEX.NDS.128.0F.WIG 56 /r","V","V","AVX",""
+"VORPS ymm1, ymm2, ymm3/m256","VEX.NDS.256.0F.WIG 56 /r","V","V","AVX",""
+"VPABSB xmm1, xmm2/m128","VEX.128.66.0F38.WIG 1C /r","V","V","AVX",""
+"VPABSB ymm1, ymm2/m256","VEX.256.66.0F38.WIG 1C /r","V","V","AVX2",""
+"VPABSD xmm1, xmm2/m128","VEX.128.66.0F38.WIG 1E /r","V","V","AVX",""
+"VPABSD ymm1, ymm2/m256","VEX.256.66.0F38.WIG 1E /r","V","V","AVX2",""
+"VPABSW xmm1, xmm2/m128","VEX.128.66.0F38.WIG 1D /r","V","V","AVX",""
+"VPABSW ymm1, ymm2/m256","VEX.256.66.0F38.WIG 1D /r","V","V","AVX2",""
+"VPACKSSDW xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F.WIG 6B /r","V","V","AVX",""
+"VPACKSSDW ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F.WIG 6B /r","V","V","AVX2",""
+"VPACKSSWB xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F.WIG 63 /r","V","V","AVX",""
+"VPACKSSWB ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F.WIG 63 /r","V","V","AVX2",""
+"VPACKUSDW xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F38.WIG 2B /r","V","V","AVX",""
+"VPACKUSDW ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F38.WIG 2B /r","V","V","AVX2",""
+"VPACKUSWB xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F.WIG 67 /r","V","V","AVX",""
+"VPACKUSWB ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F.WIG 67 /r","V","V","AVX2",""
+"VPADDB xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F.WIG FC /r","V","V","AVX",""
+"VPADDB ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F.WIG FC /r","V","V","AVX2",""
+"VPADDD xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F.WIG FE /r","V","V","AVX",""
+"VPADDD ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F.WIG FE /r","V","V","AVX2",""
+"VPADDQ xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F.WIG D4 /r","V","V","AVX",""
+"VPADDQ ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F.WIG D4 /r","V","V","AVX2",""
+"VPADDSB xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F.WIG EC /r","V","V","AVX",""
+"VPADDSB ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F.WIG EC /r","V","V","AVX2",""
+"VPADDSW xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F.WIG ED /r","V","V","AVX",""
+"VPADDSW ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F.WIG ED /r","V","V","AVX2",""
+"VPADDUSB xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F.WIG DC /r","V","V","AVX",""
+"VPADDUSB ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F.WIG DC /r","V","V","AVX2",""
+"VPADDUSW xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F.WIG DD /r","V","V","AVX",""
+"VPADDUSW ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F.WIG DD /r","V","V","AVX2",""
+"VPADDW xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F.WIG FD /r","V","V","AVX",""
+"VPADDW ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F.WIG FD /r","V","V","AVX2",""
+"VPALIGNR xmm1, xmm2, xmm3/m128, imm8","VEX.NDS.128.66.0F3A.WIG 0F /r ib","V","V","AVX",""
+"VPALIGNR ymm1, ymm2, ymm3/m256, imm8","VEX.NDS.256.66.0F3A.WIG 0F /r ib","V","V","AVX2",""
+"VPAND xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F.WIG DB /r","V","V","AVX",""
+"VPAND ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F.WIG DB /r","V","V","AVX2",""
+"VPANDN xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F.WIG DF /r","V","V","AVX",""
+"VPANDN ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F.WIG DF /r","V","V","AVX2",""
+"VPAVGB xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F.WIG E0 /r","V","V","AVX",""
+"VPAVGB ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F.WIG E0 /r","V","V","AVX2",""
+"VPAVGW xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F.WIG E3 /r","V","V","AVX",""
+"VPAVGW ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F.WIG E3 /r","V","V","AVX2",""
+"VPBLENDD xmm1, xmm2, xmm3/m128, imm8","VEX.NDS.128.66.0F3A.W0 02 /r ib","V","V","AVX2",""
+"VPBLENDD ymm1, ymm2, ymm3/m256, imm8","VEX.NDS.256.66.0F3A.W0 02 /r ib","V","V","AVX2",""
+"VPBLENDVB xmm1, xmm2, xmm3/m128, xmm4","VEX.NDS.128.66.0F3A.W0 4C /r /is4","V","V","AVX",""
+"VPBLENDVB ymm1, ymm2, ymm3/m256, ymm4","VEX.NDS.256.66.0F3A.W0 4C /r /is4","V","V","AVX2",""
+"VPBLENDW xmm1, xmm2, xmm3/m128, imm8","VEX.NDS.128.66.0F3A.WIG 0E /r ib","V","V","AVX",""
+"VPBLENDW ymm1, ymm2, ymm3/m256, imm8","VEX.NDS.256.66.0F3A.WIG 0E /r ib","V","V","AVX2",""
+"VPBROADCASTB xmm1, xmm2/m8","VEX.128.66.0F38.W0 78 /r","V","V","AVX2",""
+"VPBROADCASTB ymm1, xmm2/m8","VEX.256.66.0F38.W0 78 /r","V","V","AVX2",""
+"VPBROADCASTD xmm1, xmm2/m32","VEX.128.66.0F38.W0 58 /r","V","V","AVX2",""
+"VPBROADCASTD ymm1, xmm2/m32","VEX.256.66.0F38.W0 58 /r","V","V","AVX2",""
+"VPBROADCASTQ xmm1, xmm2/m64","VEX.128.66.0F38.W0 59 /r","V","V","AVX2",""
+"VPBROADCASTQ ymm1, xmm2/m64","VEX.256.66.0F38.W0 59 /r","V","V","AVX2",""
+"VPBROADCASTW xmm1, xmm2/m16","VEX.128.66.0F38.W0 79 /r","V","V","AVX2",""
+"VPBROADCASTW ymm1, xmm2/m16","VEX.256.66.0F38.W0 79 /r","V","V","AVX2",""
+"VPCLMULQDQ xmm1, xmm2, xmm3/m128, imm8","VEX.NDS.128.66.0F3A.WIG 44 /r ib","V","V","Both CLMUL and AVX flags",""
+"VPCMPEQB xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F.WIG 74 /r","V","V","AVX",""
+"VPCMPEQD xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F.WIG 76 /r","V","V","AVX",""
+"VPCMPEQD ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F.WIG 76 /r","V","V","AVX2",""
+"VPCMPEQQ xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F38.WIG 29 /r","V","V","AVX",""
+"VPCMPEQQ ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F38.WIG 29 /r","V","V","AVX2",""
+"VPCMPEQQ ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F38.WIG 29 /r","V","V","AVX2",""
+"VPCMPEQW xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F.WIG 75 /r","V","V","AVX",""
+"VPCMPEQW ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F.WIG 75 /r","V","V","AVX2",""
+"VPCMPESTRI xmm1, xmm2/m128, imm8","VEX.128.66.0F3A.WIG 61 /r ib","V","V","AVX",""
+"VPCMPESTRM xmm1, xmm2/m128, imm8","VEX.128.66.0F3A.WIG 60 /r ib","V","V","AVX",""
+"VPCMPGTB xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F.WIG 64 /r","V","V","AVX",""
+"VPCMPGTB ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F.WIG 64 /r","V","V","AVX2",""
+"VPCMPGTD xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F.WIG 66 /r","V","V","AVX",""
+"VPCMPGTD ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F.WIG 66 /r","V","V","AVX2",""
+"VPCMPGTQ xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F38.WIG 37 /r","V","V","AVX",""
+"VPCMPGTQ ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F38.WIG 37 /r","V","V","AVX2",""
+"VPCMPGTW xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F.WIG 65 /r","V","V","AVX",""
+"VPCMPGTW ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F.WIG 65 /r","V","V","AVX2",""
+"VPCMPISTRI xmm1, xmm2/m128, imm8","VEX.128.66.0F3A.WIG 63 /r ib","V","V","AVX",""
+"VPCMPISTRM xmm1, xmm2/m128, imm8","VEX.128.66.0F3A.WIG 62 /r ib","V","V","AVX",""
+"VPERM2F128 ymm1, ymm2, ymm3/m256, imm8","VEX.NDS.256.66.0F3A.W0 06 /r ib","V","V","AVX",""
+"VPERM2I128 ymm1, ymm2, ymm3/m256, imm8","VEX.NDS.256.66.0F3A.W0 46 /r ib","V","V","AVX2",""
+"VPERMD ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F38.W0 36 /r","V","V","AVX2",""
+"VPERMILPD xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F38.W0 0D /r","V","V","AVX",""
+"VPERMILPD xmm1, xmm2/m128, imm8","VEX.128.66.0F3A.W0 05 /r ib","V","V","AVX",""
+"VPERMILPD ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F38.W0 0D /r","V","V","AVX",""
+"VPERMILPD ymm1, ymm2/m256, imm8","VEX.256.66.0F3A.W0 05 /r ib","V","V","AVX",""
+"VPERMILPS xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F38.W0 0C /r","V","V","AVX",""
+"VPERMILPS xmm1, xmm2/m128, imm8","VEX.128.66.0F3A.W0 04 /r ib","V","V","AVX",""
+"VPERMILPS ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F38.W0 0C /r","V","V","AVX",""
+"VPERMILPS ymm1, ymm2/m256, imm8","VEX.256.66.0F3A.W0 04 /r ib","V","V","AVX",""
+"VPERMPD ymm1, ymm2/m256, imm8","VEX.256.66.0F3A.W1 01 /r ib","V","V","AVX2",""
+"VPERMPS ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F38.W0 16 /r","V","V","AVX2",""
+"VPERMQ ymm1, ymm2/m256, imm8","VEX.256.66.0F3A.W1 00 /r ib","V","V","AVX2",""
+"VPEXTRB r32/m8, xmm1, imm8","VEX.128.66.0F3A.W0 14 /r ib","V","V","AVX",""
+"VPEXTRD r32/m32, xmm1, imm8","VEX.128.66.0F3A.W0 16 /r ib","V","V","AVX",""
+"VPEXTRQ r64/m64, xmm1, imm8","VEX.128.66.0F3A.W1 16 /r ib","I","V","AVX",""
+"VPEXTRW r32, xmm1, imm8","VEX.128.66.0F.W0 C5 /r ib","V","V","AVX",""
+"VPEXTRW r32/m16, xmm2, imm8","VEX.128.66.0F3A.W0 15 /r ib","V","V","AVX",""
+"VPGATHERDD xmm1, vm32x, xmm2","VEX.DDS.128.66.0F38.W0 90 /r","V","V","AVX2",""
+"VPGATHERDD ymm1, vm32y, ymm2","VEX.DDS.256.66.0F38.W0 90 /r","V","V","AVX2",""
+"VPGATHERDQ xmm1, vm32x, xmm2","VEX.DDS.128.66.0F38.W1 90 /r","V","V","AVX2",""
+"VPGATHERDQ ymm1, vm32x, ymm2","VEX.DDS.256.66.0F38.W1 90 /r","V","V","AVX2",""
+"VPGATHERQD xmm1, vm64x, xmm2","VEX.DDS.128.66.0F38.W0 91 /r","V","V","AVX2",""
+"VPGATHERQD xmm1, vm64y, xmm2","VEX.DDS.256.66.0F38.W0 91 /r","V","V","AVX2",""
+"VPGATHERQQ xmm1, vm64x, xmm2","VEX.DDS.128.66.0F38.W1 91 /r","V","V","AVX2",""
+"VPGATHERQQ ymm1, vm64y, ymm2","VEX.DDS.256.66.0F38.W1 91 /r","V","V","AVX2",""
+"VPHADDD xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F38.WIG 02 /r","V","V","AVX",""
+"VPHADDD ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F38.WIG 02 /r","V","V","AVX2",""
+"VPHADDSW xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F38.WIG 03 /r","V","V","AVX",""
+"VPHADDSW ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F38.WIG 03 /r","V","V","AVX2",""
+"VPHADDW xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F38.WIG 01 /r","V","V","AVX",""
+"VPHADDW ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F38.WIG 01 /r","V","V","AVX2",""
+"VPHMINPOSUW xmm1, xmm2/m128","VEX.128.66.0F38.WIG 41 /r","V","V","AVX",""
+"VPHSUBD xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F38.WIG 06 /r","V","V","AVX",""
+"VPHSUBD ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F38.WIG 06 /r","V","V","AVX2",""
+"VPHSUBSW xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F38.WIG 07 /r","V","V","AVX",""
+"VPHSUBSW ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F38.WIG 07 /r","V","V","AVX2",""
+"VPHSUBW xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F38.WIG 05 /r","V","V","AVX",""
+"VPHSUBW ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F38.WIG 05 /r","V","V","AVX2",""
+"VPINSRB xmm1, xmm2, r32/m8, imm8","VEX.NDS.128.66.0F3A.W0 20 /r ib","V","V","AVX",""
+"VPINSRD xmm1, xmm2, r/m32, imm8","VEX.NDS.128.66.0F3A.W0 22 /r ib","V","V","AVX",""
+"VPINSRQ xmm1, xmm2, r/m64, imm8","VEX.NDS.128.66.0F3A.W1 22 /r ib","I","V","AVX",""
+"VPINSRW xmm1, xmm2, r32/m16, imm8","VEX.NDS.128.66.0F.W0 C4 /r ib","V","V","AVX",""
+"VPMADDUBSW xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F38.WIG 04 /r","V","V","AVX",""
+"VPMADDUBSW ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F38.WIG 04 /r","V","V","AVX2",""
+"VPMADDWD xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F.WIG F5 /r","V","V","AVX",""
+"VPMADDWD ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F.WIG F5 /r","V","V","AVX2",""
+"VPMASKMOVD m128, xmm1, xmm2","VEX.NDS.128.66.0F38.W0 8E /r","V","V","AVX2",""
+"VPMASKMOVD m256, ymm1, ymm2","VEX.NDS.256.66.0F38.W0 8E /r","V","V","AVX2",""
+"VPMASKMOVD xmm1, xmm2, m128","VEX.NDS.128.66.0F38.W0 8C /r","V","V","AVX2",""
+"VPMASKMOVD ymm1, ymm2, m256","VEX.NDS.256.66.0F38.W0 8C /r","V","V","AVX2",""
+"VPMASKMOVQ m128, xmm1, xmm2","VEX.NDS.128.66.0F38.W1 8E /r","V","V","AVX2",""
+"VPMASKMOVQ m256, ymm1, ymm2","VEX.NDS.256.66.0F38.W1 8E /r","V","V","AVX2",""
+"VPMASKMOVQ xmm1, xmm2, m128","VEX.NDS.128.66.0F38.W1 8C /r","V","V","AVX2",""
+"VPMASKMOVQ ymm1, ymm2, m256","VEX.NDS.256.66.0F38.W1 8C /r","V","V","AVX2",""
+"VPMAXSB xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F38.WIG 3C /r","V","V","AVX",""
+"VPMAXSB ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F38.WIG 3C /r","V","V","AVX2",""
+"VPMAXSD xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F38.WIG 3D /r","V","V","AVX",""
+"VPMAXSD ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F38.WIG 3D /r","V","V","AVX2",""
+"VPMAXSW xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F.WIG EE /r","V","V","AVX",""
+"VPMAXSW ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F.WIG EE /r","V","V","AVX2",""
+"VPMAXUB xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F.WIG DE /r","V","V","AVX",""
+"VPMAXUB ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F.WIG DE /r","V","V","AVX2",""
+"VPMAXUD xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F38.WIG 3F /r","V","V","AVX",""
+"VPMAXUD ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F38.WIG 3F /r","V","V","AVX2",""
+"VPMAXUW xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F38.WIG 3E /r","V","V","AVX",""
+"VPMAXUW ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F38.WIG 3E /r","V","V","AVX2",""
+"VPMINSB xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F38.WIG 38 /r","V","V","AVX",""
+"VPMINSB ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F38.WIG 38 /r","V","V","AVX2",""
+"VPMINSD xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F38.WIG 39 /r","V","V","AVX",""
+"VPMINSD ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F38.WIG 39 /r","V","V","AVX2",""
+"VPMINSW xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F.WIG EA /r","V","V","AVX",""
+"VPMINSW ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F.WIG EA /r","V","V","AVX2",""
+"VPMINUB xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F.WIG DA /r","V","V","AVX",""
+"VPMINUB ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F.WIG DA /r","V","V","AVX2",""
+"VPMINUD xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F38.WIG 3B /r","V","V","AVX",""
+"VPMINUD ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F38.WIG 3B /r","V","V","AVX2",""
+"VPMINUW xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F38.WIG 3A /r","V","V","AVX",""
+"VPMINUW ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F38.WIG 3A /r","V","V","AVX2",""
+"VPMOVMSKB r32, xmm1","VEX.128.66.0F.WIG D7 /r","V","V","AVX",""
+"VPMOVMSKB r32, ymm1","VEX.256.66.0F.WIG D7 /r","V","V","AVX2",""
+"VPMOVSXBD xmm1, xmm2/m32","VEX.128.66.0F38.WIG 21 /r","V","V","AVX",""
+"VPMOVSXBD ymm1, xmm2/m64","VEX.256.66.0F38.WIG 21 /r","V","V","AVX2",""
+"VPMOVSXBQ xmm1, xmm2/m16","VEX.128.66.0F38.WIG 22 /r","V","V","AVX",""
+"VPMOVSXBQ ymm1, xmm2/m32","VEX.256.66.0F38.WIG 22 /r","V","V","AVX2",""
+"VPMOVSXBW xmm1, xmm2/m64","VEX.128.66.0F38.WIG 20 /r","V","V","AVX",""
+"VPMOVSXBW ymm1, xmm2/m128","VEX.256.66.0F38.WIG 20 /r","V","V","AVX2",""
+"VPMOVSXDQ xmm1, xmm2/m64","VEX.128.66.0F38.WIG 25 /r","V","V","AVX",""
+"VPMOVSXWD xmm1, xmm2/m64","VEX.128.66.0F38.WIG 23 /r","V","V","AVX",""
+"VPMOVSXWD ymm1, xmm2/m128","VEX.256.66.0F38.WIG 23 /r","V","V","AVX2",""
+"VPMOVSXWQ xmm1, xmm2/m32","VEX.128.66.0F38.WIG 24 /r","V","V","AVX",""
+"VPMOVZXBD xmm1, xmm2/m32","VEX.128.66.0F38.WIG 31 /r","V","V","AVX",""
+"VPMOVZXBD ymm1, xmm2/m64","VEX.256.66.0F38.WIG 31 /r","V","V","AVX2",""
+"VPMOVZXBQ xmm1, xmm2/m16","VEX.128.66.0F38.WIG 32 /r","V","V","AVX",""
+"VPMOVZXBQ ymm1, xmm2/m32","VEX.256.66.0F38.WIG 32 /r","V","V","AVX2",""
+"VPMOVZXBW xmm1, xmm2/m64","VEX.128.66.0F38.WIG 30 /r","V","V","AVX",""
+"VPMOVZXBW ymm1, xmm2/m128","VEX.256.66.0F38.WIG 30 /r","V","V","AVX2",""
+"VPMOVZXDQ xmm1, xmm2/m64","VEX.128.66.0F38.WIG 35 /r","V","V","AVX",""
+"VPMOVZXWD xmm1, xmm2/m64","VEX.128.66.0F38.WIG 33 /r","V","V","AVX",""
+"VPMOVZXWD ymm1, xmm2/m128","VEX.256.66.0F38.WIG 33 /r","V","V","AVX2",""
+"VPMOVZXWQ xmm1, xmm2/m32","VEX.128.66.0F38.WIG 34 /r","V","V","AVX",""
+"VPMULDQ xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F38.WIG 28 /r","V","V","AVX",""
+"VPMULDQ ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F38.WIG 28 /r","V","V","AVX2",""
+"VPMULHRSW xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F38.WIG 0B /r","V","V","AVX",""
+"VPMULHRSW ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F38.WIG 0B /r","V","V","AVX2",""
+"VPMULHUW xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F.WIG E4 /r","V","V","AVX",""
+"VPMULHUW ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F.WIG E4 /r","V","V","AVX2",""
+"VPMULHW xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F.WIG E5 /r","V","V","AVX",""
+"VPMULHW ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F.WIG E5 /r","V","V","AVX2",""
+"VPMULLD xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F38.WIG 40 /r","V","V","AVX",""
+"VPMULLD ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F38.WIG 40 /r","V","V","AVX2",""
+"VPMULLW xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F.WIG D5 /r","V","V","AVX",""
+"VPMULLW ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F.WIG D5 /r","V","V","AVX2",""
+"VPMULUDQ xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F.WIG F4 /r","V","V","AVX",""
+"VPMULUDQ ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F.WIG F4 /r","V","V","AVX2",""
+"VPOR xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F.WIG EB /r","V","V","AVX",""
+"VPOR ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F.WIG EB /r","V","V","AVX2",""
+"VPSADBW xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F.WIG F6 /r","V","V","AVX",""
+"VPSADBW ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F.WIG F6 /r","V","V","AVX2",""
+"VPSHUFB xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F38.WIG 00 /r","V","V","AVX",""
+"VPSHUFB ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F38.WIG 00 /r","V","V","AVX2",""
+"VPSHUFD xmm1, xmm2/m128, imm8u","VEX.128.66.0F.WIG 70 /r ib","V","V","AVX",""
+"VPSHUFD ymm1, ymm2/m256, imm8u","VEX.256.66.0F.WIG 70 /r ib","V","V","AVX2",""
+"VPSHUFHW xmm1, xmm2/m128, imm8u","VEX.128.F3.0F.WIG 70 /r ib","V","V","AVX",""
+"VPSHUFHW ymm1, ymm2/m256, imm8u","VEX.256.F3.0F.WIG 70 /r ib","V","V","AVX2",""
+"VPSHUFLW xmm1, xmm2/m128, imm8u","VEX.128.F2.0F.WIG 70 /r ib","V","V","AVX",""
+"VPSHUFLW ymm1, ymm2/m256, imm8u","VEX.256.F2.0F.WIG 70 /r ib","V","V","AVX2",""
+"VPSIGNB xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F38.WIG 08 /r","V","V","AVX",""
+"VPSIGNB ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F38.WIG 08 /r","V","V","AVX2",""
+"VPSIGND xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F38.WIG 0A /r","V","V","AVX",""
+"VPSIGND ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F38.WIG 0A /r","V","V","AVX2",""
+"VPSIGNW xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F38.WIG 09 /r","V","V","AVX",""
+"VPSIGNW ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F38.WIG 09 /r","V","V","AVX2",""
+"VPSLLD xmm1, xmm2, imm8u","VEX.NDD.128.66.0F.WIG 72 /6 ib","V","V","AVX",""
+"VPSLLD xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F.WIG F2 /r","V","V","AVX",""
+"VPSLLDQ xmm1, xmm2, imm8u","VEX.NDD.128.66.0F.WIG 73 /7 ib","V","V","AVX",""
+"VPSLLDQ ymm1, ymm2, imm8u","VEX.NDD.256.66.0F.WIG 73 /7 ib","V","V","AVX2",""
+"VPSLLQ xmm1, xmm2, imm8u","VEX.NDD.128.66.0F.WIG 73 /6 ib","V","V","AVX",""
+"VPSLLQ xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F.WIG F3 /r","V","V","AVX",""
+"VPSLLVD xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F38.W0 47 /r","V","V","AVX2",""
+"VPSLLVD ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F38.W0 47 /r","V","V","AVX2",""
+"VPSLLVQ xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F38.W1 47 /r","V","V","AVX2",""
+"VPSLLVQ ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F38.W1 47 /r","V","V","AVX2",""
+"VPSLLW xmm1, xmm2, imm8u","VEX.NDD.128.66.0F.WIG 71 /6 ib","V","V","AVX",""
+"VPSLLW xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F.WIG F1 /r","V","V","AVX",""
+"VPSLLW ymm1, ymm2, imm8u","VEX.NDD.256.66.0F.WIG 71 /6 ib","V","V","AVX2",""
+"VPSLLW ymm1, ymm2, xmm3/m128","VEX.NDS.256.66.0F.WIG F1 /r","V","V","AVX2",""
+"VPSRAD xmm1, xmm2, imm8u","VEX.NDD.128.66.0F.WIG 72 /4 ib","V","V","AVX",""
+"VPSRAD xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F.WIG E2 /r","V","V","AVX",""
+"VPSRAD ymm1, ymm2, imm8u","VEX.NDD.256.66.0F.WIG 72 /4 ib","V","V","AVX2",""
+"VPSRAD ymm1, ymm2, xmm3/m128","VEX.NDS.256.66.0F.WIG E2 /r","V","V","AVX2",""
+"VPSRAVD xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F38.W0 46 /r","V","V","AVX2",""
+"VPSRAVD ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F38.W0 46 /r","V","V","AVX2",""
+"VPSRAW xmm1, xmm2, imm8u","VEX.NDD.128.66.0F.WIG 71 /4 ib","V","V","AVX",""
+"VPSRAW xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F.WIG E1 /r","V","V","AVX",""
+"VPSRAW ymm1, ymm2, imm8u","VEX.NDD.256.66.0F.WIG 71 /4 ib","V","V","AVX2",""
+"VPSRAW ymm1, ymm2, xmm3/m128","VEX.NDS.256.66.0F.WIG E1 /r","V","V","AVX2",""
+"VPSRLD xmm1, xmm2, imm8u","VEX.NDD.128.66.0F.WIG 72 /2 ib","V","V","AVX",""
+"VPSRLD xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F.WIG D2 /r","V","V","AVX",""
+"VPSRLDQ xmm1, xmm2, imm8u","VEX.NDD.128.66.0F.WIG 73 /3 ib","V","V","AVX",""
+"VPSRLDQ ymm1, ymm2, imm8u","VEX.NDD.256.66.0F.WIG 73 /3 ib","V","V","AVX2",""
+"VPSRLQ xmm1, xmm2, imm8u","VEX.NDD.128.66.0F.WIG 73 /2 ib","V","V","AVX",""
+"VPSRLQ xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F.WIG D3 /r","V","V","AVX",""
+"VPSRLVD xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F38.W0 45 /r","V","V","AVX2",""
+"VPSRLVD ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F38.W0 45 /r","V","V","AVX2",""
+"VPSRLVQ xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F38.W1 45 /r","V","V","AVX2",""
+"VPSRLVQ ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F38.W1 45 /r","V","V","AVX2",""
+"VPSRLW xmm1, xmm2, imm8u","VEX.NDD.128.66.0F.WIG 71 /2 ib","V","V","AVX",""
+"VPSRLW xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F.WIG D1 /r","V","V","AVX",""
+"VPSRLW ymm1, ymm2, imm8u","VEX.NDD.256.66.0F.WIG 71 /2 ib","V","V","AVX2",""
+"VPSRLW ymm1, ymm2, xmm3/m128","VEX.NDS.256.66.0F.WIG D1 /r","V","V","AVX2",""
+"VPSUBB xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F.WIG F8 /r","V","V","AVX",""
+"VPSUBB ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F.WIG F8 /r","V","V","AVX2",""
+"VPSUBD xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F.WIG FA /r","V","V","AVX",""
+"VPSUBD ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F.WIG FA /r","V","V","AVX2",""
+"VPSUBQ xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F.WIG FB /r","V","V","AVX",""
+"VPSUBQ ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F.WIG FB /r","V","V","AVX2",""
+"VPSUBSB xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F.WIG E8 /r","V","V","AVX",""
+"VPSUBSB ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F.WIG E8 /r","V","V","AVX2",""
+"VPSUBSW xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F.WIG E9 /r","V","V","AVX",""
+"VPSUBSW ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F.WIG E9 /r","V","V","AVX2",""
+"VPSUBUSB xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F.WIG D8 /r","V","V","AVX",""
+"VPSUBUSB ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F.WIG D8 /r","V","V","AVX2",""
+"VPSUBUSW xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F.WIG D9 /r","V","V","AVX",""
+"VPSUBUSW ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F.WIG D9 /r","V","V","AVX2",""
+"VPSUBW xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F.WIG F9 /r","V","V","AVX",""
+"VPSUBW ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F.WIG F9 /r","V","V","AVX2",""
+"VPTEST xmm1, xmm2/m128","VEX.128.66.0F38.WIG 17 /r","V","V","AVX",""
+"VPTEST ymm1, ymm2/m256","VEX.256.66.0F38.WIG 17 /r","V","V","AVX",""
+"VPUNPCKHBW xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F.WIG 68 /r","V","V","AVX",""
+"VPUNPCKHBW ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F.WIG 68 /r","V","V","AVX2",""
+"VPUNPCKHDQ xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F.WIG 6A /r","V","V","AVX",""
+"VPUNPCKHDQ ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F.WIG 6A /r","V","V","AVX2",""
+"VPUNPCKHQDQ xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F.WIG 6D /r","V","V","AVX",""
+"VPUNPCKHQDQ ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F.WIG 6D /r","V","V","AVX2",""
+"VPUNPCKHWD xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F.WIG 69 /r","V","V","AVX",""
+"VPUNPCKHWD ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F.WIG 69 /r","V","V","AVX2",""
+"VPUNPCKLBW xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F.WIG 60 /r","V","V","AVX",""
+"VPUNPCKLBW ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F.WIG 60 /r","V","V","AVX2",""
+"VPUNPCKLDQ xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F.WIG 62 /r","V","V","AVX",""
+"VPUNPCKLDQ ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F.WIG 62 /r","V","V","AVX2",""
+"VPUNPCKLQDQ xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F.WIG 6C /r","V","V","AVX",""
+"VPUNPCKLQDQ ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F.WIG 6C /r","V","V","AVX2",""
+"VPUNPCKLWD xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F.WIG 61 /r","V","V","AVX",""
+"VPUNPCKLWD ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F.WIG 61 /r","V","V","AVX2",""
+"VPXOR xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F.WIG EF /r","V","V","AVX",""
+"VPXOR ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F.WIG EF /r","V","V","AVX2",""
+"VRCPPS xmm1, xmm2/m128","VEX.128.0F.WIG 53 /r","V","V","AVX",""
+"VRCPPS ymm1, ymm2/m256","VEX.256.0F.WIG 53 /r","V","V","AVX",""
+"VRCPSS xmm1, xmm2, xmm3/m32","VEX.NDS.LIG.F3.0F.WIG 53 /r","V","V","AVX",""
+"VROUNDPD xmm1, xmm2/m128, imm8","VEX.128.66.0F3A.WIG 09 /r ib","V","V","AVX",""
+"VROUNDPD ymm1, ymm2/m256, imm8","VEX.256.66.0F3A.WIG 09 /r ib","V","V","AVX",""
+"VROUNDPS xmm1, xmm2/m128, imm8","VEX.128.66.0F3A.WIG 08 /r ib","V","V","AVX",""
+"VROUNDPS ymm1, ymm2/m256, imm8","VEX.256.66.0F3A.WIG 08 /r ib","V","V","AVX",""
+"VROUNDSD xmm1, xmm2, xmm3/m64, imm8","VEX.NDS.LIG.66.0F3A.WIG 0B /r ib","V","V","AVX",""
+"VROUNDSS xmm1, xmm2, xmm3/m32, imm8","VEX.NDS.LIG.66.0F3A.WIG 0A /r ib","V","V","AVX",""
+"VRSQRTPS xmm1, xmm2/m128","VEX.128.0F.WIG 52 /r","V","V","AVX",""
+"VRSQRTPS ymm1, ymm2/m256","VEX.256.0F.WIG 52 /r","V","V","AVX",""
+"VRSQRTSS xmm1, xmm2, xmm3/m32","VEX.NDS.LIG.F3.0F.WIG 52 /r","V","V","AVX",""
+"VSHUFPD xmm1, xmm2, xmm3/m128, imm8","VEX.NDS.128.66.0F.WIG C6 /r ib","V","V","AVX",""
+"VSHUFPD ymm1, ymm2, ymm3/m256, imm8","VEX.NDS.256.66.0F.WIG C6 /r ib","V","V","AVX",""
+"VSHUFPS xmm1, xmm2, xmm3/m128, imm8","VEX.NDS.128.0F.WIG C6 /r ib","V","V","AVX",""
+"VSHUFPS ymm1, ymm2, ymm3/m256, imm8","VEX.NDS.256.0F.WIG C6 /r ib","V","V","AVX",""
+"VSQRTPD xmm1, xmm2/m128","VEX.128.66.0F.WIG 51 /r","V","V","AVX",""
+"VSQRTPD ymm1, ymm2/m256","VEX.256.66.0F.WIG 51 /r","V","V","AVX",""
+"VSQRTPS xmm1, xmm2/m128","VEX.128.0F.WIG 51 /r","V","V","AVX",""
+"VSQRTPS ymm1, ymm2/m256","VEX.256.0F.WIG 51 /r","V","V","AVX",""
+"VSQRTSD xmm1, xmm2, xmm3/m64","VEX.NDS.LIG.F2.0F.WIG 51 /r","V","V","AVX",""
+"VSQRTSS xmm1, xmm2, xmm3/m32","VEX.NDS.LIG.F3.0F.WIG 51 /r","V","V","AVX",""
+"VSTMXCSR m32","VEX.LZ.0F.WIG AE /3","V","V","AVX",""
+"VSUBPD xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F.WIG 5C /r","V","V","AVX",""
+"VSUBPD ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F.WIG 5C /r","V","V","AVX",""
+"VSUBPS xmm1, xmm2, xmm3/m128","VEX.NDS.128.0F.WIG 5C /r","V","V","AVX",""
+"VSUBPS ymm1, ymm2, ymm3/m256","VEX.NDS.256.0F.WIG 5C /r","V","V","AVX",""
+"VSUBSD xmm1, xmm2, xmm3/m64","VEX.NDS.LIG.F2.0F.WIG 5C /r","V","V","AVX",""
+"VSUBSS xmm1, xmm2, xmm3/m32","VEX.NDS.LIG.F3.0F.WIG 5C /r","V","V","AVX",""
+"VTESTPD xmm1, xmm2/m128","VEX.128.66.0F38.W0 0F /r","V","V","AVX",""
+"VTESTPD ymm1, ymm2/m256","VEX.256.66.0F38.W0 0F /r","V","V","AVX",""
+"VTESTPS xmm1, xmm2/m128","VEX.128.66.0F38.W0 0E /r","V","V","AVX",""
+"VTESTPS ymm1, ymm2/m256","VEX.256.66.0F38.W0 0E /r","V","V","AVX",""
+"VUCOMISD xmm1, xmm2/m64","VEX.LIG.66.0F.WIG 2E /r","V","V","AVX",""
+"VUCOMISS xmm1, xmm2/m32","VEX.LIG.0F.WIG 2E /r","V","V","AVX",""
+"VUNPCKHPD xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F.WIG 15 /r","V","V","AVX",""
+"VUNPCKHPD ymm1,ymm2, ymm3/m256","VEX.NDS.256.66.0F.WIG 15 /r","V","V","AVX",""
+"VUNPCKHPS xmm1, xmm2, xmm3/m128","VEX.NDS.128.0F.WIG 15 /r","V","V","AVX",""
+"VUNPCKHPS ymm1,ymm2,ymm3/m256","VEX.NDS.256.0F.WIG 15 /r","V","V","AVX",""
+"VUNPCKLPD xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F.WIG 14 /r","V","V","AVX",""
+"VUNPCKLPD ymm1,ymm2, ymm3/m256","VEX.NDS.256.66.0F.WIG 14 /r","V","V","AVX",""
+"VUNPCKLPS xmm1, xmm2, xmm3/m128","VEX.NDS.128.0F.WIG 14 /r","V","V","AVX",""
+"VUNPCKLPS ymm1,ymm2,ymm3/m256","VEX.NDS.256.0F.WIG 14 /r","V","V","AVX",""
+"VXORPD xmm1, xmm2, xmm3/m128","VEX.NDS.128.66.0F.WIG 57 /r","V","V","AVX",""
+"VXORPD ymm1, ymm2, ymm3/m256","VEX.NDS.256.66.0F.WIG 57 /r","V","V","AVX",""
+"VXORPS xmm1, xmm2, xmm3/m128","VEX.NDS.128.0F.WIG 57 /r","V","V","AVX",""
+"VXORPS ymm1, ymm2, ymm3/m256","VEX.NDS.256.0F.WIG 57 /r","V","V","AVX",""
+"VZEROALL","VEX.256.0F.WIG 77","V","V","AVX",""
+"VZEROUPPER","VEX.128.0F.WIG 77","V","V","AVX",""
+"WAIT","9B","V","V","","pseudo"
+"WBINVD","0F 09","V","V","",""
+"WRFSBASE r/m32","F3 0F AE /2","I","V","FSGSBASE","operand16,operand32"
+"WRFSBASE r/m64","REX.W + F3 0F AE /2","I","V","FSGSBASE",""
+"WRGSBASE r/m32","F3 0F AE /3","I","V","FSGSBASE","operand16,operand32"
+"WRGSBASE r/m64","REX.W + F3 0F AE /3","I","V","FSGSBASE",""
+"WRMSR","0F 30","V","V","",""
+"XABORT imm8u","C6 F8 ib","V","V","RTM",""
+"XACQUIRE","F2","V","V","HLE","pseudo"
+"XADD r/m16, r16","0F C1 /r","V","V","","operand16"
+"XADD r/m32, r32","0F C1 /r","V","V","","operand32"
+"XADD r/m64, r64","REX.W + 0F C1 /r","N.E.","V","",""
+"XADD r/m8, r8","0F C0 /r","V","V","",""
+"XADD r/m8, r8","REX + 0F C0 /r","N.E.","V","","pseudo64"
+"XBEGIN rel16","C7 F8 cw","V","V","RTM","operand16"
+"XBEGIN rel32","C7 F8 cd","V","V","RTM","operand32,operand64"
+"XCHG AX, r16op","90+rw","V","V","","pseudo"
+"XCHG EAX, r32op","90+rd","V","V","","pseudo"
+"XCHG RAX, r64op","REX.W + 90+rd","N.E.","V","","pseudo"
+"XCHG r/m16, r16","87 /r","V","V","","operand16"
+"XCHG r/m32, r32","87 /r","V","V","","operand32"
+"XCHG r/m64, r64","REX.W + 87 /r","N.E.","V","",""
+"XCHG r/m8, r8","86 /r","V","V","",""
+"XCHG r/m8, r8","REX + 86 /r","N.E.","V","","pseudo64"
+"XCHG r16op, AX","90+rw","V","V","","operand16"
+"XCHG r16, r/m16","87 /r","V","V","","pseudo"
+"XCHG r32op, EAX","90+rd","V","V","","operand32"
+"XCHG r32, r/m32","87 /r","V","V","","pseudo"
+"XCHG r64op, RAX","REX.W + 90+rd","N.E.","V","",""
+"XCHG r64, r/m64","REX.W + 87 /r","N.E.","V","","pseudo"
+"XCHG r8, r/m8","86 /r","V","V","","pseudo"
+"XCHG r8, r/m8","REX + 86 /r","N.E.","V","","pseudo"
+"XEND","0F 01 D5","V","V","RTM",""
+"XGETBV","0F 01 D0","V","V","",""
+"XLAT m8","D7","V","V","","pseudo"
+"XLATB","D7","V","V","",""
+"XLATB","REX.W + D7","N.E.","V","",""
+"XOR AL, imm8u","34 ib","V","V","",""
+"XOR AX, imm16","35 iw","V","V","","operand16"
+"XOR EAX, imm32","35 id","V","V","","operand32"
+"XOR RAX, imm32","REX.W + 35 id","N.E.","V","",""
+"XOR r/m16, imm16","81 /6 iw","V","V","","operand16"
+"XOR r/m16, imm8","83 /6 ib","V","V","","operand16"
+"XOR r/m16, r16","31 /r","V","V","","operand16"
+"XOR r/m32, imm32","81 /6 id","V","V","","operand32"
+"XOR r/m32, imm8","83 /6 ib","V","V","","operand32"
+"XOR r/m32, r32","31 /r","V","V","","operand32"
+"XOR r/m64, imm32","REX.W + 81 /6 id","N.E.","V","",""
+"XOR r/m64, imm8","REX.W + 83 /6 ib","N.E.","V","",""
+"XOR r/m64, r64","REX.W + 31 /r","N.E.","V","",""
+"XOR r/m8, imm8u","80 /6 ib","V","V","",""
+"XOR r/m8, imm8u","REX + 80 /6 ib","N.E.","V","","pseudo64"
+"XOR r/m8, r8","30 /r","V","V","",""
+"XOR r/m8, r8","REX + 30 /r","N.E.","V","","pseudo64"
+"XOR r16, r/m16","33 /r","V","V","","operand16"
+"XOR r32, r/m32","33 /r","V","V","","operand32"
+"XOR r64, r/m64","REX.W + 33 /r","N.E.","V","",""
+"XOR r8, r/m8","32 /r","V","V","",""
+"XOR r8, r/m8","REX + 32 /r","N.E.","V","","pseudo64"
+"XORPD xmm1, xmm2/m128","66 0F 57 /r","V","V","SSE2",""
+"XORPS xmm1, xmm2/m128","0F 57 /r","V","V","SSE",""
+"XRELEASE","F3","V","V","HLE","pseudo"
+"XRSTOR mem","0F AE /5","V","V","","operand16,operand32"
+"XRSTOR64 mem","REX.W + 0F AE /5","N.E.","V","",""
+"XRSTORS mem","0F C7 /3","V","V","","operand16,operand32"
+"XRSTORS64 mem","REX.W + 0F C7 /3","N.E.","V","",""
+"XSAVE mem","0F AE /4","V","V","","operand16,operand32"
+"XSAVE64 mem","REX.W + 0F AE /4","N.E.","V","",""
+"XSAVEC mem","0F C7 /4","V","V","","operand16,operand32"
+"XSAVEC64 mem","REX.W + 0F C7 /4","N.E.","V","",""
+"XSAVEOPT mem","0F AE /6","V","V","XSAVEOPT","operand16,operand32"
+"XSAVEOPT64 mem","REX.W + 0F AE /6","V","V","XSAVEOPT",""
+"XSAVES mem","0F C7 /5","V","V","","operand16,operand32"
+"XSAVES64 mem","REX.W + 0F C7 /5","N.E.","V","",""
+"XSETBV","0F 01 D1","V","V","",""
+"XTEST","0F 01 D6","V","V","HLE or RTM",""
diff --git a/x86/x86asm/Makefile b/x86/x86asm/Makefile
new file mode 100644
index 0000000..9eb4557
--- /dev/null
+++ b/x86/x86asm/Makefile
@@ -0,0 +1,3 @@
+# Regenerate the decoder tables; tables.go is replaced only after generation and gofmt both succeed.
+tables.go: ../x86map/map.go ../x86.csv
+	go run ../x86map/map.go -fmt=decoder ../x86.csv >_tables.go && gofmt -w _tables.go && mv _tables.go tables.go
diff --git a/x86/x86asm/decode.go b/x86/x86asm/decode.go
new file mode 100644
index 0000000..e4122c1
--- /dev/null
+++ b/x86/x86asm/decode.go
@@ -0,0 +1,1646 @@
+// Copyright 2014 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Table-driven decoding of x86 instructions.
+
+package x86asm
+
+import (
+	"encoding/binary"
+	"errors"
+	"fmt"
+	"runtime"
+)
+
+// Set trace to true to cause the decoder to print the PC sequence
+// of the executed instruction codes. This is typically only useful
+// when you are running a test of a single input case.
+const trace = false
+
+// A decodeOp is a single instruction in the decoder bytecode program.
+//
+// The decodeOps correspond to consuming and conditionally branching
+// on input bytes, consuming additional fields, and then interpreting
+// consumed data as instruction arguments. The names of the xRead and xArg
+// operations are taken from the Intel manual conventions, for example
+// Volume 2, Section 3.1.1, page 487 of
+// http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-manual-325462.pdf
+//
+// The actual decoding program is generated by ../x86map.
+//
+// TODO(rsc): We may be able to merge various of the memory operands
+// since we don't care about, say, the distinction between m80dec and m80bcd.
+// Similarly, mm and mm1 have identical meaning, as do xmm and xmm1.
+
+type decodeOp uint16
+
+const (
+	xFail  decodeOp = iota // invalid instruction (return)
+	xMatch                 // completed match
+	xJump                  // jump to pc
+
+	xCondByte     // switch on instruction byte value
+	xCondSlashR   // read and switch on instruction /r value
+	xCondPrefix   // switch on presence of instruction prefix
+	xCondIs64     // switch on 64-bit processor mode
+	xCondDataSize // switch on operand size
+	xCondAddrSize // switch on address size
+	xCondIsMem    // switch on memory vs register argument
+
+	xSetOp // set instruction opcode
+
+	xReadSlashR // read /r
+	xReadIb     // read ib
+	xReadIw     // read iw
+	xReadId     // read id
+	xReadIo     // read io
+	xReadCb     // read cb
+	xReadCw     // read cw
+	xReadCd     // read cd
+	xReadCp     // read cp
+	xReadCm     // read cm
+
+	xArg1            // arg 1
+	xArg3            // arg 3
+	xArgAL           // arg AL
+	xArgAX           // arg AX
+	xArgCL           // arg CL
+	xArgCR0dashCR7   // arg CR0-CR7
+	xArgCS           // arg CS
+	xArgDR0dashDR7   // arg DR0-DR7
+	xArgDS           // arg DS
+	xArgDX           // arg DX
+	xArgEAX          // arg EAX
+	xArgEDX          // arg EDX
+	xArgES           // arg ES
+	xArgFS           // arg FS
+	xArgGS           // arg GS
+	xArgImm16        // arg imm16
+	xArgImm32        // arg imm32
+	xArgImm64        // arg imm64
+	xArgImm8         // arg imm8
+	xArgImm8u        // arg imm8 but record as unsigned
+	xArgImm16u       // arg imm16 but record as unsigned
+	xArgM            // arg m
+	xArgM128         // arg m128
+	xArgM1428byte    // arg m14/28byte
+	xArgM16          // arg m16
+	xArgM16and16     // arg m16&16
+	xArgM16and32     // arg m16&32
+	xArgM16and64     // arg m16&64
+	xArgM16colon16   // arg m16:16
+	xArgM16colon32   // arg m16:32
+	xArgM16colon64   // arg m16:64
+	xArgM16int       // arg m16int
+	xArgM2byte       // arg m2byte
+	xArgM32          // arg m32
+	xArgM32and32     // arg m32&32
+	xArgM32fp        // arg m32fp
+	xArgM32int       // arg m32int
+	xArgM512byte     // arg m512byte
+	xArgM64          // arg m64
+	xArgM64fp        // arg m64fp
+	xArgM64int       // arg m64int
+	xArgM8           // arg m8
+	xArgM80bcd       // arg m80bcd
+	xArgM80dec       // arg m80dec
+	xArgM80fp        // arg m80fp
+	xArgM94108byte   // arg m94/108byte
+	xArgMm           // arg mm
+	xArgMm1          // arg mm1
+	xArgMm2          // arg mm2
+	xArgMm2M64       // arg mm2/m64
+	xArgMmM32        // arg mm/m32
+	xArgMmM64        // arg mm/m64
+	xArgMem          // arg mem
+	xArgMoffs16      // arg moffs16
+	xArgMoffs32      // arg moffs32
+	xArgMoffs64      // arg moffs64
+	xArgMoffs8       // arg moffs8
+	xArgPtr16colon16 // arg ptr16:16
+	xArgPtr16colon32 // arg ptr16:32
+	xArgR16          // arg r16
+	xArgR16op        // arg r16 with +rw in opcode
+	xArgR32          // arg r32
+	xArgR32M16       // arg r32/m16
+	xArgR32M8        // arg r32/m8
+	xArgR32op        // arg r32 with +rd in opcode
+	xArgR64          // arg r64
+	xArgR64M16       // arg r64/m16
+	xArgR64op        // arg r64 with +rd in opcode
+	xArgR8           // arg r8
+	xArgR8op         // arg r8 with +rb in opcode
+	xArgRAX          // arg RAX
+	xArgRDX          // arg RDX
+	xArgRM           // arg r/m
+	xArgRM16         // arg r/m16
+	xArgRM32         // arg r/m32
+	xArgRM64         // arg r/m64
+	xArgRM8          // arg r/m8
+	xArgReg          // arg reg
+	xArgRegM16       // arg reg/m16
+	xArgRegM32       // arg reg/m32
+	xArgRegM8        // arg reg/m8
+	xArgRel16        // arg rel16
+	xArgRel32        // arg rel32
+	xArgRel8         // arg rel8
+	xArgSS           // arg SS
+	xArgST           // arg ST, aka ST(0)
+	xArgSTi          // arg ST(i) with +i in opcode
+	xArgSreg         // arg Sreg
+	xArgTR0dashTR7   // arg TR0-TR7
+	xArgXmm          // arg xmm
+	xArgXMM0         // arg <XMM0>
+	xArgXmm1         // arg xmm1
+	xArgXmm2         // arg xmm2
+	xArgXmm2M128     // arg xmm2/m128
+	xArgXmm2M16      // arg xmm2/m16
+	xArgXmm2M32      // arg xmm2/m32
+	xArgXmm2M64      // arg xmm2/m64
+	xArgXmmM128      // arg xmm/m128
+	xArgXmmM32       // arg xmm/m32
+	xArgXmmM64       // arg xmm/m64
+	xArgRmf16        // arg r/m16 but force mod=3
+	xArgRmf32        // arg r/m32 but force mod=3
+	xArgRmf64        // arg r/m64 but force mod=3
+)
+
+// instPrefix builds the Inst that stands for a lone prefix byte b.
+// Callers fall back to it when a prefix is followed by a byte sequence
+// that is unintelligible or is not a valid instruction.
+func instPrefix(b byte, mode int) (Inst, error) {
+	// Under tracing, print the caller's position to show who reported the error.
+	if trace {
+		_, callerFile, callerLine, _ := runtime.Caller(1)
+		fmt.Printf("%s:%d\n", callerFile, callerLine)
+	}
+	pfx := Prefix(b)
+	if pfx == PrefixDataSize {
+		// 16-bit mode maps the data-size prefix to Data32; other modes to Data16.
+		pfx = PrefixData16
+		if mode == 16 {
+			pfx = PrefixData32
+		}
+	} else if pfx == PrefixAddrSize {
+		// 32-bit mode maps the address-size prefix to Addr16; other modes to Addr32.
+		pfx = PrefixAddr32
+		if mode == 32 {
+			pfx = PrefixAddr16
+		}
+	}
+	// Note: using composite literal with Prefix key confuses 'bundle' tool.
+	var result Inst
+	result.Len = 1
+	result.Prefix = Prefixes{pfx}
+	return result, nil
+}
+
+// truncated reports a truncated instruction; an empty src yields ErrTruncated.
+func truncated(src []byte, mode int) (Inst, error) {
+	if len(src) == 0 {
+		return Inst{}, ErrTruncated // no first byte for instPrefix to describe
+	}
+	return instPrefix(src[0], mode) // for now, report just the first byte
+}
+
+// These are the errors returned by Decode.
+var (
+	ErrInvalidMode  = errors.New("invalid x86 mode in Decode")
+	ErrTruncated    = errors.New("truncated instruction")
+	ErrUnrecognized = errors.New("unrecognized instruction")
+)
+
+// decoderCover records coverage information for which parts
+// of the byte code have been executed.
+// TODO(rsc): This is for testing. Only use this if a flag is given.
+var decoderCover []bool
+
+// Decode decodes the leading bytes in src as a single instruction.
+// The mode argument specifies the assumed processor mode: 16, 32, or 64
+// for 16-, 32-, and 64-bit execution; any other value yields ErrInvalidMode.
+func Decode(src []byte, mode int) (inst Inst, err error) {
+	return decode1(src, mode, false)
+}
+
+// decode1 is the implementation of Decode but takes an extra
+// gnuCompat flag to cause it to change its behavior to mimic
+// bugs (or at least unique features) of GNU libopcodes as used
+// by objdump. We don't believe that logic is the right thing to do
+// in general, but when testing against libopcodes it simplifies the
+// comparison if we adjust a few small pieces of logic.
+// The affected logic is in the conditional branch for "mandatory" prefixes,
+// case xCondPrefix.
+func decode1(src []byte, mode int, gnuCompat bool) (Inst, error) {
+	switch mode {
+	case 16, 32, 64:
+		// ok
+		// TODO(rsc): 64-bit mode not tested, probably not working.
+	default:
+		return Inst{}, ErrInvalidMode
+	}
+
+	// Maximum instruction size is 15 bytes.
+	// If we need to read more, return 'truncated instruction'.
+	if len(src) > 15 {
+		src = src[:15]
+	}
+
+	var (
+		// prefix decoding information
+		pos           = 0    // position reading src
+		nprefix       = 0    // number of prefixes
+		lockIndex     = -1   // index of LOCK prefix in src and inst.Prefix
+		repIndex      = -1   // index of REP/REPN prefix in src and inst.Prefix
+		segIndex      = -1   // index of Group 2 prefix in src and inst.Prefix
+		dataSizeIndex = -1   // index of Group 3 prefix in src and inst.Prefix
+		addrSizeIndex = -1   // index of Group 4 prefix in src and inst.Prefix
+		rex           Prefix // rex byte if present (or 0)
+		rexUsed       Prefix // bits used in rex byte
+		rexIndex      = -1   // index of rex byte
+
+		addrMode = mode // address mode (width in bits)
+		dataMode = mode // operand mode (width in bits)
+
+		// decoded ModR/M fields
+		haveModrm bool
+		modrm     int
+		mod       int
+		regop     int
+		rm        int
+
+		// if ModR/M is memory reference, Mem form
+		mem     Mem
+		haveMem bool
+
+		// decoded SIB fields
+		haveSIB bool
+		sib     int
+		scale   int
+		index   int
+		base    int
+		displen int
+		dispoff int
+
+		// decoded immediate values
+		imm     int64
+		imm8    int8
+		immc    int64
+		immcpos int
+
+		// output
+		opshift int
+		inst    Inst
+		narg    int // number of arguments written to inst
+	)
+
+	if mode == 64 {
+		dataMode = 32
+	}
+
+	// Prefixes are certainly the most complex and underspecified part of
+	// decoding x86 instructions. Although the manuals say things like
+	// up to four prefixes, one from each group, nearly everyone seems to
+	// agree that in practice as many prefixes as possible, including multiple
+	// from a particular group or repetitions of a given prefix, can be used on
+	// an instruction, provided the total instruction length including prefixes
+	// does not exceed the agreed-upon maximum of 15 bytes.
+	// Everyone also agrees that if one of these prefixes is the LOCK prefix
+	// and the instruction is not one of the instructions that can be used with
+	// the LOCK prefix or if the destination is not a memory operand,
+	// then the instruction is invalid and produces the #UD exception.
+	// However, that is the end of any semblance of agreement.
+	//
+	// What happens if prefixes are given that conflict with other prefixes?
+	// For example, the memory segment overrides CS, DS, ES, FS, GS, SS
+	// conflict with each other: only one segment can be in effect.
+	// Disassemblers seem to agree that later prefixes take priority over
+	// earlier ones. I have not taken the time to write assembly programs
+	// to check to see if the hardware agrees.
+	//
+	// What happens if prefixes are given that have no meaning for the
+	// specific instruction to which they are attached? It depends.
+	// If they really have no meaning, they are ignored. However, a future
+	// processor may assign a different meaning. As a disassembler, we
+	// don't really know whether we're seeing a meaningless prefix or one
+	// whose meaning we simply haven't been told yet.
+	//
+	// Combining the two questions, what happens when conflicting
+	// extension prefixes are given? No one seems to know for sure.
+	// For example, MOVQ is 66 0F D6 /r, MOVDQ2Q is F2 0F D6 /r,
+	// and MOVQ2DQ is F3 0F D6 /r. What is '66 F2 F3 0F D6 /r'?
+	// Which prefix wins? See the xCondPrefix case for more.
+	//
+	// Writing assembly test cases to divine which interpretation the
+	// CPU uses might clarify the situation, but more likely it would
+	// make the situation even less clear.
+
+	// Read non-REX prefixes.
+ReadPrefixes:
+	for ; pos < len(src); pos++ {
+		p := Prefix(src[pos])
+		switch p {
+		default:
+			nprefix = pos
+			break ReadPrefixes
+
+		// Group 1 - lock and repeat prefixes
+		// According to Intel, there should only be one from this set,
+		// but according to AMD both can be present.
+		case 0xF0:
+			if lockIndex >= 0 {
+				inst.Prefix[lockIndex] |= PrefixIgnored
+			}
+			lockIndex = pos
+		case 0xF2, 0xF3:
+			if repIndex >= 0 {
+				inst.Prefix[repIndex] |= PrefixIgnored
+			}
+			repIndex = pos
+
+		// Group 2 - segment override / branch hints
+		case 0x26, 0x2E, 0x36, 0x3E:
+			if mode == 64 {
+				p |= PrefixIgnored
+				break
+			}
+			fallthrough
+		case 0x64, 0x65:
+			if segIndex >= 0 {
+				inst.Prefix[segIndex] |= PrefixIgnored
+			}
+			segIndex = pos
+
+		// Group 3 - operand size override
+		case 0x66:
+			if mode == 16 {
+				dataMode = 32
+				p = PrefixData32
+			} else {
+				dataMode = 16
+				p = PrefixData16
+			}
+			if dataSizeIndex >= 0 {
+				inst.Prefix[dataSizeIndex] |= PrefixIgnored
+			}
+			dataSizeIndex = pos
+
+		// Group 4 - address size override
+		case 0x67:
+			if mode == 32 {
+				addrMode = 16
+				p = PrefixAddr16
+			} else {
+				addrMode = 32
+				p = PrefixAddr32
+			}
+			if addrSizeIndex >= 0 {
+				inst.Prefix[addrSizeIndex] |= PrefixIgnored
+			}
+			addrSizeIndex = pos
+		}
+
+		if pos >= len(inst.Prefix) {
+			return instPrefix(src[0], mode) // too long
+		}
+
+		inst.Prefix[pos] = p
+	}
+
+	// Read REX prefix.
+	if pos < len(src) && mode == 64 && Prefix(src[pos]).IsREX() {
+		rex = Prefix(src[pos])
+		rexIndex = pos
+		if pos >= len(inst.Prefix) {
+			return instPrefix(src[0], mode) // too long
+		}
+		inst.Prefix[pos] = rex
+		pos++
+		if rex&PrefixREXW != 0 {
+			dataMode = 64
+			if dataSizeIndex >= 0 {
+				inst.Prefix[dataSizeIndex] |= PrefixIgnored
+			}
+		}
+	}
+
+	// Decode instruction stream, interpreting decoding instructions.
+	// opshift gives the shift to use when saving the next
+	// opcode byte into inst.Opcode.
+	opshift = 24
+	if decoderCover == nil {
+		decoderCover = make([]bool, len(decoder))
+	}
+
+	// Decode loop, executing decoder program.
+	var oldPC, prevPC int
+Decode:
+	for pc := 1; ; { // TODO uint
+		oldPC = prevPC
+		prevPC = pc
+		if trace {
+			println("run", pc)
+		}
+		x := decoder[pc]
+		decoderCover[pc] = true
+		pc++
+
+		// Read and decode ModR/M if needed by opcode.
+		switch decodeOp(x) {
+		case xCondSlashR, xReadSlashR:
+			if haveModrm {
+				return Inst{Len: pos}, errInternal
+			}
+			haveModrm = true
+			if pos >= len(src) {
+				return truncated(src, mode)
+			}
+			modrm = int(src[pos])
+			pos++
+			if opshift >= 0 {
+				inst.Opcode |= uint32(modrm) << uint(opshift)
+				opshift -= 8
+			}
+			mod = modrm >> 6
+			regop = (modrm >> 3) & 07
+			rm = modrm & 07
+			if rex&PrefixREXR != 0 {
+				rexUsed |= PrefixREXR
+				regop |= 8
+			}
+			if addrMode == 16 {
+				// 16-bit modrm form
+				if mod != 3 {
+					haveMem = true
+					mem = addr16[rm]
+					if rm == 6 && mod == 0 {
+						mem.Base = 0
+					}
+
+					// Consume disp16 if present.
+					if mod == 0 && rm == 6 || mod == 2 {
+						if pos+2 > len(src) {
+							return truncated(src, mode)
+						}
+						mem.Disp = int64(binary.LittleEndian.Uint16(src[pos:]))
+						pos += 2
+					}
+
+					// Consume disp8 if present.
+					if mod == 1 {
+						if pos >= len(src) {
+							return truncated(src, mode)
+						}
+						mem.Disp = int64(int8(src[pos]))
+						pos++
+					}
+				}
+			} else {
+				haveMem = mod != 3
+
+				// 32-bit or 64-bit form
+				// Consume SIB encoding if present.
+				if rm == 4 && mod != 3 {
+					haveSIB = true
+					if pos >= len(src) {
+						return truncated(src, mode)
+					}
+					sib = int(src[pos])
+					pos++
+					if opshift >= 0 {
+						inst.Opcode |= uint32(sib) << uint(opshift)
+						opshift -= 8
+					}
+					scale = sib >> 6
+					index = (sib >> 3) & 07
+					base = sib & 07
+					if rex&PrefixREXB != 0 {
+						rexUsed |= PrefixREXB
+						base |= 8
+					}
+					if rex&PrefixREXX != 0 {
+						rexUsed |= PrefixREXX
+						index |= 8
+					}
+
+					mem.Scale = 1 << uint(scale)
+					if index == 4 {
+						// no mem.Index
+					} else {
+						mem.Index = baseRegForBits(addrMode) + Reg(index)
+					}
+					if base&7 == 5 && mod == 0 {
+						// no mem.Base
+					} else {
+						mem.Base = baseRegForBits(addrMode) + Reg(base)
+					}
+				} else {
+					if rex&PrefixREXB != 0 {
+						rexUsed |= PrefixREXB
+						rm |= 8
+					}
+					if mod == 0 && rm&7 == 5 || rm&7 == 4 {
+						// base omitted
+					} else if mod != 3 {
+						mem.Base = baseRegForBits(addrMode) + Reg(rm)
+					}
+				}
+
+				// Consume disp32 if present.
+				if mod == 0 && (rm&7 == 5 || haveSIB && base&7 == 5) || mod == 2 {
+					if pos+4 > len(src) {
+						return truncated(src, mode)
+					}
+					dispoff = pos
+					displen = 4
+					mem.Disp = int64(binary.LittleEndian.Uint32(src[pos:]))
+					pos += 4
+				}
+
+				// Consume disp8 if present.
+				if mod == 1 {
+					if pos >= len(src) {
+						return truncated(src, mode)
+					}
+					dispoff = pos
+					displen = 1
+					mem.Disp = int64(int8(src[pos]))
+					pos++
+				}
+
+				// In 64-bit, mod=0 rm=5 is PC-relative instead of just disp.
+				// See Vol 2A. Table 2-7.
+				if mode == 64 && mod == 0 && rm&7 == 5 {
+					if addrMode == 32 {
+						mem.Base = EIP
+					} else {
+						mem.Base = RIP
+					}
+				}
+			}
+
+			if segIndex >= 0 {
+				mem.Segment = prefixToSegment(inst.Prefix[segIndex])
+			}
+		}
+
+		// Execute single opcode.
+		switch decodeOp(x) {
+		default:
+			println("bad op", x, "at", pc-1, "from", oldPC)
+			return Inst{Len: pos}, errInternal
+
+		case xFail:
+			inst.Op = 0
+			break Decode
+
+		case xMatch:
+			break Decode
+
+		case xJump:
+			pc = int(decoder[pc])
+
+		// Conditional branches.
+
+		case xCondByte:
+			if pos >= len(src) {
+				return truncated(src, mode)
+			}
+			b := src[pos]
+			n := int(decoder[pc])
+			pc++
+			for i := 0; i < n; i++ {
+				xb, xpc := decoder[pc], int(decoder[pc+1])
+				pc += 2
+				if b == byte(xb) {
+					pc = xpc
+					pos++
+					if opshift >= 0 {
+						inst.Opcode |= uint32(b) << uint(opshift)
+						opshift -= 8
+					}
+					continue Decode
+				}
+			}
+			// xCondByte is the only conditional with a fall through,
+			// so that it can be used to pick off special cases before
+			// an xCondSlashR. If the fallthrough instruction is xFail,
+			// advance the position so that the decoded instruction
+			// size includes the byte we just compared against.
+			if decodeOp(decoder[pc]) == xJump {
+				pc = int(decoder[pc+1])
+			}
+			if decodeOp(decoder[pc]) == xFail {
+				pos++
+			}
+
+		case xCondIs64:
+			if mode == 64 {
+				pc = int(decoder[pc+1])
+			} else {
+				pc = int(decoder[pc])
+			}
+
+		case xCondIsMem:
+			mem := haveMem
+			if !haveModrm {
+				if pos >= len(src) {
+					return instPrefix(src[0], mode) // too long
+				}
+				mem = src[pos]>>6 != 3
+			}
+			if mem {
+				pc = int(decoder[pc+1])
+			} else {
+				pc = int(decoder[pc])
+			}
+
+		case xCondDataSize:
+			switch dataMode {
+			case 16:
+				if dataSizeIndex >= 0 {
+					inst.Prefix[dataSizeIndex] |= PrefixImplicit
+				}
+				pc = int(decoder[pc])
+			case 32:
+				if dataSizeIndex >= 0 {
+					inst.Prefix[dataSizeIndex] |= PrefixImplicit
+				}
+				pc = int(decoder[pc+1])
+			case 64:
+				rexUsed |= PrefixREXW
+				pc = int(decoder[pc+2])
+			}
+
+		case xCondAddrSize:
+			switch addrMode {
+			case 16:
+				if addrSizeIndex >= 0 {
+					inst.Prefix[addrSizeIndex] |= PrefixImplicit
+				}
+				pc = int(decoder[pc])
+			case 32:
+				if addrSizeIndex >= 0 {
+					inst.Prefix[addrSizeIndex] |= PrefixImplicit
+				}
+				pc = int(decoder[pc+1])
+			case 64:
+				pc = int(decoder[pc+2])
+			}
+
+		case xCondPrefix:
+			// Conditional branch based on presence or absence of prefixes.
+			// The conflict cases here are completely undocumented and
+			// differ significantly between GNU libopcodes and Intel xed.
+			// I have not written assembly code to divine what various CPUs
+			// do, but it wouldn't surprise me if they are not consistent either.
+			//
+			// The basic idea is to switch on the presence of a prefix, so that
+			// for example:
+			//
+			//	xCondPrefix, 4
+			//	0xF3, 123,
+			//	0xF2, 234,
+			//	0x66, 345,
+			//	0, 456
+			//
+			// branch to 123 if the F3 prefix is present, 234 if the F2 prefix
+			// is present, 345 if the 66 prefix is present, and 456 otherwise.
+			// The prefixes are given in descending order so that the 0 will be last.
+			//
+			// It is unclear what should happen if multiple conditions are
+			// satisfied: what if F2 and F3 are both present, or if 66 and F2
+			// are present, or if all three are present? The one chosen becomes
+			// part of the opcode and the others do not. Perhaps the answer
+			// depends on the specific opcodes in question.
+			//
+			// The only clear example is that CRC32 is F2 0F 38 F1 /r, and
+			// it comes in 16-bit and 32-bit forms based on the 66 prefix,
+			// so 66 F2 0F 38 F1 /r should be treated as F2 taking priority,
+			// with the 66 being only an operand size override, and probably
+			// F2 66 0F 38 F1 /r should be treated the same.
+			// Perhaps that rule is specific to the case of CRC32, since no
+			// 66 0F 38 F1 instruction is defined (today) (that we know of).
+			// However, both libopcodes and xed seem to generalize this
+			// example and choose F2/F3 in preference to 66, and we
+			// do the same.
+			//
+			// Next, what if both F2 and F3 are present? Which wins?
+			// The Intel xed rule, and ours, is that the one that occurs last wins.
+			// The GNU libopcodes rule, which we implement only in gnuCompat mode,
+			// is that F3 beats F2 unless F3 has no special meaning, in which
+			// case F3 can be a modifier on an F2 special meaning.
+			//
+			// Concretely,
+			//	66 0F D6 /r is MOVQ
+			//	F2 0F D6 /r is MOVDQ2Q
+			//	F3 0F D6 /r is MOVQ2DQ.
+			//
+			//	F2 66 0F D6 /r is 66 + MOVDQ2Q always.
+			//	66 F2 0F D6 /r is 66 + MOVDQ2Q always.
+			//	F3 66 0F D6 /r is 66 + MOVQ2DQ always.
+			//	66 F3 0F D6 /r is 66 + MOVQ2DQ always.
+			//	F2 F3 0F D6 /r is F2 + MOVQ2DQ always.
+			//	F3 F2 0F D6 /r is F3 + MOVDQ2Q in Intel xed, but F2 + MOVQ2DQ in GNU libopcodes.
+			//	Adding 66 anywhere in the prefix section of the
+			//	last two cases does not change the outcome.
+			//
+			// Finally, what if there is a variant in which 66 is a mandatory
+			// prefix rather than an operand size override, but we know of
+			// no corresponding F2/F3 form, and we see both F2/F3 and 66.
+			// Does F2/F3 still take priority, so that the result is an unknown
+			// instruction, or does the 66 take priority, so that the extended
+			// 66 instruction should be interpreted as having a REP/REPN prefix?
+			// Intel xed does the former and GNU libopcodes does the latter.
+			// We side with Intel xed, unless we are trying to match libopcodes
+			// more closely during the comparison-based test suite.
+			//
+			// In 64-bit mode REX.W is another valid prefix to test for, but
+			// there is less ambiguity about that. When present, REX.W is
+			// always the first entry in the table.
+			n := int(decoder[pc])
+			pc++
+			sawF3 := false
+			for j := 0; j < n; j++ {
+				prefix := Prefix(decoder[pc+2*j])
+				if prefix.IsREX() {
+					rexUsed |= prefix
+					if rex&prefix == prefix {
+						pc = int(decoder[pc+2*j+1])
+						continue Decode
+					}
+					continue
+				}
+				ok := false
+				if prefix == 0 {
+					ok = true
+				} else if prefix.IsREX() {
+					rexUsed |= prefix
+					if rex&prefix == prefix {
+						ok = true
+					}
+				} else {
+					if prefix == 0xF3 {
+						sawF3 = true
+					}
+					switch prefix {
+					case PrefixLOCK:
+						if lockIndex >= 0 {
+							inst.Prefix[lockIndex] |= PrefixImplicit
+							ok = true
+						}
+					case PrefixREP, PrefixREPN:
+						if repIndex >= 0 && inst.Prefix[repIndex]&0xFF == prefix {
+							inst.Prefix[repIndex] |= PrefixImplicit
+							ok = true
+						}
+						if gnuCompat && !ok && prefix == 0xF3 && repIndex >= 0 && (j+1 >= n || decoder[pc+2*(j+1)] != 0xF2) {
+							// Check to see if earlier prefix F3 is present.
+							for i := repIndex - 1; i >= 0; i-- {
+								if inst.Prefix[i]&0xFF == prefix {
+									inst.Prefix[i] |= PrefixImplicit
+									ok = true
+								}
+							}
+						}
+						if gnuCompat && !ok && prefix == 0xF2 && repIndex >= 0 && !sawF3 && inst.Prefix[repIndex]&0xFF == 0xF3 {
+							// Check to see if earlier prefix F2 is present.
+							for i := repIndex - 1; i >= 0; i-- {
+								if inst.Prefix[i]&0xFF == prefix {
+									inst.Prefix[i] |= PrefixImplicit
+									ok = true
+								}
+							}
+						}
+					case PrefixCS, PrefixDS, PrefixES, PrefixFS, PrefixGS, PrefixSS:
+						if segIndex >= 0 && inst.Prefix[segIndex]&0xFF == prefix {
+							inst.Prefix[segIndex] |= PrefixImplicit
+							ok = true
+						}
+					case PrefixDataSize:
+						// Looking for 66 mandatory prefix.
+						// The F2/F3 mandatory prefixes take priority when both are present.
+						// If we got this far in the xCondPrefix table and an F2/F3 is present,
+						// it means the table didn't have any entry for that prefix. But if 66 has
+						// special meaning, perhaps F2/F3 have special meaning that we don't know.
+						// Intel xed works this way, treating the F2/F3 as inhibiting the 66.
+						// GNU libopcodes allows the 66 to match. We do what Intel xed does
+						// except in gnuCompat mode.
+						if repIndex >= 0 && !gnuCompat {
+							inst.Op = 0
+							break Decode
+						}
+						if dataSizeIndex >= 0 {
+							inst.Prefix[dataSizeIndex] |= PrefixImplicit
+							ok = true
+						}
+					case PrefixAddrSize:
+						if addrSizeIndex >= 0 {
+							inst.Prefix[addrSizeIndex] |= PrefixImplicit
+							ok = true
+						}
+					}
+				}
+				if ok {
+					pc = int(decoder[pc+2*j+1])
+					continue Decode
+				}
+			}
+			inst.Op = 0
+			break Decode
+
+		case xCondSlashR:
+			pc = int(decoder[pc+regop&7])
+
+		// Input.
+
+		case xReadSlashR:
+			// done above
+
+		case xReadIb:
+			if pos >= len(src) {
+				return truncated(src, mode)
+			}
+			imm8 = int8(src[pos])
+			pos++
+
+		case xReadIw:
+			if pos+2 > len(src) {
+				return truncated(src, mode)
+			}
+			imm = int64(binary.LittleEndian.Uint16(src[pos:]))
+			pos += 2
+
+		case xReadId:
+			if pos+4 > len(src) {
+				return truncated(src, mode)
+			}
+			imm = int64(binary.LittleEndian.Uint32(src[pos:]))
+			pos += 4
+
+		case xReadIo:
+			if pos+8 > len(src) {
+				return truncated(src, mode)
+			}
+			imm = int64(binary.LittleEndian.Uint64(src[pos:]))
+			pos += 8
+
+		case xReadCb:
+			if pos >= len(src) {
+				return truncated(src, mode)
+			}
+			immcpos = pos
+			immc = int64(src[pos])
+			pos++
+
+		case xReadCw:
+			if pos+2 > len(src) {
+				return truncated(src, mode)
+			}
+			immcpos = pos
+			immc = int64(binary.LittleEndian.Uint16(src[pos:]))
+			pos += 2
+
+		case xReadCm:
+			immcpos = pos
+			if addrMode == 16 {
+				if pos+2 > len(src) {
+					return truncated(src, mode)
+				}
+				immc = int64(binary.LittleEndian.Uint16(src[pos:]))
+				pos += 2
+			} else if addrMode == 32 {
+				if pos+4 > len(src) {
+					return truncated(src, mode)
+				}
+				immc = int64(binary.LittleEndian.Uint32(src[pos:]))
+				pos += 4
+			} else {
+				if pos+8 > len(src) {
+					return truncated(src, mode)
+				}
+				immc = int64(binary.LittleEndian.Uint64(src[pos:]))
+				pos += 8
+			}
+		case xReadCd:
+			immcpos = pos
+			if pos+4 > len(src) {
+				return truncated(src, mode)
+			}
+			immc = int64(binary.LittleEndian.Uint32(src[pos:]))
+			pos += 4
+
+		case xReadCp:
+			immcpos = pos
+			if pos+6 > len(src) {
+				return truncated(src, mode)
+			}
+			w := binary.LittleEndian.Uint32(src[pos:])
+			w2 := binary.LittleEndian.Uint16(src[pos+4:])
+			immc = int64(w2)<<32 | int64(w)
+			pos += 6
+
+		// Output.
+
+		case xSetOp:
+			inst.Op = Op(decoder[pc])
+			pc++
+
+		case xArg1,
+			xArg3,
+			xArgAL,
+			xArgAX,
+			xArgCL,
+			xArgCS,
+			xArgDS,
+			xArgDX,
+			xArgEAX,
+			xArgEDX,
+			xArgES,
+			xArgFS,
+			xArgGS,
+			xArgRAX,
+			xArgRDX,
+			xArgSS,
+			xArgST,
+			xArgXMM0:
+			inst.Args[narg] = fixedArg[x]
+			narg++
+
+		case xArgImm8:
+			inst.Args[narg] = Imm(imm8)
+			narg++
+
+		case xArgImm8u:
+			inst.Args[narg] = Imm(uint8(imm8))
+			narg++
+
+		case xArgImm16:
+			inst.Args[narg] = Imm(int16(imm))
+			narg++
+
+		case xArgImm16u:
+			inst.Args[narg] = Imm(uint16(imm))
+			narg++
+
+		case xArgImm32:
+			inst.Args[narg] = Imm(int32(imm))
+			narg++
+
+		case xArgImm64:
+			inst.Args[narg] = Imm(imm)
+			narg++
+
+		case xArgM,
+			xArgM128,
+			xArgM1428byte,
+			xArgM16,
+			xArgM16and16,
+			xArgM16and32,
+			xArgM16and64,
+			xArgM16colon16,
+			xArgM16colon32,
+			xArgM16colon64,
+			xArgM16int,
+			xArgM2byte,
+			xArgM32,
+			xArgM32and32,
+			xArgM32fp,
+			xArgM32int,
+			xArgM512byte,
+			xArgM64,
+			xArgM64fp,
+			xArgM64int,
+			xArgM8,
+			xArgM80bcd,
+			xArgM80dec,
+			xArgM80fp,
+			xArgM94108byte,
+			xArgMem:
+			if !haveMem {
+				inst.Op = 0
+				break Decode
+			}
+			inst.Args[narg] = mem
+			inst.MemBytes = int(memBytes[decodeOp(x)])
+			if mem.Base == RIP {
+				inst.PCRel = displen
+				inst.PCRelOff = dispoff
+			}
+			narg++
+
+		case xArgPtr16colon16:
+			inst.Args[narg] = Imm(immc >> 16)
+			inst.Args[narg+1] = Imm(immc & (1<<16 - 1))
+			narg += 2
+
+		case xArgPtr16colon32:
+			inst.Args[narg] = Imm(immc >> 32)
+			inst.Args[narg+1] = Imm(immc & (1<<32 - 1))
+			narg += 2
+
+		case xArgMoffs8, xArgMoffs16, xArgMoffs32, xArgMoffs64:
+			// TODO(rsc): Can address be 64 bits?
+			mem = Mem{Disp: int64(immc)}
+			if segIndex >= 0 {
+				mem.Segment = prefixToSegment(inst.Prefix[segIndex])
+				inst.Prefix[segIndex] |= PrefixImplicit
+			}
+			inst.Args[narg] = mem
+			inst.MemBytes = int(memBytes[decodeOp(x)])
+			if mem.Base == RIP {
+				inst.PCRel = displen
+				inst.PCRelOff = dispoff
+			}
+			narg++
+
+		case xArgR8, xArgR16, xArgR32, xArgR64, xArgXmm, xArgXmm1, xArgDR0dashDR7:
+			base := baseReg[x]
+			index := Reg(regop)
+			if rex != 0 && base == AL && index >= 4 {
+				rexUsed |= PrefixREX
+				index -= 4
+				base = SPB
+			}
+			inst.Args[narg] = base + index
+			narg++
+
+		case xArgMm, xArgMm1, xArgTR0dashTR7:
+			inst.Args[narg] = baseReg[x] + Reg(regop&7)
+			narg++
+
+		case xArgCR0dashCR7:
+			// AMD documents an extension that the LOCK prefix
+			// can be used in place of a REX prefix in order to access
+			// CR8 from 32-bit mode. The LOCK prefix is allowed in
+			// all modes, provided the corresponding CPUID bit is set.
+			if lockIndex >= 0 {
+				inst.Prefix[lockIndex] |= PrefixImplicit
+				regop += 8
+			}
+			inst.Args[narg] = CR0 + Reg(regop)
+			narg++
+
+		case xArgSreg:
+			regop &= 7
+			if regop >= 6 {
+				inst.Op = 0
+				break Decode
+			}
+			inst.Args[narg] = ES + Reg(regop)
+			narg++
+
+		case xArgRmf16, xArgRmf32, xArgRmf64:
+			base := baseReg[x]
+			index := Reg(modrm & 07)
+			if rex&PrefixREXB != 0 {
+				rexUsed |= PrefixREXB
+				index += 8
+			}
+			inst.Args[narg] = base + index
+			narg++
+
+		case xArgR8op, xArgR16op, xArgR32op, xArgR64op, xArgSTi:
+			n := inst.Opcode >> uint(opshift+8) & 07
+			base := baseReg[x]
+			index := Reg(n)
+			if rex&PrefixREXB != 0 && decodeOp(x) != xArgSTi {
+				rexUsed |= PrefixREXB
+				index += 8
+			}
+			if rex != 0 && base == AL && index >= 4 {
+				rexUsed |= PrefixREX
+				index -= 4
+				base = SPB
+			}
+			inst.Args[narg] = base + index
+			narg++
+
+		case xArgRM8, xArgRM16, xArgRM32, xArgRM64, xArgR32M16, xArgR32M8, xArgR64M16,
+			xArgMmM32, xArgMmM64, xArgMm2M64,
+			xArgXmm2M16, xArgXmm2M32, xArgXmm2M64, xArgXmmM64, xArgXmmM128, xArgXmmM32, xArgXmm2M128:
+			if haveMem {
+				inst.Args[narg] = mem
+				inst.MemBytes = int(memBytes[decodeOp(x)])
+				if mem.Base == RIP {
+					inst.PCRel = displen
+					inst.PCRelOff = dispoff
+				}
+			} else {
+				base := baseReg[x]
+				index := Reg(rm)
+				switch decodeOp(x) {
+				case xArgMmM32, xArgMmM64, xArgMm2M64:
+					// There are only 8 MMX registers, so these ignore the REX.X bit.
+					index &= 7
+				case xArgRM8:
+					if rex != 0 && index >= 4 {
+						rexUsed |= PrefixREX
+						index -= 4
+						base = SPB
+					}
+				}
+				inst.Args[narg] = base + index
+			}
+			narg++
+
+		case xArgMm2: // register only; TODO(rsc): Handle with tag modrm_regonly tag
+			if haveMem {
+				inst.Op = 0
+				break Decode
+			}
+			inst.Args[narg] = baseReg[x] + Reg(rm&7)
+			narg++
+
+		case xArgXmm2: // register only; TODO(rsc): Handle with tag modrm_regonly tag
+			if haveMem {
+				inst.Op = 0
+				break Decode
+			}
+			inst.Args[narg] = baseReg[x] + Reg(rm)
+			narg++
+
+		case xArgRel8:
+			inst.PCRelOff = immcpos
+			inst.PCRel = 1
+			inst.Args[narg] = Rel(int8(immc))
+			narg++
+
+		case xArgRel16:
+			inst.PCRelOff = immcpos
+			inst.PCRel = 2
+			inst.Args[narg] = Rel(int16(immc))
+			narg++
+
+		case xArgRel32:
+			inst.PCRelOff = immcpos
+			inst.PCRel = 4
+			inst.Args[narg] = Rel(int32(immc))
+			narg++
+		}
+	}
+
+	if inst.Op == 0 {
+		// Invalid instruction.
+		if nprefix > 0 {
+			return instPrefix(src[0], mode) // invalid instruction
+		}
+		return Inst{Len: pos}, ErrUnrecognized
+	}
+
+	// Matched! Hooray!
+
+	// 90 decodes as XCHG EAX, EAX but is NOP.
+	// 66 90 decodes as XCHG AX, AX and is NOP too.
+	// 48 90 decodes as XCHG RAX, RAX and is NOP too.
+	// 43 90 decodes as XCHG R8D, EAX and is *not* NOP.
+	// F3 90 decodes as REP XCHG EAX, EAX but is PAUSE.
+	// It's all too special to handle in the decoding tables, at least for now.
+	if inst.Op == XCHG && inst.Opcode>>24 == 0x90 {
+		if inst.Args[0] == RAX || inst.Args[0] == EAX || inst.Args[0] == AX {
+			inst.Op = NOP
+			if dataSizeIndex >= 0 {
+				inst.Prefix[dataSizeIndex] &^= PrefixImplicit
+			}
+			inst.Args[0] = nil
+			inst.Args[1] = nil
+		}
+		if repIndex >= 0 && inst.Prefix[repIndex] == 0xF3 {
+			inst.Prefix[repIndex] |= PrefixImplicit
+			inst.Op = PAUSE
+			inst.Args[0] = nil
+			inst.Args[1] = nil
+		} else if gnuCompat {
+			for i := nprefix - 1; i >= 0; i-- {
+				if inst.Prefix[i]&0xFF == 0xF3 {
+					inst.Prefix[i] |= PrefixImplicit
+					inst.Op = PAUSE
+					inst.Args[0] = nil
+					inst.Args[1] = nil
+					break
+				}
+			}
+		}
+	}
+
+	// defaultSeg returns the default segment for an implicit
+	// memory reference: the final override if present, or else DS.
+	defaultSeg := func() Reg {
+		if segIndex >= 0 {
+			inst.Prefix[segIndex] |= PrefixImplicit
+			return prefixToSegment(inst.Prefix[segIndex])
+		}
+		return DS
+	}
+
+	// Add implicit arguments not present in the tables.
+	// Normally we shy away from making implicit arguments explicit,
+	// following the Intel manuals, but adding the arguments seems
+	// the best way to express the effect of the segment override prefixes.
+	// TODO(rsc): Perhaps add these to the tables and
+	// create bytecode instructions for them.
+	usedAddrSize := false
+	switch inst.Op {
+	case INSB, INSW, INSD:
+		inst.Args[0] = Mem{Segment: ES, Base: baseRegForBits(addrMode) + DI - AX}
+		inst.Args[1] = DX
+		usedAddrSize = true
+
+	case OUTSB, OUTSW, OUTSD:
+		inst.Args[0] = DX
+		inst.Args[1] = Mem{Segment: defaultSeg(), Base: baseRegForBits(addrMode) + SI - AX}
+		usedAddrSize = true
+
+	case MOVSB, MOVSW, MOVSD, MOVSQ:
+		inst.Args[0] = Mem{Segment: ES, Base: baseRegForBits(addrMode) + DI - AX}
+		inst.Args[1] = Mem{Segment: defaultSeg(), Base: baseRegForBits(addrMode) + SI - AX}
+		usedAddrSize = true
+
+	case CMPSB, CMPSW, CMPSD, CMPSQ:
+		inst.Args[0] = Mem{Segment: defaultSeg(), Base: baseRegForBits(addrMode) + SI - AX}
+		inst.Args[1] = Mem{Segment: ES, Base: baseRegForBits(addrMode) + DI - AX}
+		usedAddrSize = true
+
+	case LODSB, LODSW, LODSD, LODSQ:
+		switch inst.Op {
+		case LODSB:
+			inst.Args[0] = AL
+		case LODSW:
+			inst.Args[0] = AX
+		case LODSD:
+			inst.Args[0] = EAX
+		case LODSQ:
+			inst.Args[0] = RAX
+		}
+		inst.Args[1] = Mem{Segment: defaultSeg(), Base: baseRegForBits(addrMode) + SI - AX}
+		usedAddrSize = true
+
+	case STOSB, STOSW, STOSD, STOSQ:
+		inst.Args[0] = Mem{Segment: ES, Base: baseRegForBits(addrMode) + DI - AX}
+		switch inst.Op {
+		case STOSB:
+			inst.Args[1] = AL
+		case STOSW:
+			inst.Args[1] = AX
+		case STOSD:
+			inst.Args[1] = EAX
+		case STOSQ:
+			inst.Args[1] = RAX
+		}
+		usedAddrSize = true
+
+	case SCASB, SCASW, SCASD, SCASQ:
+		inst.Args[1] = Mem{Segment: ES, Base: baseRegForBits(addrMode) + DI - AX}
+		switch inst.Op {
+		case SCASB:
+			inst.Args[0] = AL
+		case SCASW:
+			inst.Args[0] = AX
+		case SCASD:
+			inst.Args[0] = EAX
+		case SCASQ:
+			inst.Args[0] = RAX
+		}
+		usedAddrSize = true
+
+	case XLATB:
+		inst.Args[0] = Mem{Segment: defaultSeg(), Base: baseRegForBits(addrMode) + BX - AX}
+		usedAddrSize = true
+	}
+
+	// If we used the address size annotation to construct the
+	// argument list, mark that prefix as implicit: it doesn't need
+	// to be shown when printing the instruction.
+	if haveMem || usedAddrSize {
+		if addrSizeIndex >= 0 {
+			inst.Prefix[addrSizeIndex] |= PrefixImplicit
+		}
+	}
+
+	// Similarly, if there's some memory operand, the segment
+	// will be shown there and doesn't need to be shown as an
+	// explicit prefix.
+	if haveMem {
+		if segIndex >= 0 {
+			inst.Prefix[segIndex] |= PrefixImplicit
+		}
+	}
+
+	// Branch predict prefixes are overloaded segment prefixes,
+	// since segment prefixes don't make sense on conditional jumps.
+	// Rewrite final instance to prediction prefix.
+	// The set of instructions to which the prefixes apply (other than the
+	// Jcc conditional jumps) is not 100% clear from the manuals, but
+	// the disassemblers seem to agree about the LOOP and JCXZ instructions,
+	// so we'll follow along.
+	// TODO(rsc): Perhaps this instruction class should be derived from the CSV.
+	if isCondJmp[inst.Op] || isLoop[inst.Op] || inst.Op == JCXZ || inst.Op == JECXZ || inst.Op == JRCXZ {
+	PredictLoop:
+		for i := nprefix - 1; i >= 0; i-- {
+			p := inst.Prefix[i]
+			switch p & 0xFF {
+			case PrefixCS:
+				inst.Prefix[i] = PrefixPN
+				break PredictLoop
+			case PrefixDS:
+				inst.Prefix[i] = PrefixPT
+				break PredictLoop
+			}
+		}
+	}
+
+	// The BND prefix is part of the Intel Memory Protection Extensions (MPX).
+	// A REPN applied to certain control transfers is a BND prefix to bound
+	// the range of possible destinations. There's surprisingly little documentation
+	// about this, so we just do what libopcodes and xed agree on.
+	// In particular, it's unclear why a REPN applied to LOOP or JCXZ instructions
+	// does not turn into a BND.
+	// TODO(rsc): Perhaps this instruction class should be derived from the CSV.
+	if isCondJmp[inst.Op] || inst.Op == JMP || inst.Op == CALL || inst.Op == RET {
+		for i := nprefix - 1; i >= 0; i-- {
+			p := inst.Prefix[i]
+			if p&^PrefixIgnored == PrefixREPN {
+				inst.Prefix[i] = PrefixBND
+				break
+			}
+		}
+	}
+
+	// The LOCK prefix only applies to certain instructions, and then only
+	// to instances of the instruction with a memory destination.
+	// Other uses of LOCK are invalid and cause a processor exception,
+	// in contrast to the "just ignore it" spirit applied to all other prefixes.
+	// Mark invalid lock prefixes.
+	hasLock := false
+	if lockIndex >= 0 && inst.Prefix[lockIndex]&PrefixImplicit == 0 {
+		switch inst.Op {
+		// TODO(rsc): Perhaps this instruction class should be derived from the CSV.
+		case ADD, ADC, AND, BTC, BTR, BTS, CMPXCHG, CMPXCHG8B, CMPXCHG16B, DEC, INC, NEG, NOT, OR, SBB, SUB, XOR, XADD, XCHG:
+			if isMem(inst.Args[0]) {
+				hasLock = true
+				break
+			}
+			fallthrough
+		default:
+			inst.Prefix[lockIndex] |= PrefixInvalid
+		}
+	}
+
+	// In certain cases, all of which require a memory destination,
+	// the REPN and REP prefixes are interpreted as XACQUIRE and XRELEASE
+	// from the Intel Transactional Synchronization Extensions (TSX).
+	//
+	// The specific rules are:
+	// (1) Any instruction with a valid LOCK prefix can have XACQUIRE or XRELEASE.
+	// (2) Any XCHG, which always has an implicit LOCK, can have XACQUIRE or XRELEASE.
+	// (3) Any 0x88-, 0x89-, 0xC6-, or 0xC7-opcode MOV can have XRELEASE.
+	if isMem(inst.Args[0]) {
+		if inst.Op == XCHG {
+			hasLock = true
+		}
+
+		for i := len(inst.Prefix) - 1; i >= 0; i-- {
+			p := inst.Prefix[i] &^ PrefixIgnored
+			switch p {
+			case PrefixREPN:
+				if hasLock {
+					inst.Prefix[i] = inst.Prefix[i]&PrefixIgnored | PrefixXACQUIRE
+				}
+
+			case PrefixREP:
+				if hasLock {
+					inst.Prefix[i] = inst.Prefix[i]&PrefixIgnored | PrefixXRELEASE
+				}
+
+				if inst.Op == MOV {
+					op := (inst.Opcode >> 24) &^ 1
+					if op == 0x88 || op == 0xC6 {
+						inst.Prefix[i] = inst.Prefix[i]&PrefixIgnored | PrefixXRELEASE
+					}
+				}
+			}
+		}
+	}
+
+	// If REP is used on a non-REP-able instruction, mark the prefix as ignored.
+	if repIndex >= 0 {
+		switch inst.Prefix[repIndex] {
+		case PrefixREP, PrefixREPN:
+			switch inst.Op {
+			// According to the manuals, the REP/REPE prefix applies to all of these,
+			// while the REPN applies only to some of them. However, both libopcodes
+			// and xed show both prefixes explicitly for all instructions, so we do the same.
+			// TODO(rsc): Perhaps this instruction class should be derived from the CSV.
+			case INSB, INSW, INSD,
+				MOVSB, MOVSW, MOVSD, MOVSQ,
+				OUTSB, OUTSW, OUTSD,
+				LODSB, LODSW, LODSD, LODSQ,
+				CMPSB, CMPSW, CMPSD, CMPSQ,
+				SCASB, SCASW, SCASD, SCASQ,
+				STOSB, STOSW, STOSD, STOSQ:
+				// ok
+			default:
+				inst.Prefix[repIndex] |= PrefixIgnored
+			}
+		}
+	}
+
+	// If REX was present, mark implicit if all the 1 bits were consumed.
+	if rexIndex >= 0 {
+		if rexUsed != 0 {
+			rexUsed |= PrefixREX
+		}
+		if rex&^rexUsed == 0 {
+			inst.Prefix[rexIndex] |= PrefixImplicit
+		}
+	}
+
+	inst.DataSize = dataMode
+	inst.AddrSize = addrMode
+	inst.Mode = mode
+	inst.Len = pos
+	return inst, nil
+}
+
+var errInternal = errors.New("internal error")
+
+// addr16 records the eight 16-bit addressing modes: four base+index pairs, then four single-base forms.
+var addr16 = [8]Mem{
+	{Base: BX, Scale: 1, Index: SI},
+	{Base: BX, Scale: 1, Index: DI},
+	{Base: BP, Scale: 1, Index: SI},
+	{Base: BP, Scale: 1, Index: DI},
+	{Base: SI},
+	{Base: DI},
+	{Base: BP},
+	{Base: BX},
+}
+
+// baseRegForBits returns the first register of the given width in bits
+// (AL, AX, EAX, or RAX for 8, 16, 32, or 64) and 0 for any other width.
+func baseRegForBits(bits int) Reg {
+	if bits == 8 {
+		return AL
+	} else if bits == 16 {
+		return AX
+	} else if bits == 32 {
+		return EAX
+	} else if bits == 64 {
+		return RAX
+	}
+	return 0
+}
+
+// baseReg records, for each argument type that names a range of registers,
+// the first register of that range; the decoder adds an op, regop, or rm index to it.
+var baseReg = [...]Reg{
+	xArgDR0dashDR7: DR0,
+	xArgMm1:        M0,
+	xArgMm2:        M0,
+	xArgMm2M64:     M0,
+	xArgMm:         M0,
+	xArgMmM32:      M0,
+	xArgMmM64:      M0,
+	xArgR16:        AX,
+	xArgR16op:      AX,
+	xArgR32:        EAX,
+	xArgR32M16:     EAX,
+	xArgR32M8:      EAX,
+	xArgR32op:      EAX,
+	xArgR64:        RAX,
+	xArgR64M16:     RAX,
+	xArgR64op:      RAX,
+	xArgR8:         AL,
+	xArgR8op:       AL,
+	xArgRM16:       AX,
+	xArgRM32:       EAX,
+	xArgRM64:       RAX,
+	xArgRM8:        AL,
+	xArgRmf16:      AX,
+	xArgRmf32:      EAX,
+	xArgRmf64:      RAX,
+	xArgSTi:        F0,
+	xArgTR0dashTR7: TR0,
+	xArgXmm1:       X0,
+	xArgXmm2:       X0,
+	xArgXmm2M128:   X0,
+	xArgXmm2M16:    X0,
+	xArgXmm2M32:    X0,
+	xArgXmm2M64:    X0,
+	xArgXmm:        X0,
+	xArgXmmM128:    X0,
+	xArgXmmM32:     X0,
+	xArgXmmM64:     X0,
+}
+
+// prefixToSegment returns the segment register
+// corresponding to a particular segment prefix.
+func prefixToSegment(p Prefix) Reg {
+	switch p &^ PrefixImplicit {
+	case PrefixCS:
+		return CS
+	case PrefixDS:
+		return DS
+	case PrefixES:
+		return ES
+	case PrefixFS:
+		return FS
+	case PrefixGS:
+		return GS
+	case PrefixSS:
+		return SS
+	}
+	return 0
+}
+
+// fixedArg maps each bytecode that names one specific operand to that operand (an Imm constant or a register).
+var fixedArg = [...]Arg{
+	xArg1:    Imm(1),
+	xArg3:    Imm(3),
+	xArgAL:   AL,
+	xArgAX:   AX,
+	xArgDX:   DX,
+	xArgEAX:  EAX,
+	xArgEDX:  EDX,
+	xArgRAX:  RAX,
+	xArgRDX:  RDX,
+	xArgCL:   CL,
+	xArgCS:   CS,
+	xArgDS:   DS,
+	xArgES:   ES,
+	xArgFS:   FS,
+	xArgGS:   GS,
+	xArgSS:   SS,
+	xArgST:   F0,
+	xArgXMM0: X0,
+}
+
+// memBytes records the size in bytes of the memory pointed at
+// by a memory argument of the given form; forms not listed here are 0.
+var memBytes = [...]int8{
+	xArgM128:       128 / 8,
+	xArgM16:        16 / 8,
+	xArgM16and16:   (16 + 16) / 8,
+	xArgM16colon16: (16 + 16) / 8,
+	xArgM16colon32: (16 + 32) / 8,
+	xArgM16int:     16 / 8,
+	xArgM2byte:     2,
+	xArgM32:        32 / 8,
+	xArgM32and32:   (32 + 32) / 8,
+	xArgM32fp:      32 / 8,
+	xArgM32int:     32 / 8,
+	xArgM64:        64 / 8,
+	xArgM64fp:      64 / 8,
+	xArgM64int:     64 / 8,
+	xArgMm2M64:     64 / 8,
+	xArgMmM32:      32 / 8,
+	xArgMmM64:      64 / 8,
+	xArgMoffs16:    16 / 8,
+	xArgMoffs32:    32 / 8,
+	xArgMoffs64:    64 / 8,
+	xArgMoffs8:     8 / 8,
+	xArgR32M16:     16 / 8,
+	xArgR32M8:      8 / 8,
+	xArgR64M16:     16 / 8,
+	xArgRM16:       16 / 8,
+	xArgRM32:       32 / 8,
+	xArgRM64:       64 / 8,
+	xArgRM8:        8 / 8,
+	xArgXmm2M128:   128 / 8,
+	xArgXmm2M16:    16 / 8,
+	xArgXmm2M32:    32 / 8,
+	xArgXmm2M64:    64 / 8,
+	xArgXmm:        128 / 8,
+	xArgXmmM128:    128 / 8,
+	xArgXmmM32:     32 / 8,
+	xArgXmmM64:     64 / 8,
+}
+
+// isCondJmp reports, indexed by Op, whether the op is one of the conditional jumps listed here.
+var isCondJmp = [maxOp + 1]bool{
+	JA:  true,
+	JAE: true,
+	JB:  true,
+	JBE: true,
+	JE:  true,
+	JG:  true,
+	JGE: true,
+	JL:  true,
+	JLE: true,
+	JNE: true,
+	JNO: true,
+	JNP: true,
+	JNS: true,
+	JO:  true,
+	JP:  true,
+	JS:  true,
+}
+
+// isLoop reports, indexed by Op, the LOOP family plus JECXZ and JRCXZ (JCXZ is not in this table).
+var isLoop = [maxOp + 1]bool{
+	LOOP:   true,
+	LOOPE:  true,
+	LOOPNE: true,
+	JECXZ:  true,
+	JRCXZ:  true,
+}
diff --git a/x86/x86asm/decode_test.go b/x86/x86asm/decode_test.go
new file mode 100644
index 0000000..7db2460
--- /dev/null
+++ b/x86/x86asm/decode_test.go
@@ -0,0 +1,71 @@
+// Copyright 2014 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package x86asm
+
+import (
+	"encoding/hex"
+	"io/ioutil"
+	"strconv"
+	"strings"
+	"testing"
+)
+
+// TestDecode decodes each "hex|hex<TAB>mode<TAB>syntax<TAB>asm" case in testdata/decode.txt and checks text and length.
+func TestDecode(t *testing.T) {
+	data, err := ioutil.ReadFile("testdata/decode.txt")
+	if err != nil {
+		t.Fatal(err)
+	}
+	all := string(data)
+	for strings.Contains(all, "\t\t") {
+		all = strings.Replace(all, "\t\t", "\t", -1)
+	}
+	for _, line := range strings.Split(all, "\n") {
+		line = strings.TrimSpace(line)
+		if line == "" || strings.HasPrefix(line, "#") {
+			continue
+		}
+		f := append(strings.SplitN(line, "\t", 4), "", "", "") // pad: a short line is reported below instead of panicking on f[1..3]
+		i := strings.Index(f[0], "|")
+		if i < 0 {
+			t.Errorf("parsing %q: missing | separator", f[0])
+			continue
+		}
+		if i%2 != 0 {
+			t.Errorf("parsing %q: misaligned | separator", f[0])
+			continue // the expected size below would be wrong; avoid a second, spurious failure
+		}
+		size := i / 2
+		code, err := hex.DecodeString(f[0][:i] + f[0][i+1:])
+		if err != nil {
+			t.Errorf("parsing %q: %v", f[0], err)
+			continue
+		}
+		mode, err := strconv.Atoi(f[1])
+		if err != nil {
+			t.Errorf("invalid mode %q in: %s", f[1], line)
+			continue
+		}
+		syntax, asm := f[2], f[3]
+		inst, err := Decode(code, mode)
+		var out string
+		switch {
+		case err != nil:
+			out = "error: " + err.Error()
+		case syntax == "gnu":
+			out = GNUSyntax(inst)
+		case syntax == "intel":
+			out = IntelSyntax(inst)
+		case syntax == "plan9":
+			out = plan9Syntax(inst, 0, nil)
+		default:
+			t.Errorf("unknown syntax %q", syntax)
+			continue
+		}
+		if out != asm || inst.Len != size {
+			t.Errorf("Decode(%s) [%s] = %s, %d, want %s, %d", f[0], syntax, out, inst.Len, asm, size)
+		}
+	}
+}
diff --git a/x86/x86asm/ext_test.go b/x86/x86asm/ext_test.go
new file mode 100644
index 0000000..8c1e00e
--- /dev/null
+++ b/x86/x86asm/ext_test.go
@@ -0,0 +1,811 @@
+// Copyright 2014 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Support for testing against external disassembler program.
+
+package x86asm
+
+import (
+	"bufio"
+	"bytes"
+	"encoding/hex"
+	"flag"
+	"fmt"
+	"io/ioutil"
+	"log"
+	"math/rand"
+	"os"
+	"os/exec"
+	"regexp"
+	"runtime"
+	"strings"
+	"testing"
+	"time"
+)
+
+var (
+	printTests = flag.Bool("printtests", false, "print test cases that exercise new code paths")
+	dumpTest   = flag.Bool("dump", false, "dump all encodings")
+	mismatch   = flag.Bool("mismatch", false, "log allowed mismatches")
+	longTest   = flag.Bool("long", false, "long test")
+	keep       = flag.Bool("keep", false, "keep object files around")
+	debug      = false // when true, writeInst prints each byte sequence as it writes it
+)
+
+// An ExtInst is a single decoded instruction parsed from an external disassembler's output.
+type ExtInst struct {
+	addr uint32
+	enc  [32]byte
+	nenc int    // compared with Inst.Len in testExtDis
+	text string // compared with our own disassembly text in testExtDis
+}
+
+// String formats addr, the whole 32-byte enc array in hex, and text.
+func (r ExtInst) String() string {
+	return fmt.Sprintf("%#x: % x: %s", r.addr, r.enc, r.text)
+}
+
+// An ExtDis is a connection between an external disassembler and a test.
+type ExtDis struct {
+	Arch     int          // architecture mode passed to testExtDis
+	Dec      chan ExtInst // parsed instructions, sent by the extdis function
+	File     *os.File     // input file produced by writeInst
+	Size     int          // size value returned by writeInst
+	KeepFile bool         // NOTE(review): not set or read in the code visible here
+	Cmd      *exec.Cmd    // command started by Run
+}
+
+// Run starts the external disassembler named by cmd (program first, then its arguments),
+// stores it in ext.Cmd, and returns a reader with a 1<<20-byte buffer over its standard output.
+func (ext *ExtDis) Run(cmd ...string) (*bufio.Reader, error) {
+	if *keep {
+		log.Printf("%s\n", strings.Join(cmd, " "))
+	}
+	name, args := cmd[0], cmd[1:]
+	ext.Cmd = exec.Command(name, args...)
+	stdout, pipeErr := ext.Cmd.StdoutPipe()
+	if pipeErr != nil {
+		return nil, fmt.Errorf("stdoutpipe: %v", pipeErr)
+	}
+	startErr := ext.Cmd.Start()
+	if startErr != nil {
+		return nil, fmt.Errorf("exec: %v", startErr)
+	}
+	return bufio.NewReaderSize(stdout, 1<<20), nil
+}
+
+// Wait waits for the command started with Run to exit and returns the error from ext.Cmd.Wait.
+func (ext *ExtDis) Wait() error {
+	return ext.Cmd.Wait()
+}
+
+// testExtDis tests a set of byte sequences against an external disassembler.
+// The disassembler is expected to produce the given syntax and be run
+// in the given architecture mode (16, 32, or 64-bit).
+// The extdis function must start the external disassembler
+// and then parse its output, sending the parsed instructions on ext.Dec.
+// The generate function calls its argument f once for each byte sequence
+// to be tested. The generate function itself will be called twice, and it must
+// make the same sequence of calls to f each time.
+// When a disassembly does not match the internal decoding,
+// allowedMismatch determines whether this mismatch should be
+// allowed, or else considered an error.
+func testExtDis(
+	t *testing.T,
+	syntax string,
+	arch int,
+	extdis func(ext *ExtDis) error,
+	generate func(f func([]byte)),
+	allowedMismatch func(text string, size int, inst *Inst, dec ExtInst) bool,
+) {
+	start := time.Now()
+	ext := &ExtDis{
+		Dec:  make(chan ExtInst),
+		Arch: arch,
+	}
+	errc := make(chan error)
+
+	// First pass: write instructions to input file for external disassembler.
+	file, f, size, err := writeInst(generate)
+	if err != nil {
+		t.Fatal(err)
+	}
+	ext.Size = size
+	ext.File = f
+	defer func() {
+		f.Close()
+		if !*keep {
+			os.Remove(file)
+		}
+	}()
+
+	// Second pass: compare disassembly against our decodings.
+	var (
+		totalTests  = 0
+		totalSkips  = 0
+		totalErrors = 0
+
+		errors = make([]string, 0, 100) // sampled errors, at most cap
+	)
+	go func() {
+		errc <- extdis(ext)
+	}()
+	generate(func(enc []byte) {
+		dec, ok := <-ext.Dec
+		if !ok {
+			t.Errorf("decoding stream ended early")
+			return
+		}
+		inst, text := disasm(syntax, arch, pad(enc))
+		totalTests++
+		if *dumpTest {
+			fmt.Printf("%x -> %s [%d]\n", enc, dec.text, dec.nenc)
+		}
+		if text != dec.text || inst.Len != dec.nenc {
+			suffix := ""
+			if allowedMismatch(text, size, &inst, dec) {
+				totalSkips++
+				if !*mismatch {
+					return
+				}
+				suffix += " (allowed mismatch)"
+			}
+			totalErrors++
+			// The sample buffer is full: replace a random earlier entry, or drop this one.
+			if len(errors) >= cap(errors) {
+				j := rand.Intn(totalErrors)
+				if j >= cap(errors) {
+					return
+				}
+				errors = append(errors[:j], errors[j+1:]...)
+			}
+			errors = append(errors, fmt.Sprintf("decode(%x) = %q, %d, want %q, %d%s", enc, text, inst.Len, dec.text, dec.nenc, suffix))
+		}
+	})
+
+	if *mismatch {
+		totalErrors -= totalSkips
+	}
+
+	for _, b := range errors {
+		t.Log(b)
+	}
+
+	if totalErrors > 0 {
+		t.Fail()
+	}
+	t.Logf("%d test cases, %d expected mismatches, %d failures; %.0f cases/second", totalTests, totalSkips, totalErrors, float64(totalTests)/time.Since(start).Seconds())
+
+	if err := <-errc; err != nil {
+		t.Fatalf("external disassembler: %v", err) // Fatalf, not Fatal: the message is a format string
+	}
+}
+
+const start = 0x8000 // start address of text
+
+// writeInst writes the generated byte sequences, each padded with pops to len(pops) bytes,
+// to a new temporary file starting at offset start; that file is the external disassembler's
+// input. On a seek or write error it closes and removes the file and returns the error.
+func writeInst(generate func(func([]byte))) (file string, f *os.File, size int, err error) {
+	if f, err = ioutil.TempFile("", "x86map"); err != nil {
+		return
+	}
+	w := bufio.NewWriter(f)
+	if _, err = f.Seek(start, 0); err == nil {
+		generate(func(x []byte) {
+			if len(x) > 16 {
+				x = x[:16]
+			}
+			if debug {
+				fmt.Printf("%#x: %x%x\n", start+size, x, pops[len(x):])
+			}
+			w.Write(x) // bufio.Writer errors are sticky; the Flush below reports them
+			w.Write(pops[len(x):])
+			size += len(pops)
+		})
+		if err = w.Flush(); err == nil {
+			return f.Name(), f, size, nil
+		}
+	}
+	f.Close()
+	os.Remove(f.Name())
+	return "", nil, 0, err
+}
+
+// pops is 32 copies of 0x5F, a single-byte pop instruction.
+// We pad the bytes we want decoded with enough 0x5Fs
+// that no matter what state the instruction stream is in
+// after reading our bytes, the pops will get us back to
+// a forced instruction boundary.
+var pops = []byte{
+	0x5f, 0x5f, 0x5f, 0x5f, 0x5f, 0x5f, 0x5f, 0x5f,
+	0x5f, 0x5f, 0x5f, 0x5f, 0x5f, 0x5f, 0x5f, 0x5f,
+	0x5f, 0x5f, 0x5f, 0x5f, 0x5f, 0x5f, 0x5f, 0x5f,
+	0x5f, 0x5f, 0x5f, 0x5f, 0x5f, 0x5f, 0x5f, 0x5f,
+}
+
+// pad returns a new slice holding the code sequence enc followed by all of pops.
+func pad(enc []byte) []byte {
+	return append(append(make([]byte, 0, len(enc)+len(pops)), enc...), pops...)
+}
+
+// disasm returns the decoded instruction and its text for the given
+// source bytes, using the given syntax ("gnu", "intel", or "plan9")
+// and mode. If decoding fails, or the syntax is not one of those
+// three, text is a message beginning with "error: " instead of a
+// disassembly.
+func disasm(syntax string, mode int, src []byte) (inst Inst, text string) {
+	// If printTests is set, we sample the coverage value before and
+	// after decoding, and we write out the inputs for which coverage
+	// went up, in the format expected in testdata/decode.txt.
+	// This produces a fairly small set of test cases that exercise
+	// nearly all the code.
+	var before float64
+	if *printTests {
+		before = coverage()
+	}
+
+	inst, err := decode1(src, mode, syntax == "gnu")
+	switch {
+	case err != nil:
+		text = "error: " + err.Error()
+	case syntax == "gnu":
+		text = GNUSyntax(inst)
+	case syntax == "intel":
+		text = IntelSyntax(inst)
+	case syntax == "plan9":
+		text = plan9Syntax(inst, 0, nil)
+	default:
+		text = "error: unknown syntax " + syntax
+	}
+
+	if *printTests && coverage()-before > 0 {
+		// Print the decoded bytes, then the rest of src, cut off at
+		// 16 bytes in total unless the instruction itself is longer.
+		end := len(src)
+		if end > 16 && inst.Len <= 16 {
+			end = 16
+		}
+		fmt.Printf("%x|%x\t%d\t%s\t%s\n", src[:inst.Len], src[inst.Len:end], mode, syntax, text)
+	}
+
+	return inst, text
+}
+
+// coverage returns a floating point number denoting the
+// test coverage until now. The number increases when new code paths are exercised,
+// both in the Go program and in the decoder byte code.
+func coverage() float64 {
+	/*
+		testing.Coverage is not in the main distribution.
+		The implementation, which must go in package testing, is:
+
+		// Coverage reports the current code coverage as a fraction in the range [0, 1].
+		func Coverage() float64 {
+			var n, d int64
+			for _, counters := range cover.Counters {
+				for _, c := range counters {
+					if c > 0 {
+						n++
+					}
+					d++
+				}
+			}
+			if d == 0 {
+				return 0
+			}
+			return float64(n) / float64(d)
+		}
+	*/
+
+	var f float64
+	// f += testing.Coverage()
+	f += decodeCoverage()
+	return f
+}
+
+func decodeCoverage() float64 {
+	hit := 1 // the +1 on both sides keeps the ratio defined and nonzero
+	for _, covered := range decoderCover {
+		if covered {
+			hit++
+		}
+	}
+	return float64(hit) / float64(1+len(decoderCover)) // (1+covered)/(1+total)
+}
+
+// Helpers for writing disassembler output parsers.
+
+// isPrefix reports whether text is one of the prefix names listed in prefixByte.
+func isPrefix(text string) bool {
+	return prefixByte[text] != 0
+}
+
+// prefixByte maps instruction prefix text to actual prefix byte values.
+var prefixByte = map[string]byte{
+	"es":       0x26,
+	"cs":       0x2e,
+	"ss":       0x36,
+	"ds":       0x3e,
+	"fs":       0x64,
+	"gs":       0x65,
+	"data16":   0x66,
+	"addr16":   0x67,
+	"lock":     0xf0,
+	"repn":     0xf2,
+	"repne":    0xf2,
+	"rep":      0xf3,
+	"repe":     0xf3,
+	"xacquire": 0xf2,
+	"xrelease": 0xf3,
+	"bnd":      0xf2,
+	"addr32":   0x67, // address-size override: same byte as addr16
+	"data32":   0x66, // operand-size override: same byte as data16
+}
+
+// hasPrefix reports whether any of the space-separated words in the text s
+// begins with any of the given prefixes. An empty s never matches.
+func hasPrefix(s string, prefixes ...string) bool {
+	for rest := s; rest != ""; {
+		for _, want := range prefixes {
+			if strings.HasPrefix(rest, want) {
+				return true
+			}
+		}
+		sp := strings.IndexByte(rest, ' ')
+		if sp < 0 {
+			return false
+		}
+		rest = rest[sp+1:]
+	}
+	return false
+}
+
+// contains reports whether the text s contains any of the given substrings.
+func contains(s string, substrings ...string) bool {
+	for i := 0; i < len(substrings); i++ {
+		if strings.Index(s, substrings[i]) >= 0 {
+			return true
+		}
+	}
+	return false
+}
+
+// isHex reports whether b is an ASCII hexadecimal digit: 0-9, A-F, or a-f.
+func isHex(b byte) bool { return '0' <= b && b <= '9' || 'A' <= b && b <= 'F' || 'a' <= b && b <= 'f' }
+
+// parseHex parses the hexadecimal byte dump in hex,
+// appending the parsed bytes to raw and returning the updated slice.
+// The returned bool is true if the whole dump parsed cleanly.
+// If any invalid hex is found, parseHex returns nil, false.
+// Spaces and tabs between bytes are okay but any other non-hex is not,
+// and each byte must be written as two adjacent hex digits.
+func parseHex(hex []byte, raw []byte) ([]byte, bool) {
+	hex = trimSpace(hex)
+	for pos := 0; pos < len(hex); pos += 2 {
+		// trimSpace removed trailing blanks, so this skip stops before len(hex).
+		for hex[pos] == ' ' || hex[pos] == '\t' {
+			pos++
+		}
+		if pos+1 >= len(hex) || !isHex(hex[pos]) || !isHex(hex[pos+1]) {
+			return nil, false
+		}
+		hi, lo := unhex[hex[pos]], unhex[hex[pos+1]]
+		raw = append(raw, hi<<4|lo)
+	}
+	return raw, true
+}
+
+// unhex maps an ASCII hex digit to its value; '0' and every non-hex byte map to 0.
+var unhex = [256]byte{
+	'1': 1,
+	'2': 2,
+	'3': 3,
+	'4': 4,
+	'5': 5,
+	'6': 6,
+	'7': 7,
+	'8': 8,
+	'9': 9,
+	'a': 10,
+	'b': 11,
+	'c': 12,
+	'd': 13,
+	'e': 14,
+	'f': 15,
+	'A': 10,
+	'B': 11,
+	'C': 12,
+	'D': 13,
+	'E': 14,
+	'F': 15,
+}
+
+// index is like bytes.Index(s, []byte(t)) but avoids the allocation.
+// It returns the offset of the first occurrence of t in s, or -1 if there
+// is none. As with bytes.Index, an empty t is found at offset 0.
+func index(s []byte, t string) int {
+	if len(t) == 0 {
+		return 0
+	}
+	for i := 0; ; i++ {
+		j := bytes.IndexByte(s[i:], t[0])
+		if j < 0 || i+j+len(t) > len(s) {
+			return -1
+		}
+		i += j
+		k := 1
+		for k < len(t) && s[i+k] == t[k] {
+			k++
+		}
+		if k == len(t) {
+			return i
+		}
+	}
+}
+
+// fixSpace rewrites runs of spaces, tabs, and newline characters into single spaces in s,
+// after trimming it with trimSpace. If s must be rewritten, it is rewritten in place.
+func fixSpace(s []byte) []byte {
+	s = trimSpace(s)
+	dirty := false
+	for i, c := range s {
+		if c == '\t' || c == '\n' || i > 0 && c == ' ' && s[i-1] == ' ' {
+			dirty = true
+			break
+		}
+	}
+	if !dirty {
+		return s
+	}
+	n := 0
+	for _, c := range s {
+		if c == '\t' || c == '\n' {
+			c = ' '
+		}
+		if c == ' ' && n > 0 && s[n-1] == ' ' {
+			continue
+		}
+		s[n] = c
+		n++
+	}
+	if n > 0 && s[n-1] == ' ' {
+		n--
+	}
+	return s[:n]
+}
+
+// trimSpace trims leading and trailing space from s, returning a subslice of s.
+// Trailing spaces, tabs, and newlines are removed; at the front only spaces
+// and tabs are removed, so a leading newline is kept.
+func trimSpace(s []byte) []byte {
+	lo, hi := 0, len(s)
+	for hi > 0 && (s[hi-1] == '\n' || s[hi-1] == '\t' || s[hi-1] == ' ') {
+		hi--
+	}
+	for lo < hi && (s[lo] == '\t' || s[lo] == ' ') {
+		lo++
+	}
+	return s[lo:hi]
+}
+
+// pcrel matches text ending in a hex target after a jump, call, ljmp, loop, or xbegin mnemonic (optional q suffix).
+var pcrel = regexp.MustCompile(`^((?:.* )?(?:j[a-z]+|call|ljmp|loopn?e?w?|xbegin)q?(?:,p[nt])?) 0x([0-9a-f]+)$`)
+
+// pcrelw is the same for the w-suffixed mnemonics callw, jmpw, xbeginw, and ljmpw.
+var pcrelw = regexp.MustCompile(`^((?:.* )?(?:callw|jmpw|xbeginw|ljmpw)(?:,p[nt])?) 0x([0-9a-f]+)$`)
+
+// Generators.
+//
+// The test cases are described as functions that invoke a callback repeatedly,
+// with a new input sequence each time. These helpers make writing those
+// a little easier.
+
+// hexCases generates the cases written in hexadecimal in the encoded string.
+// Spaces in 'encoded' separate entire test cases, not individual bytes.
+func hexCases(t *testing.T, encoded string) func(func([]byte)) {
+	return func(try func([]byte)) {
+		for _, x := range strings.Fields(encoded) {
+			if src, err := hex.DecodeString(x); err != nil {
+				t.Errorf("parsing %q: %v", x, err) // report, but do not try the partial decode
+			} else {
+				try(src)
+			}
+		}
+	}
+}
+
+// testdataCases generates the test cases recorded in testdata/decode.txt.
+// It only uses the inputs; it ignores the answers recorded in that file.
+func testdataCases(t *testing.T) func(func([]byte)) {
+	data, err := ioutil.ReadFile("testdata/decode.txt")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	var codes [][]byte
+	for _, line := range strings.Split(string(data), "\n") {
+		if line = strings.TrimSpace(line); line == "" || line[0] == '#' {
+			continue
+		}
+		field := strings.Fields(line)[0]
+		bar := strings.IndexByte(field, '|')
+		switch {
+		case bar < 0:
+			t.Errorf("parsing %q: missing | separator", field)
+			continue
+		case bar%2 != 0:
+			t.Errorf("parsing %q: misaligned | separator", field)
+		}
+		code, err := hex.DecodeString(field[:bar] + field[bar+1:])
+		if err != nil {
+			t.Errorf("parsing %q: %v", field, err)
+			continue
+		}
+		codes = append(codes, code)
+	}
+
+	return func(try func([]byte)) {
+		for i := range codes {
+			try(codes[i])
+		}
+	}
+}
+
+// manyPrefixes generates all 2⁹ subsets of nine chosen prefix bytes (0x66 and 0x67 each appear twice).
+// The first byte is swapped with a deterministically chosen one; the slice passed to try is reused between calls.
+func manyPrefixes(try func([]byte)) {
+	const choices = "\x66\x67\xF0\xF2\xF3\x3E\x36\x66\x67"
+	var buf []byte
+	for mask := 0; mask < 1<<uint(len(choices)); mask++ {
+		buf = buf[:0]
+		for bit := 0; bit < len(choices); bit++ {
+			if mask>>uint(bit)&1 != 0 {
+				buf = append(buf, choices[bit])
+			}
+		}
+		if n := len(buf); n > 0 {
+			swap := mask % n
+			buf[0], buf[swap] = buf[swap], buf[0]
+		}
+		try(buf)
+	}
+}
+
+// basicPrefixes generates 8 different possible prefix cases: no prefix
+// (a nil slice) and then one each of seven different prefix bytes,
+// each as a one-byte slice.
+func basicPrefixes(try func([]byte)) {
+	for _, enc := range [][]byte{nil, {0x66}, {0x67}, {0xF0}, {0xF2}, {0xF3}, {0x3E}, {0x36}} {
+		try(enc)
+	}
+}
+
+// rexPrefixes generates 5 cases: no prefix (nil), then the single bytes 0x40, 0x48, 0x43, 0x4C.
+func rexPrefixes(try func([]byte)) {
+	for _, enc := range [][]byte{nil, {0x40}, {0x48}, {0x43}, {0x4C}} {
+		try(enc)
+	}
+}
+
+// concat takes two generators and returns a generator for the
+// cross product of the two, concatenating the results from each.
+// gen1's slice is capacity-clipped first, so appending never writes into its buffer.
+func concat(gen1, gen2 func(func([]byte))) func(func([]byte)) {
+	return func(try func([]byte)) {
+		gen1(func(head []byte) {
+			head = head[:len(head):len(head)]
+			gen2(func(tail []byte) { try(append(head, tail...)) })
+		})
+	}
+}
+
+// concat3 takes three generators and returns a generator for the
+// cross product of the three, concatenating the results from each.
+// gen1's slice is capacity-clipped first, so appending never writes into its buffer.
+func concat3(gen1, gen2, gen3 func(func([]byte))) func(func([]byte)) {
+	return func(try func([]byte)) {
+		gen1(func(a []byte) {
+			a = a[:len(a):len(a)]
+			gen2(func(b []byte) {
+				gen3(func(c []byte) { try(append(append(a, b...), c...)) })
+			})
+		})
+	}
+}
+
+// concat4 takes four generators and returns a generator for the
+// cross product of the four, concatenating the results from each.
+// gen1's slice is capacity-clipped first, so appending never writes into its buffer.
+func concat4(gen1, gen2, gen3, gen4 func(func([]byte))) func(func([]byte)) {
+	return func(try func([]byte)) {
+		gen1(func(a []byte) {
+			a = a[:len(a):len(a)]
+			gen2(func(b []byte) {
+				gen3(func(c []byte) {
+					gen4(func(d []byte) { try(append(append(append(a, b...), c...), d...)) })
+				})
+			})
+		})
+	}
+}
+
+// filter generates the sequences from gen that satisfy ok.
+func filter(gen func(func([]byte)), ok func([]byte) bool) func(func([]byte)) {
+	return func(emit func([]byte)) {
+		gen(func(candidate []byte) {
+			if accepted := ok(candidate); accepted {
+				emit(candidate)
+			}
+		})
+	}
+}
+
+// enum8bit generates all possible 1-byte sequences, followed by distinctive padding.
+func enum8bit(try func([]byte)) {
+	for v := 0; v <= 0xFF; v++ {
+		try(append([]byte{byte(v)}, "\x11\x22\x33\x44\x55\x66\x77\x88"...))
+	}
+}
+
+// enum16bit generates all possible 2-byte sequences (low byte first), followed by distinctive padding.
+func enum16bit(try func([]byte)) {
+	for v := 0; v <= 0xFFFF; v++ {
+		try(append([]byte{byte(v), byte(v >> 8)}, "\x11\x22\x33\x44\x55\x66\x77\x88"...))
+	}
+}
+
+// enum24bit generates all possible 3-byte sequences (low byte first), followed by distinctive padding.
+func enum24bit(try func([]byte)) {
+	for v := 0; v <= 0xFFFFFF; v++ {
+		try(append([]byte{byte(v), byte(v >> 8), byte(v >> 16)}, "\x11\x22\x33\x44\x55\x66\x77\x88"...))
+	}
+}
+
+// enumModRM generates all possible modrm bytes and, for modrm values that indicate
+// a following sib byte (mod != 3 and r/m == 4), all possible modrm, sib combinations.
+func enumModRM(try func([]byte)) {
+	for i := 0; i < 256; i++ {
+		if i&07 == 04 && i>>6 != 3 { // has sib: r/m is the low three bits, mod the top two
+			for j := 0; j < 256; j++ {
+				try([]byte{0, byte(i), byte(j), 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88}) // byte encodings
+				try([]byte{1, byte(i), byte(j), 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88}) // word encodings
+			}
+		} else {
+			try([]byte{0, byte(i), 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88}) // byte encodings
+			try([]byte{1, byte(i), 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88}) // word encodings
+		}
+	}
+}
+
+// fixed generates the single case b.
+// It's mainly useful to prepare an argument for concat or concat3.
+// The generator passes b itself to try, not a copy.
+func fixed(b ...byte) func(func([]byte)) {
+	gen := func(try func([]byte)) { try(b) }
+	return gen
+}
+
+// testBasic runs the given test function with cases all using opcode as the initial opcode bytes.
+// It runs three phases:
+//
+// First, each basicPrefixes case followed by opcode followed by all possible 1-byte values.
+// If in -short mode, that's all.
+//
+// Second, each basicPrefixes case followed by opcode followed by all possible 2-byte values.
+// If not in -long mode, that's all. This phase and the next run in parallel with other tests
+// (using t.Parallel).
+//
+// Finally, opcode followed by all possible 3-byte values, with no prefixes. This can take a
+// very long time and prints progress messages to package log, named after testBasic's caller.
+func testBasic(t *testing.T, testfn func(*testing.T, func(func([]byte))), opcode ...byte) {
+	testfn(t, concat3(basicPrefixes, fixed(opcode...), enum8bit))
+	if testing.Short() {
+		return
+	}
+
+	t.Parallel()
+	testfn(t, concat3(basicPrefixes, fixed(opcode...), enum16bit))
+	if !*longTest {
+		return
+	}
+
+	name := caller(2)
+	n := len(opcode)
+	op1 := append(opcode[:n:n], 0) // fresh copy of opcode with one extra byte to vary
+	for last := 0; last < 256; last++ {
+		log.Printf("%s 24-bit: %d/256\n", name, last)
+		op1[n] = byte(last)
+		testfn(t, concat(fixed(op1...), enum16bit))
+	}
+}
+
+// testBasicREX is like testBasic but inserts each rexPrefixes case before the opcode
+// and keeps only the sequences accepted by isValidREX.
+func testBasicREX(t *testing.T, testfn func(*testing.T, func(func([]byte))), opcode ...byte) {
+	testfn(t, filter(concat4(basicPrefixes, rexPrefixes, fixed(opcode...), enum8bit), isValidREX))
+	if testing.Short() {
+		return
+	}
+	t.Parallel()
+	testfn(t, filter(concat4(basicPrefixes, rexPrefixes, fixed(opcode...), enum16bit), isValidREX))
+	if !*longTest {
+		return
+	}
+	name := caller(2)
+	n := len(opcode)
+	op1 := append(opcode[:n:n], 0) // fresh copy of opcode with one extra byte to vary
+	for last := 0; last < 256; last++ {
+		log.Printf("%s 24-bit: %d/256\n", name, last)
+		op1[n] = byte(last)
+		testfn(t, filter(concat3(rexPrefixes, fixed(op1...), enum16bit), isValidREX))
+	}
+}
+
+// testPrefix runs the given test function for all the manyPrefixes
+// combinations followed by all possible 1-byte sequences.
+//
+// If in -long mode (and not -short), it then runs a test of all the
+// prefix combinations followed by all possible 2-byte sequences,
+// printing progress messages to package log. The test is marked
+// parallel (t.Parallel) before any cases run.
+func testPrefix(t *testing.T, testfn func(*testing.T, func(func([]byte)))) {
+	t.Parallel()
+	testfn(t, concat(manyPrefixes, enum8bit))
+	if !testing.Short() && *longTest {
+		name := caller(2)
+		for op := 0; op <= 0xFF; op++ {
+			log.Printf("%s 16-bit: %d/256\n", name, op)
+			testfn(t, concat3(manyPrefixes, fixed(byte(op)), enum8bit))
+		}
+	}
+}
+
+// testPrefixREX is like testPrefix but inserts each rexPrefixes case after the
+// manyPrefixes bytes and keeps only the sequences accepted by isValidREX.
+func testPrefixREX(t *testing.T, testfn func(*testing.T, func(func([]byte)))) {
+	t.Parallel()
+	testfn(t, filter(concat3(manyPrefixes, rexPrefixes, enum8bit), isValidREX))
+	if !testing.Short() && *longTest {
+		name := caller(2)
+		for op := 0; op <= 0xFF; op++ {
+			log.Printf("%s 16-bit: %d/256\n", name, op)
+			testfn(t, filter(concat4(manyPrefixes, rexPrefixes, fixed(byte(op)), enum8bit), isValidREX))
+		}
+	}
+}
+
+// caller returns the name, with everything up to the last dot removed, of
+// the function found by runtime.Caller(skip) called from here, so skip 1
+// names the function that called caller. It returns "?" if none is found.
+func caller(skip int) string {
+	pc, _, _, _ := runtime.Caller(skip)
+	fn := runtime.FuncForPC(pc)
+	if fn == nil {
+		return "?"
+	}
+	full := fn.Name()
+	return full[strings.LastIndex(full, ".")+1:]
+}
+
+// isValidREX skips the leading isPrefixByte bytes of x; if the next byte is a REX prefix
+// with a byte after it, that byte must be neither a prefix byte nor REX. Otherwise it reports true.
+func isValidREX(x []byte) bool {
+	n := 0
+	for n < len(x) && isPrefixByte(x[n]) {
+		n++
+	}
+	if n+1 >= len(x) || !Prefix(x[n]).IsREX() {
+		return true
+	}
+	next := x[n+1]
+	return !isPrefixByte(next) && !Prefix(next).IsREX()
+}
+
+// isPrefixByte reports whether b is one of the segment override (26 2E 36 3E 64 65),
+// size override (66 67), or F0/F2/F3 prefix bytes.
+func isPrefixByte(b byte) bool {
+	isSeg := b == 0x26 || b == 0x2E || b == 0x36 || b == 0x3E || b == 0x64 || b == 0x65
+	isSize := b == 0x66 || b == 0x67
+	return isSeg || isSize || b == 0xF0 || b == 0xF2 || b == 0xF3
+}
diff --git a/x86/x86asm/gnu.go b/x86/x86asm/gnu.go
new file mode 100644
index 0000000..e2ff801
--- /dev/null
+++ b/x86/x86asm/gnu.go
@@ -0,0 +1,926 @@
+// Copyright 2014 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package x86asm
+
+import (
+	"fmt"
+	"strings"
+)
+
+// GNUSyntax returns the GNU assembler syntax for the instruction, as defined by GNU binutils.
+// This general form is often called "AT&T syntax" as a reference to AT&T System V Unix.
+func GNUSyntax(inst Inst) string {
+	// Rewrite instruction to mimic GNU peculiarities.
+	// Note that inst has been passed by value and contains
+	// no pointers, so any changes we make here are local
+	// and will not propagate back out to the caller.
+
+	// Adjust opcode [sic].
+	switch inst.Op {
+	case FDIV, FDIVR, FSUB, FSUBR, FDIVP, FDIVRP, FSUBP, FSUBRP:
+		// DC E0, DC F0: libopcodes swaps FSUBR/FSUB and FDIVR/FDIV, at least
+		// if you believe the Intel manual is correct (the encoding is irregular as given;
+		// libopcodes uses the more regular expected encoding).
+		// TODO(rsc): Test to ensure Intel manuals are correct and report to libopcodes maintainers?
+		// NOTE: iant thinks this is deliberate, but we can't find the history.
+		_, reg1 := inst.Args[0].(Reg)
+		_, reg2 := inst.Args[1].(Reg)
+		if reg1 && reg2 && (inst.Opcode>>24 == 0xDC || inst.Opcode>>24 == 0xDE) {
+			switch inst.Op {
+			case FDIV:
+				inst.Op = FDIVR
+			case FDIVR:
+				inst.Op = FDIV
+			case FSUB:
+				inst.Op = FSUBR
+			case FSUBR:
+				inst.Op = FSUB
+			case FDIVP:
+				inst.Op = FDIVRP
+			case FDIVRP:
+				inst.Op = FDIVP
+			case FSUBP:
+				inst.Op = FSUBRP
+			case FSUBRP:
+				inst.Op = FSUBP
+			}
+		}
+
+	case MOVNTSD:
+		// MOVNTSD is F2 0F 2B /r.
+		// MOVNTSS is F3 0F 2B /r (supposedly; not in manuals).
+		// Usually inner prefixes win for display,
+		// so that F3 F2 0F 2B 11 is REP MOVNTSD
+		// and F2 F3 0F 2B 11 is REPN MOVNTSS.
+		// Libopcodes always prefers MOVNTSS regardless of prefix order.
+		if countPrefix(&inst, 0xF3) > 0 {
+			found := false
+			for i := len(inst.Prefix) - 1; i >= 0; i-- {
+				switch inst.Prefix[i] & 0xFF {
+				case 0xF3:
+					if !found {
+						found = true
+						inst.Prefix[i] |= PrefixImplicit
+					}
+				case 0xF2:
+					inst.Prefix[i] &^= PrefixImplicit
+				}
+			}
+			inst.Op = MOVNTSS
+		}
+	}
+
+	// Add implicit arguments.
+	switch inst.Op {
+	case MONITOR:
+		inst.Args[0] = EDX
+		inst.Args[1] = ECX
+		inst.Args[2] = EAX
+		if inst.AddrSize == 16 {
+			inst.Args[2] = AX
+		}
+
+	case MWAIT:
+		if inst.Mode == 64 {
+			inst.Args[0] = RCX
+			inst.Args[1] = RAX
+		} else {
+			inst.Args[0] = ECX
+			inst.Args[1] = EAX
+		}
+	}
+
+	// Adjust which prefixes will be displayed.
+	// The rule is to display all the prefixes not implied by
+	// the usual instruction display, that is, all the prefixes
+	// except the ones with PrefixImplicit set.
+	// However, of course, there are exceptions to the rule.
+	switch inst.Op {
+	case CRC32:
+		// CRC32 has a mandatory F2 prefix.
+		// If there are multiple F2s and no F3s, the extra F2s do not print.
+		// (And Decode has already marked them implicit.)
+		// However, if there is an F3 anywhere, then the extra F2s do print.
+		// If there are multiple F2 prefixes *and* an (ignored) F3,
+		// then libopcodes prints the extra F2s as REPNs.
+		if countPrefix(&inst, 0xF2) > 1 {
+			unmarkImplicit(&inst, 0xF2)
+			markLastImplicit(&inst, 0xF2)
+		}
+
+		// An unused data size override should probably be shown,
+		// to distinguish DATA16 CRC32B from plain CRC32B,
+		// but libopcodes always treats the final override as implicit
+		// and the others as explicit.
+		unmarkImplicit(&inst, PrefixDataSize)
+		markLastImplicit(&inst, PrefixDataSize)
+
+	case CVTSI2SD, CVTSI2SS:
+		if !isMem(inst.Args[1]) {
+			markLastImplicit(&inst, PrefixDataSize)
+		}
+
+	case CVTSD2SI, CVTSS2SI, CVTTSD2SI, CVTTSS2SI,
+		ENTER, FLDENV, FNSAVE, FNSTENV, FRSTOR, LGDT, LIDT, LRET,
+		POP, PUSH, RET, SGDT, SIDT, SYSRET, XBEGIN:
+		markLastImplicit(&inst, PrefixDataSize)
+
+	case LOOP, LOOPE, LOOPNE, MONITOR:
+		markLastImplicit(&inst, PrefixAddrSize)
+
+	case MOV:
+		// The 16-bit and 32-bit forms of MOV Sreg, dst and MOV src, Sreg
+		// cannot be distinguished when src or dst refers to memory, because
+		// Sreg is always a 16-bit value, even when we're doing a 32-bit
+		// instruction. Because the instruction tables distinguished these two,
+		// any operand size prefix has been marked as used (to decide which
+		// branch to take). Unmark it, so that it will show up in disassembly,
+		// so that the reader can tell the size of memory operand.
+		// Otherwise the two forms would print with the same arguments.
+		dst, _ := inst.Args[0].(Reg)
+		src, _ := inst.Args[1].(Reg)
+		if ES <= src && src <= GS && isMem(inst.Args[0]) || ES <= dst && dst <= GS && isMem(inst.Args[1]) {
+			unmarkImplicit(&inst, PrefixDataSize)
+		}
+
+	case MOVDQU:
+		if countPrefix(&inst, 0xF3) > 1 {
+			unmarkImplicit(&inst, 0xF3)
+			markLastImplicit(&inst, 0xF3)
+		}
+
+	case MOVQ2DQ:
+		markLastImplicit(&inst, PrefixDataSize)
+
+	case SLDT, SMSW, STR, FXRSTOR, XRSTOR, XSAVE, XSAVEOPT, CMPXCHG8B:
+		if isMem(inst.Args[0]) {
+			unmarkImplicit(&inst, PrefixDataSize)
+		}
+
+	case SYSEXIT:
+		unmarkImplicit(&inst, PrefixDataSize)
+	}
+
+	if isCondJmp[inst.Op] || isLoop[inst.Op] || inst.Op == JCXZ || inst.Op == JECXZ || inst.Op == JRCXZ {
+		if countPrefix(&inst, PrefixCS) > 0 && countPrefix(&inst, PrefixDS) > 0 {
+			for i, p := range inst.Prefix {
+				switch p & 0xFFF {
+				case PrefixPN, PrefixPT:
+					inst.Prefix[i] &= 0xF0FF // cut interpretation bits, producing original segment prefix
+				}
+			}
+		}
+	}
+
+	// XACQUIRE/XRELEASE adjustment.
+	if inst.Op == MOV {
+		// MOV into memory is a candidate for turning REP into XRELEASE.
+		// However, if the REP is followed by a REPN, that REPN blocks the
+		// conversion.
+		haveREPN := false
+		for i := len(inst.Prefix) - 1; i >= 0; i-- {
+			switch inst.Prefix[i] &^ PrefixIgnored {
+			case PrefixREPN:
+				haveREPN = true
+			case PrefixXRELEASE:
+				if haveREPN {
+					inst.Prefix[i] = PrefixREP
+				}
+			}
+		}
+	}
+
+	// We only format the final F2/F3 as XRELEASE/XACQUIRE.
+	haveXA := false
+	haveXR := false
+	for i := len(inst.Prefix) - 1; i >= 0; i-- {
+		switch inst.Prefix[i] &^ PrefixIgnored {
+		case PrefixXRELEASE:
+			if !haveXR {
+				haveXR = true
+			} else {
+				inst.Prefix[i] = PrefixREP
+			}
+
+		case PrefixXACQUIRE:
+			if !haveXA {
+				haveXA = true
+			} else {
+				inst.Prefix[i] = PrefixREPN
+			}
+		}
+	}
+
+	// Determine opcode.
+	op := strings.ToLower(inst.Op.String())
+	if alt := gnuOp[inst.Op]; alt != "" {
+		op = alt
+	}
+
+	// Determine opcode suffix.
+	// Libopcodes omits the suffix if the width of the operation
+	// can be inferred from a register argument. For example,
+	// add $1, %ebx has no suffix because you can tell from the
+	// 32-bit register destination that it is a 32-bit add,
+	// but in addl $1, (%ebx), the destination is memory, so the
+	// size is not evident without the l suffix.
+	needSuffix := true
+SuffixLoop:
+	for i, a := range inst.Args {
+		if a == nil {
+			break
+		}
+		switch a := a.(type) {
+		case Reg:
+			switch inst.Op {
+			case MOVSX, MOVZX:
+				continue
+
+			case SHL, SHR, RCL, RCR, ROL, ROR, SAR:
+				if i == 1 {
+					// shift count does not tell us operand size
+					continue
+				}
+
+			case CRC32:
+				// The source argument does tell us operand size,
+				// but libopcodes still always puts a suffix on crc32.
+				continue
+
+			case PUSH, POP:
+				// Even though segment registers are 16-bit, push and pop
+				// can save/restore them from 32-bit slots, so they
+				// do not imply operand size.
+				if ES <= a && a <= GS {
+					continue
+				}
+
+			case CVTSI2SD, CVTSI2SS:
+				// The integer register argument takes priority.
+				if X0 <= a && a <= X15 {
+					continue
+				}
+			}
+
+			if AL <= a && a <= R15 || ES <= a && a <= GS || X0 <= a && a <= X15 || M0 <= a && a <= M7 {
+				needSuffix = false
+				break SuffixLoop
+			}
+		}
+	}
+
+	if needSuffix {
+		switch inst.Op {
+		case CMPXCHG8B, FLDCW, FNSTCW, FNSTSW, LDMXCSR, LLDT, LMSW, LTR, PCLMULQDQ,
+			SETA, SETAE, SETB, SETBE, SETE, SETG, SETGE, SETL, SETLE, SETNE, SETNO, SETNP, SETNS, SETO, SETP, SETS,
+			SLDT, SMSW, STMXCSR, STR, VERR, VERW:
+			// For various reasons, libopcodes emits no suffix for these instructions.
+
+		case CRC32:
+			op += byteSizeSuffix(argBytes(&inst, inst.Args[1]))
+
+		case LGDT, LIDT, SGDT, SIDT:
+			op += byteSizeSuffix(inst.DataSize / 8)
+
+		case MOVZX, MOVSX:
+			// Integer size conversions get two suffixes.
+			op = op[:4] + byteSizeSuffix(argBytes(&inst, inst.Args[1])) + byteSizeSuffix(argBytes(&inst, inst.Args[0]))
+
+		case LOOP, LOOPE, LOOPNE:
+			// Add w suffix to indicate use of CX register instead of ECX.
+			if inst.AddrSize == 16 {
+				op += "w"
+			}
+
+		case CALL, ENTER, JMP, LCALL, LEAVE, LJMP, LRET, RET, SYSRET, XBEGIN:
+			// Add w suffix to indicate use of 16-bit target.
+			// Exclude JMP rel8.
+			if inst.Opcode>>24 == 0xEB {
+				break
+			}
+			if inst.DataSize == 16 && inst.Mode != 16 {
+				markLastImplicit(&inst, PrefixDataSize)
+				op += "w"
+			} else if inst.Mode == 64 {
+				op += "q"
+			}
+
+		case FRSTOR, FNSAVE, FNSTENV, FLDENV:
+			// Add s suffix to indicate shortened FPU state (I guess).
+			if inst.DataSize == 16 {
+				op += "s"
+			}
+
+		case PUSH, POP:
+			if markLastImplicit(&inst, PrefixDataSize) {
+				op += byteSizeSuffix(inst.DataSize / 8)
+			} else if inst.Mode == 64 {
+				op += "q"
+			} else {
+				op += byteSizeSuffix(inst.MemBytes)
+			}
+
+		default:
+			if isFloat(inst.Op) {
+				// I can't explain any of this, but it's what libopcodes does.
+				switch inst.MemBytes {
+				default:
+					if (inst.Op == FLD || inst.Op == FSTP) && isMem(inst.Args[0]) {
+						op += "t"
+					}
+				case 4:
+					if isFloatInt(inst.Op) {
+						op += "l"
+					} else {
+						op += "s"
+					}
+				case 8:
+					if isFloatInt(inst.Op) {
+						op += "ll"
+					} else {
+						op += "l"
+					}
+				}
+				break
+			}
+
+			op += byteSizeSuffix(inst.MemBytes)
+		}
+	}
+
+	// Adjust special case opcodes.
+	switch inst.Op {
+	case 0:
+		if inst.Prefix[0] != 0 {
+			return strings.ToLower(inst.Prefix[0].String())
+		}
+
+	case INT:
+		if inst.Opcode>>24 == 0xCC {
+			inst.Args[0] = nil
+			op = "int3"
+		}
+
+	case CMPPS, CMPPD, CMPSD_XMM, CMPSS:
+		imm, ok := inst.Args[2].(Imm)
+		if ok && 0 <= imm && imm < 8 {
+			inst.Args[2] = nil
+			op = cmppsOps[imm] + op[3:]
+		}
+
+	case PCLMULQDQ:
+		imm, ok := inst.Args[2].(Imm)
+		if ok && imm&^0x11 == 0 {
+			inst.Args[2] = nil
+			op = pclmulqOps[(imm&0x10)>>3|(imm&1)]
+		}
+
+	case XLATB:
+		if markLastImplicit(&inst, PrefixAddrSize) {
+			op = "xlat" // not xlatb
+		}
+	}
+
+	// Build list of argument strings.
+	var (
+		usedPrefixes bool     // segment prefixes consumed by Mem formatting
+		args         []string // formatted arguments
+	)
+	for i, a := range inst.Args {
+		if a == nil {
+			break
+		}
+		switch inst.Op {
+		case MOVSB, MOVSW, MOVSD, MOVSQ, OUTSB, OUTSW, OUTSD:
+			if i == 0 {
+				usedPrefixes = true // disable use of prefixes for first argument
+			} else {
+				usedPrefixes = false
+			}
+		}
+		if a == Imm(1) && (inst.Opcode>>24)&^1 == 0xD0 {
+			continue
+		}
+		args = append(args, gnuArg(&inst, a, &usedPrefixes))
+	}
+
+	// The default is to print the arguments in reverse Intel order.
+	// A few instructions inhibit this behavior.
+	switch inst.Op {
+	case BOUND, LCALL, ENTER, LJMP:
+		// no reverse
+	default:
+		// reverse args
+		for i, j := 0, len(args)-1; i < j; i, j = i+1, j-1 {
+			args[i], args[j] = args[j], args[i]
+		}
+	}
+
+	// Build prefix string.
+	// Must be after argument formatting, which can turn off segment prefixes.
+	var (
+		prefix       = "" // output string
+		numAddr      = 0
+		numData      = 0
+		implicitData = false
+	)
+	for _, p := range inst.Prefix {
+		if p&0xFF == PrefixDataSize && p&PrefixImplicit != 0 {
+			implicitData = true
+		}
+	}
+	for _, p := range inst.Prefix {
+		if p == 0 {
+			break
+		}
+		if p&PrefixImplicit != 0 {
+			continue
+		}
+		switch p &^ (PrefixIgnored | PrefixInvalid) {
+		default:
+			if p.IsREX() {
+				if p&0xFF == PrefixREX {
+					prefix += "rex "
+				} else {
+					prefix += "rex." + p.String()[4:] + " "
+				}
+				break
+			}
+			prefix += strings.ToLower(p.String()) + " "
+
+		case PrefixPN:
+			op += ",pn"
+			continue
+
+		case PrefixPT:
+			op += ",pt"
+			continue
+
+		case PrefixAddrSize, PrefixAddr16, PrefixAddr32:
+			// For unknown reasons, if the addr16 prefix is repeated,
+			// libopcodes displays all but the last as addr32, even though
+			// the addressing form used in a memory reference is clearly
+			// still 16-bit.
+			n := 32
+			if inst.Mode == 32 {
+				n = 16
+			}
+			numAddr++
+			if countPrefix(&inst, PrefixAddrSize) > numAddr {
+				n = inst.Mode
+			}
+			prefix += fmt.Sprintf("addr%d ", n)
+			continue
+
+		case PrefixData16, PrefixData32:
+			if implicitData && countPrefix(&inst, PrefixDataSize) > 1 {
+				// Similar to the addr32 logic above, but it only kicks in
+				// when something used the data size prefix (one is implicit).
+				n := 16
+				if inst.Mode == 16 {
+					n = 32
+				}
+				numData++
+				if countPrefix(&inst, PrefixDataSize) > numData {
+					if inst.Mode == 16 {
+						n = 16
+					} else {
+						n = 32
+					}
+				}
+				prefix += fmt.Sprintf("data%d ", n)
+				continue
+			}
+			prefix += strings.ToLower(p.String()) + " "
+		}
+	}
+
+	// Finally! Put it all together.
+	text := prefix + op
+	if args != nil {
+		text += " "
+		// Indirect call/jmp gets a star to distinguish from direct jump address.
+		if (inst.Op == CALL || inst.Op == JMP || inst.Op == LJMP || inst.Op == LCALL) && (isMem(inst.Args[0]) || isReg(inst.Args[0])) {
+			text += "*"
+		}
+		text += strings.Join(args, ",")
+	}
+	return text
+}
+
+// gnuArg returns the GNU syntax for the argument x from the instruction inst.
+// If *usedPrefixes is false and x is a Mem (and inst.Op is not one of the ops excluded below), the formatting
+// includes any segment prefixes, marks the ones it consumes PrefixImplicit in inst.Prefix, and sets *usedPrefixes to true.
+func gnuArg(inst *Inst, x Arg, usedPrefixes *bool) string {
+	if x == nil {
+		return "<nil>"
+	}
+	switch x := x.(type) {
+	case Reg:
+		switch inst.Op {
+		case CVTSI2SS, CVTSI2SD, CVTSS2SI, CVTSD2SI, CVTTSD2SI, CVTTSS2SI:
+			if inst.DataSize == 16 && EAX <= x && x <= R15L {
+				x -= EAX - AX
+			}
+
+		case IN, INSB, INSW, INSD, OUT, OUTSB, OUTSW, OUTSD:
+			// DX is the port, but libopcodes prints it as if it were a memory reference.
+			if x == DX {
+				return "(%dx)"
+			}
+		}
+		return gccRegName[x]
+	case Mem:
+		seg := ""
+		var haveCS, haveDS, haveES, haveFS, haveGS, haveSS bool
+		switch x.Segment {
+		case CS:
+			haveCS = true
+		case DS:
+			haveDS = true
+		case ES:
+			haveES = true
+		case FS:
+			haveFS = true
+		case GS:
+			haveGS = true
+		case SS:
+			haveSS = true
+		}
+		switch inst.Op {
+		case INSB, INSW, INSD, STOSB, STOSW, STOSD, STOSQ, SCASB, SCASW, SCASD, SCASQ:
+			// These do not accept segment prefixes, at least in the GNU rendering.
+		default:
+			if *usedPrefixes {
+				break
+			}
+			for i := len(inst.Prefix) - 1; i >= 0; i-- {
+				p := inst.Prefix[i] &^ PrefixIgnored
+				if p == 0 {
+					continue
+				}
+				switch p {
+				case PrefixCS:
+					if !haveCS {
+						haveCS = true
+						inst.Prefix[i] |= PrefixImplicit
+					}
+				case PrefixDS:
+					if !haveDS {
+						haveDS = true
+						inst.Prefix[i] |= PrefixImplicit
+					}
+				case PrefixES:
+					if !haveES {
+						haveES = true
+						inst.Prefix[i] |= PrefixImplicit
+					}
+				case PrefixFS:
+					if !haveFS {
+						haveFS = true
+						inst.Prefix[i] |= PrefixImplicit
+					}
+				case PrefixGS:
+					if !haveGS {
+						haveGS = true
+						inst.Prefix[i] |= PrefixImplicit
+					}
+				case PrefixSS:
+					if !haveSS {
+						haveSS = true
+						inst.Prefix[i] |= PrefixImplicit
+					}
+				}
+			}
+			*usedPrefixes = true
+		}
+		if haveCS {
+			seg += "%cs:"
+		}
+		if haveDS {
+			seg += "%ds:"
+		}
+		if haveSS {
+			seg += "%ss:"
+		}
+		if haveES {
+			seg += "%es:"
+		}
+		if haveFS {
+			seg += "%fs:"
+		}
+		if haveGS {
+			seg += "%gs:"
+		}
+		disp := ""
+		if x.Disp != 0 {
+			disp = fmt.Sprintf("%#x", x.Disp)
+		}
+		if x.Scale == 0 || x.Index == 0 && x.Scale == 1 && (x.Base == ESP || x.Base == RSP || x.Base == 0 && inst.Mode == 64) {
+			if x.Base == 0 {
+				return seg + disp
+			}
+			return fmt.Sprintf("%s%s(%s)", seg, disp, gccRegName[x.Base])
+		}
+		base := gccRegName[x.Base]
+		if x.Base == 0 {
+			base = ""
+		}
+		index := gccRegName[x.Index]
+		if x.Index == 0 {
+			if inst.AddrSize == 64 {
+				index = "%riz"
+			} else {
+				index = "%eiz"
+			}
+		}
+		if AX <= x.Base && x.Base <= DI {
+			// 16-bit base register means 16-bit addressing - print no scale
+			return fmt.Sprintf("%s%s(%s,%s)", seg, disp, base, index)
+		}
+		return fmt.Sprintf("%s%s(%s,%s,%d)", seg, disp, base, index, x.Scale)
+	case Rel:
+		return fmt.Sprintf(".%+#x", int32(x))
+	case Imm:
+		if inst.Mode == 32 {
+			return fmt.Sprintf("$%#x", uint32(x))
+		}
+		return fmt.Sprintf("$%#x", int64(x))
+	}
+	return x.String()
+}
+
+// gccRegName holds the GNU-syntax spelling of each register, indexed by
+// Reg value. gnuArg indexes it directly for register operands and for the
+// base and index of memory operands. Index 0 ("no register") holds the
+// placeholder "REG0".
+//
+// DR8 through DR15 are listed so that every debug register named in
+// regNames also has a GNU spelling here; without them those registers
+// would be rendered as an empty string.
+var gccRegName = [...]string{
+	0:    "REG0",
+	AL:   "%al",
+	CL:   "%cl",
+	BL:   "%bl",
+	DL:   "%dl",
+	AH:   "%ah",
+	CH:   "%ch",
+	BH:   "%bh",
+	DH:   "%dh",
+	SPB:  "%spl",
+	BPB:  "%bpl",
+	SIB:  "%sil",
+	DIB:  "%dil",
+	R8B:  "%r8b",
+	R9B:  "%r9b",
+	R10B: "%r10b",
+	R11B: "%r11b",
+	R12B: "%r12b",
+	R13B: "%r13b",
+	R14B: "%r14b",
+	R15B: "%r15b",
+	AX:   "%ax",
+	CX:   "%cx",
+	BX:   "%bx",
+	DX:   "%dx",
+	SP:   "%sp",
+	BP:   "%bp",
+	SI:   "%si",
+	DI:   "%di",
+	R8W:  "%r8w",
+	R9W:  "%r9w",
+	R10W: "%r10w",
+	R11W: "%r11w",
+	R12W: "%r12w",
+	R13W: "%r13w",
+	R14W: "%r14w",
+	R15W: "%r15w",
+	EAX:  "%eax",
+	ECX:  "%ecx",
+	EDX:  "%edx",
+	EBX:  "%ebx",
+	ESP:  "%esp",
+	EBP:  "%ebp",
+	ESI:  "%esi",
+	EDI:  "%edi",
+	R8L:  "%r8d",
+	R9L:  "%r9d",
+	R10L: "%r10d",
+	R11L: "%r11d",
+	R12L: "%r12d",
+	R13L: "%r13d",
+	R14L: "%r14d",
+	R15L: "%r15d",
+	RAX:  "%rax",
+	RCX:  "%rcx",
+	RDX:  "%rdx",
+	RBX:  "%rbx",
+	RSP:  "%rsp",
+	RBP:  "%rbp",
+	RSI:  "%rsi",
+	RDI:  "%rdi",
+	R8:   "%r8",
+	R9:   "%r9",
+	R10:  "%r10",
+	R11:  "%r11",
+	R12:  "%r12",
+	R13:  "%r13",
+	R14:  "%r14",
+	R15:  "%r15",
+	IP:   "%ip",
+	EIP:  "%eip",
+	RIP:  "%rip",
+	F0:   "%st",
+	F1:   "%st(1)",
+	F2:   "%st(2)",
+	F3:   "%st(3)",
+	F4:   "%st(4)",
+	F5:   "%st(5)",
+	F6:   "%st(6)",
+	F7:   "%st(7)",
+	M0:   "%mm0",
+	M1:   "%mm1",
+	M2:   "%mm2",
+	M3:   "%mm3",
+	M4:   "%mm4",
+	M5:   "%mm5",
+	M6:   "%mm6",
+	M7:   "%mm7",
+	X0:   "%xmm0",
+	X1:   "%xmm1",
+	X2:   "%xmm2",
+	X3:   "%xmm3",
+	X4:   "%xmm4",
+	X5:   "%xmm5",
+	X6:   "%xmm6",
+	X7:   "%xmm7",
+	X8:   "%xmm8",
+	X9:   "%xmm9",
+	X10:  "%xmm10",
+	X11:  "%xmm11",
+	X12:  "%xmm12",
+	X13:  "%xmm13",
+	X14:  "%xmm14",
+	X15:  "%xmm15",
+	CS:   "%cs",
+	SS:   "%ss",
+	DS:   "%ds",
+	ES:   "%es",
+	FS:   "%fs",
+	GS:   "%gs",
+	GDTR: "%gdtr",
+	IDTR: "%idtr",
+	LDTR: "%ldtr",
+	MSW:  "%msw",
+	TASK: "%task",
+	CR0:  "%cr0",
+	CR1:  "%cr1",
+	CR2:  "%cr2",
+	CR3:  "%cr3",
+	CR4:  "%cr4",
+	CR5:  "%cr5",
+	CR6:  "%cr6",
+	CR7:  "%cr7",
+	CR8:  "%cr8",
+	CR9:  "%cr9",
+	CR10: "%cr10",
+	CR11: "%cr11",
+	CR12: "%cr12",
+	CR13: "%cr13",
+	CR14: "%cr14",
+	CR15: "%cr15",
+	DR0:  "%db0",
+	DR1:  "%db1",
+	DR2:  "%db2",
+	DR3:  "%db3",
+	DR4:  "%db4",
+	DR5:  "%db5",
+	DR6:  "%db6",
+	DR7:  "%db7",
+	DR8:  "%db8",
+	DR9:  "%db9",
+	DR10: "%db10",
+	DR11: "%db11",
+	DR12: "%db12",
+	DR13: "%db13",
+	DR14: "%db14",
+	DR15: "%db15",
+	TR0:  "%tr0",
+	TR1:  "%tr1",
+	TR2:  "%tr2",
+	TR3:  "%tr3",
+	TR4:  "%tr4",
+	TR5:  "%tr5",
+	TR6:  "%tr6",
+	TR7:  "%tr7",
+}
+
+// gnuOp maps an Op to a GNU-syntax mnemonic. Ops without an entry yield
+// the empty string on lookup. Several size variants (LODSB/W/D/Q,
+// SCASB/W/D/Q, STOSB/W/D/Q) share a single base mnemonic here.
+// NOTE(review): presumably consulted by the GNU formatter before falling
+// back to the op's own name, with the size suffix added separately for the
+// shared entries — confirm at the use site.
+var gnuOp = map[Op]string{
+	CBW:       "cbtw",
+	CDQ:       "cltd",
+	CMPSD:     "cmpsl",
+	CMPSD_XMM: "cmpsd",
+	CWD:       "cwtd",
+	CWDE:      "cwtl",
+	CQO:       "cqto",
+	INSD:      "insl",
+	IRET:      "iretw",
+	IRETD:     "iret",
+	IRETQ:     "iretq",
+	LODSB:     "lods",
+	LODSD:     "lods",
+	LODSQ:     "lods",
+	LODSW:     "lods",
+	MOVSD:     "movsl",
+	MOVSD_XMM: "movsd",
+	OUTSD:     "outsl",
+	POPA:      "popaw",
+	POPAD:     "popa",
+	POPF:      "popfw",
+	POPFD:     "popf",
+	PUSHA:     "pushaw",
+	PUSHAD:    "pusha",
+	PUSHF:     "pushfw",
+	PUSHFD:    "pushf",
+	SCASB:     "scas",
+	SCASD:     "scas",
+	SCASQ:     "scas",
+	SCASW:     "scas",
+	STOSB:     "stos",
+	STOSD:     "stos",
+	STOSQ:     "stos",
+	STOSW:     "stos",
+	XLATB:     "xlat",
+}
+
+// cmppsOps lists eight compare mnemonic stems in a fixed order.
+// NOTE(review): presumably indexed by the comparison-predicate immediate
+// (0 through 7) of the CMPPS family, with a type suffix appended by the
+// caller — confirm at the use site.
+var cmppsOps = []string{
+	"cmpeq",
+	"cmplt",
+	"cmple",
+	"cmpunord",
+	"cmpneq",
+	"cmpnlt",
+	"cmpnle",
+	"cmpord",
+}
+
+// pclmulqOps lists four PCLMULQDQ mnemonic variants in a fixed order.
+// NOTE(review): presumably indexed by a value derived from the
+// instruction's immediate — confirm the exact mapping at the use site.
+var pclmulqOps = []string{
+	"pclmullqlqdq",
+	"pclmulhqlqdq",
+	"pclmullqhqdq",
+	"pclmulhqhqdq",
+}
+
+// countPrefix reports how many slots of inst.Prefix hold the same prefix
+// byte as target. Only the low 8 bits are compared, on both sides, so
+// metadata and distinguishing bits never affect the count.
+func countPrefix(inst *Inst, target Prefix) int {
+	want := target & 0xFF
+	count := 0
+	for i := range inst.Prefix {
+		if inst.Prefix[i]&0xFF == want {
+			count++
+		}
+	}
+	return count
+}
+
+// markLastImplicit sets PrefixImplicit on the last slot of inst.Prefix
+// whose low byte equals prefix and reports whether such a slot was found.
+// prefix itself is compared unmasked, so it must be a bare byte value.
+func markLastImplicit(inst *Inst, prefix Prefix) bool {
+	i := len(inst.Prefix)
+	for i > 0 {
+		i--
+		if inst.Prefix[i]&0xFF != prefix {
+			continue
+		}
+		inst.Prefix[i] |= PrefixImplicit
+		return true
+	}
+	return false
+}
+
+// unmarkImplicit clears PrefixImplicit on every slot of inst.Prefix whose
+// low byte equals prefix. prefix is compared unmasked, so it must be a
+// bare byte value.
+func unmarkImplicit(inst *Inst, prefix Prefix) {
+	for i, p := range inst.Prefix {
+		if p&0xFF == prefix {
+			inst.Prefix[i] &^= PrefixImplicit
+		}
+	}
+}
+
+// byteSizeSuffix returns the one-letter size suffix for an operand of b
+// bytes: "b", "w", "l", or "q" for 1, 2, 4, or 8. Any other size yields
+// the empty string.
+func byteSizeSuffix(b int) string {
+	if b == 1 {
+		return "b"
+	}
+	if b == 2 {
+		return "w"
+	}
+	if b == 4 {
+		return "l"
+	}
+	if b == 8 {
+		return "q"
+	}
+	return ""
+}
+
+// argBytes returns the size in bytes of arg: inst.MemBytes when arg is a
+// memory reference, otherwise whatever regBytes reports for it.
+func argBytes(inst *Inst, arg Arg) int {
+	if _, memory := arg.(Mem); memory {
+		return inst.MemBytes
+	}
+	return regBytes(arg)
+}
+
+// isFloat reports whether op is one of the x87 ops listed below. The
+// second case clause is the same set that isFloatInt accepts.
+func isFloat(op Op) bool {
+	switch op {
+	case FADD, FCOM, FCOMP, FDIV, FDIVR, FLD, FMUL, FST, FSTP, FSUB, FSUBR:
+		return true
+	case FIADD, FICOM, FICOMP, FIDIV, FIDIVR, FILD, FIMUL, FIST, FISTP, FISTTP, FISUB, FISUBR:
+		return true
+	default:
+		return false
+	}
+}
+
+// isFloatInt reports whether op is one of the x87 ops whose mnemonic
+// starts with FI, as listed below.
+func isFloatInt(op Op) bool {
+	switch op {
+	case FIADD, FICOM, FICOMP, FIDIV, FIDIVR, FILD, FIMUL, FIST, FISTP, FISTTP, FISUB, FISUBR:
+		return true
+	default:
+		return false
+	}
+}
diff --git a/x86/x86asm/inst.go b/x86/x86asm/inst.go
new file mode 100644
index 0000000..ef74025
--- /dev/null
+++ b/x86/x86asm/inst.go
@@ -0,0 +1,643 @@
+// Copyright 2014 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package x86asm implements decoding of x86 machine code.
+package x86asm
+
+import (
+	"bytes"
+	"fmt"
+)
+
+// An Inst is a single instruction.
+//
+// Prefix is a fixed-size array, so copying an Inst copies its prefixes;
+// functions that take an Inst by value can adjust prefix metadata without
+// affecting the caller, while functions that take *Inst modify it in place.
+type Inst struct {
+	Prefix   Prefixes // Prefixes applied to the instruction.
+	Op       Op       // Opcode mnemonic
+	Opcode   uint32   // Encoded opcode bits, left aligned (first byte is Opcode>>24, etc)
+	Args     Args     // Instruction arguments, in Intel order
+	Mode     int      // processor mode in bits: 16, 32, or 64
+	AddrSize int      // address size in bits: 16, 32, or 64
+	DataSize int      // operand size in bits: 16, 32, or 64
+	MemBytes int      // size of memory argument in bytes: 1, 2, 4, 8, 16, and so on.
+	Len      int      // length of encoded instruction in bytes
+	PCRel    int      // length of PC-relative address in instruction encoding
+	PCRelOff int      // index of start of PC-relative address in instruction encoding
+}
+
+// Prefixes is an array of prefixes associated with a single instruction.
+// The prefixes are listed in the same order as found in the instruction:
+// each prefix byte corresponds to one slot in the array. The first zero
+// in the array marks the end of the prefixes; at most 14 prefixes can be
+// recorded.
+type Prefixes [14]Prefix
+
+// A Prefix represents an Intel instruction prefix.
+// The low 8 bits are the actual prefix byte encoding,
+// and the top 8 bits contain distinguishing bits and metadata.
+// Distinguishing bits separate different roles of one byte (for example
+// PrefixDataSize 0x66 versus PrefixData16 0x166); the metadata bits are
+// PrefixImplicit, PrefixIgnored, and PrefixInvalid.
+type Prefix uint16
+
+const (
+	// Metadata about the role of a prefix in an instruction.
+	PrefixImplicit Prefix = 0x8000 // prefix is implied by instruction text
+	PrefixIgnored  Prefix = 0x4000 // prefix is ignored: either irrelevant or overridden by a later prefix
+	PrefixInvalid  Prefix = 0x2000 // prefix makes entire instruction invalid (bad LOCK)
+
+	// Memory segment overrides.
+	PrefixES Prefix = 0x26 // ES segment override
+	PrefixCS Prefix = 0x2E // CS segment override
+	PrefixSS Prefix = 0x36 // SS segment override
+	PrefixDS Prefix = 0x3E // DS segment override
+	PrefixFS Prefix = 0x64 // FS segment override
+	PrefixGS Prefix = 0x65 // GS segment override
+
+	// Branch prediction.
+	// These share their low byte with PrefixCS (0x2E) and PrefixDS (0x3E).
+	PrefixPN Prefix = 0x12E // predict not taken (conditional branch only)
+	PrefixPT Prefix = 0x13E // predict taken (conditional branch only)
+
+	// Size attributes.
+	// Data16/Data32 share the 0x66 byte of DataSize, and Addr16/Addr32
+	// share the 0x67 byte of AddrSize; they differ only in the
+	// distinguishing bits above the low byte.
+	PrefixDataSize Prefix = 0x66 // operand size override
+	PrefixData16   Prefix = 0x166
+	PrefixData32   Prefix = 0x266
+	PrefixAddrSize Prefix = 0x67 // address size override
+	PrefixAddr16   Prefix = 0x167
+	PrefixAddr32   Prefix = 0x267
+
+	// One of a kind.
+	// XACQUIRE and BND share the 0xF2 byte of REPN, and XRELEASE shares
+	// the 0xF3 byte of REP.
+	PrefixLOCK     Prefix = 0xF0 // lock
+	PrefixREPN     Prefix = 0xF2 // repeat not zero
+	PrefixXACQUIRE Prefix = 0x1F2
+	PrefixBND      Prefix = 0x2F2
+	PrefixREP      Prefix = 0xF3 // repeat
+	PrefixXRELEASE Prefix = 0x1F3
+
+	// The REX prefixes must be in the range [PrefixREX, PrefixREX+0x10).
+	// The other bits are set or not according to the intended use.
+	PrefixREX  Prefix = 0x40 // REX 64-bit extension prefix
+	PrefixREXW Prefix = 0x08 // extension bit W (64-bit instruction width)
+	PrefixREXR Prefix = 0x04 // extension bit R (r field in modrm)
+	PrefixREXX Prefix = 0x02 // extension bit X (index field in sib)
+	PrefixREXB Prefix = 0x01 // extension bit B (r/m field in modrm or base field in sib)
+)
+
+// IsREX reports whether p is a REX prefix byte, that is, whether bits 4-7
+// of p equal PrefixREX. Bits above the low byte are not examined.
+func (p Prefix) IsREX() bool {
+	const highNibble Prefix = 0xF0
+	return p&highNibble == PrefixREX
+}
+
+// String returns the name of p with its metadata bits ignored: the entry
+// from prefixNames if there is one, otherwise "REX." followed by the set
+// extension bits (in the order W, R, X, B) for a REX prefix, otherwise
+// "Prefix(0x...)".
+func (p Prefix) String() string {
+	const metadata = PrefixImplicit | PrefixIgnored | PrefixInvalid
+	bare := p &^ metadata
+
+	if name := prefixNames[bare]; name != "" {
+		return name
+	}
+	if !bare.IsREX() {
+		return fmt.Sprintf("Prefix(%#x)", int(bare))
+	}
+
+	rexBits := [...]struct {
+		mask   Prefix
+		letter byte
+	}{
+		{PrefixREXW, 'W'},
+		{PrefixREXR, 'R'},
+		{PrefixREXX, 'X'},
+		{PrefixREXB, 'B'},
+	}
+	var buf bytes.Buffer
+	buf.WriteString("REX.")
+	for _, b := range rexBits {
+		if bare&b.mask != 0 {
+			buf.WriteByte(b.letter)
+		}
+	}
+	return buf.String()
+}
+
+// An Op is an x86 opcode.
+type Op uint32
+
+// String returns the name of op from opNames, or "Op(n)" when op is out
+// of range or has no name.
+func (op Op) String() string {
+	if i := int(op); 0 <= i && i < len(opNames) {
+		if name := opNames[i]; name != "" {
+			return name
+		}
+	}
+	return fmt.Sprintf("Op(%d)", int(op))
+}
+
+// Args holds the arguments of a single instruction.
+// If an instruction has fewer than 4 arguments,
+// the final elements in the array are nil, so the
+// first nil element marks the end of the arguments.
+type Args [4]Arg
+
+// An Arg is a single instruction argument,
+// one of these types: Reg, Mem, Imm, Rel.
+type Arg interface {
+	String() string
+	// isArg is unexported, so only types in this package can implement Arg.
+	isArg()
+}
+
+// Note that the implementations of Arg that follow are all sized
+// so that on a 64-bit machine the data can be inlined in
+// the interface value instead of requiring an allocation.
+
+// A Reg is a single register.
+// The zero Reg value has no name but indicates "no register."
+type Reg uint8
+
+// Register constants.
+//
+// The numeric order is significant. Code in this package classifies
+// registers with range comparisons such as EAX <= r && r <= R15L (see
+// regBytes) and converts between size classes by subtracting the distance
+// between two group bases (for example EAX - AX). The 16-, 32-, and
+// 64-bit groups therefore list the same registers in the same order, and
+// the segment registers form the contiguous range ES..GS.
+const (
+	_ Reg = iota
+
+	// 8-bit
+	AL
+	CL
+	DL
+	BL
+	AH
+	CH
+	DH
+	BH
+	SPB
+	BPB
+	SIB
+	DIB
+	R8B
+	R9B
+	R10B
+	R11B
+	R12B
+	R13B
+	R14B
+	R15B
+
+	// 16-bit
+	AX
+	CX
+	DX
+	BX
+	SP
+	BP
+	SI
+	DI
+	R8W
+	R9W
+	R10W
+	R11W
+	R12W
+	R13W
+	R14W
+	R15W
+
+	// 32-bit
+	EAX
+	ECX
+	EDX
+	EBX
+	ESP
+	EBP
+	ESI
+	EDI
+	R8L
+	R9L
+	R10L
+	R11L
+	R12L
+	R13L
+	R14L
+	R15L
+
+	// 64-bit
+	RAX
+	RCX
+	RDX
+	RBX
+	RSP
+	RBP
+	RSI
+	RDI
+	R8
+	R9
+	R10
+	R11
+	R12
+	R13
+	R14
+	R15
+
+	// Instruction pointer.
+	IP  // 16-bit
+	EIP // 32-bit
+	RIP // 64-bit
+
+	// 387 floating point registers.
+	F0
+	F1
+	F2
+	F3
+	F4
+	F5
+	F6
+	F7
+
+	// MMX registers.
+	M0
+	M1
+	M2
+	M3
+	M4
+	M5
+	M6
+	M7
+
+	// XMM registers.
+	X0
+	X1
+	X2
+	X3
+	X4
+	X5
+	X6
+	X7
+	X8
+	X9
+	X10
+	X11
+	X12
+	X13
+	X14
+	X15
+
+	// Segment registers.
+	ES
+	CS
+	SS
+	DS
+	FS
+	GS
+
+	// System registers.
+	GDTR
+	IDTR
+	LDTR
+	MSW
+	TASK
+
+	// Control registers.
+	CR0
+	CR1
+	CR2
+	CR3
+	CR4
+	CR5
+	CR6
+	CR7
+	CR8
+	CR9
+	CR10
+	CR11
+	CR12
+	CR13
+	CR14
+	CR15
+
+	// Debug registers.
+	DR0
+	DR1
+	DR2
+	DR3
+	DR4
+	DR5
+	DR6
+	DR7
+	DR8
+	DR9
+	DR10
+	DR11
+	DR12
+	DR13
+	DR14
+	DR15
+
+	// Task registers.
+	TR0
+	TR1
+	TR2
+	TR3
+	TR4
+	TR5
+	TR6
+	TR7
+)
+
+// regMax is the largest defined Reg value.
+const regMax = TR7
+
+// isArg marks Reg as an implementation of Arg.
+func (Reg) isArg() {}
+
+// String returns the name of r from regNames, or "Reg(n)" when r is out
+// of range or has no name.
+func (r Reg) String() string {
+	if int(r) < len(regNames) {
+		if name := regNames[r]; name != "" {
+			return name
+		}
+	}
+	return fmt.Sprintf("Reg(%d)", int(r))
+}
+
+// A Mem is a memory reference.
+// The general form is Segment:[Base+Scale*Index+Disp].
+type Mem struct {
+	Segment Reg
+	Base    Reg
+	Scale   uint8
+	Index   Reg
+	Disp    int64
+}
+
+// isArg marks Mem as an implementation of Arg.
+func (Mem) isArg() {}
+
+// String formats m as "[Base+Scale*Index+Disp]". The base is omitted when
+// it is zero, the index term is omitted when Scale is zero, the "Scale*"
+// factor is omitted when Scale is 1, and the displacement is omitted when
+// it is zero unless nothing else would be printed. Segment is not part of
+// the output.
+func (m Mem) String() string {
+	hasBase := m.Base != 0
+	hasIndex := m.Scale != 0
+
+	var buf bytes.Buffer
+	buf.WriteByte('[')
+	if hasBase {
+		buf.WriteString(m.Base.String())
+	}
+	if hasIndex {
+		if hasBase {
+			buf.WriteByte('+')
+		}
+		if m.Scale > 1 {
+			fmt.Fprintf(&buf, "%d*", m.Scale)
+		}
+		buf.WriteString(m.Index.String())
+	}
+	if m.Disp != 0 || !hasBase && !hasIndex {
+		fmt.Fprintf(&buf, "%+#x", m.Disp)
+	}
+	buf.WriteByte(']')
+	return buf.String()
+}
+
+// A Rel is an offset relative to the current instruction pointer.
+type Rel int32
+
+// isArg marks Rel as an implementation of Arg.
+func (Rel) isArg() {}
+
+// String formats r as "." followed by the offset in decimal with an
+// explicit sign, for example ".+5" or ".-3".
+func (r Rel) String() string {
+	offset := int32(r)
+	return "." + fmt.Sprintf("%+d", offset)
+}
+
+// An Imm is an integer constant.
+type Imm int64
+
+// isArg marks Imm as an implementation of Arg.
+func (Imm) isArg() {}
+
+// String formats i as signed hexadecimal with a 0x prefix, for example
+// "0xff" or "-0x1".
+func (i Imm) String() string {
+	value := int64(i)
+	return fmt.Sprintf("%#x", value)
+}
+
+// String returns a generic rendering of the instruction: each prefix that
+// is not marked PrefixImplicit followed by a space (stopping at the first
+// zero slot), then the Op name, then the arguments up to the first nil,
+// separated from the op by a space and from each other by ", ".
+func (i Inst) String() string {
+	var out bytes.Buffer
+	for _, pfx := range i.Prefix {
+		if pfx == 0 {
+			break
+		}
+		if pfx&PrefixImplicit == 0 {
+			out.WriteString(pfx.String())
+			out.WriteByte(' ')
+		}
+	}
+	out.WriteString(i.Op.String())
+	for n, arg := range i.Args {
+		if arg == nil {
+			break
+		}
+		if n == 0 {
+			out.WriteByte(' ')
+		} else {
+			out.WriteString(", ")
+		}
+		out.WriteString(arg.String())
+	}
+	return out.String()
+}
+
+// isReg reports whether a holds a Reg.
+func isReg(a Arg) bool {
+	switch a.(type) {
+	case Reg:
+		return true
+	}
+	return false
+}
+
+// isSegReg reports whether a holds a Reg in the segment-register range
+// ES through GS.
+func isSegReg(a Arg) bool {
+	if r, ok := a.(Reg); ok {
+		return ES <= r && r <= GS
+	}
+	return false
+}
+
+// isMem reports whether a holds a Mem.
+func isMem(a Arg) bool {
+	switch a.(type) {
+	case Mem:
+		return true
+	}
+	return false
+}
+
+// isImm reports whether a holds an Imm.
+func isImm(a Arg) bool {
+	switch a.(type) {
+	case Imm:
+		return true
+	}
+	return false
+}
+
+// regBytes returns the width in bytes (1, 2, 4, or 8) of a when it is a
+// general-purpose register in one of the 8-, 16-, 32-, or 64-bit groups.
+// It returns 0 for any other register and for arguments that are not
+// registers.
+func regBytes(a Arg) int {
+	r, ok := a.(Reg)
+	switch {
+	case !ok:
+		return 0
+	case AL <= r && r <= R15B:
+		return 1
+	case AX <= r && r <= R15W:
+		return 2
+	case EAX <= r && r <= R15L:
+		return 4
+	case RAX <= r && r <= R15:
+		return 8
+	default:
+		return 0
+	}
+}
+
+// isSegment reports whether p is exactly one of the six segment-override
+// prefix values. A prefix carrying any metadata or distinguishing bit
+// does not match.
+func isSegment(p Prefix) bool {
+	return p == PrefixCS ||
+		p == PrefixDS ||
+		p == PrefixES ||
+		p == PrefixFS ||
+		p == PrefixGS ||
+		p == PrefixSS
+}
+
+// The Op definitions and string list are in tables.go.
+
+// prefixNames maps a Prefix, with its metadata bits cleared, to the name
+// returned by Prefix.String. REX prefixes other than the bare PrefixREX
+// have no entry; Prefix.String builds their names from the extension bits.
+var prefixNames = map[Prefix]string{
+	PrefixCS:       "CS",
+	PrefixDS:       "DS",
+	PrefixES:       "ES",
+	PrefixFS:       "FS",
+	PrefixGS:       "GS",
+	PrefixSS:       "SS",
+	PrefixLOCK:     "LOCK",
+	PrefixREP:      "REP",
+	PrefixREPN:     "REPN",
+	PrefixAddrSize: "ADDRSIZE",
+	PrefixDataSize: "DATASIZE",
+	PrefixAddr16:   "ADDR16",
+	PrefixData16:   "DATA16",
+	PrefixAddr32:   "ADDR32",
+	PrefixData32:   "DATA32",
+	PrefixBND:      "BND",
+	PrefixXACQUIRE: "XACQUIRE",
+	PrefixXRELEASE: "XRELEASE",
+	PrefixREX:      "REX",
+	PrefixPT:       "PT",
+	PrefixPN:       "PN",
+}
+
+// regNames holds the name returned by Reg.String for each register,
+// indexed by Reg value. Index 0 ("no register") has no entry.
+// TestRegString checks that every Reg from 1 through regMax is named here.
+var regNames = [...]string{
+	AL:   "AL",
+	CL:   "CL",
+	BL:   "BL",
+	DL:   "DL",
+	AH:   "AH",
+	CH:   "CH",
+	BH:   "BH",
+	DH:   "DH",
+	SPB:  "SPB",
+	BPB:  "BPB",
+	SIB:  "SIB",
+	DIB:  "DIB",
+	R8B:  "R8B",
+	R9B:  "R9B",
+	R10B: "R10B",
+	R11B: "R11B",
+	R12B: "R12B",
+	R13B: "R13B",
+	R14B: "R14B",
+	R15B: "R15B",
+	AX:   "AX",
+	CX:   "CX",
+	BX:   "BX",
+	DX:   "DX",
+	SP:   "SP",
+	BP:   "BP",
+	SI:   "SI",
+	DI:   "DI",
+	R8W:  "R8W",
+	R9W:  "R9W",
+	R10W: "R10W",
+	R11W: "R11W",
+	R12W: "R12W",
+	R13W: "R13W",
+	R14W: "R14W",
+	R15W: "R15W",
+	EAX:  "EAX",
+	ECX:  "ECX",
+	EDX:  "EDX",
+	EBX:  "EBX",
+	ESP:  "ESP",
+	EBP:  "EBP",
+	ESI:  "ESI",
+	EDI:  "EDI",
+	R8L:  "R8L",
+	R9L:  "R9L",
+	R10L: "R10L",
+	R11L: "R11L",
+	R12L: "R12L",
+	R13L: "R13L",
+	R14L: "R14L",
+	R15L: "R15L",
+	RAX:  "RAX",
+	RCX:  "RCX",
+	RDX:  "RDX",
+	RBX:  "RBX",
+	RSP:  "RSP",
+	RBP:  "RBP",
+	RSI:  "RSI",
+	RDI:  "RDI",
+	R8:   "R8",
+	R9:   "R9",
+	R10:  "R10",
+	R11:  "R11",
+	R12:  "R12",
+	R13:  "R13",
+	R14:  "R14",
+	R15:  "R15",
+	IP:   "IP",
+	EIP:  "EIP",
+	RIP:  "RIP",
+	F0:   "F0",
+	F1:   "F1",
+	F2:   "F2",
+	F3:   "F3",
+	F4:   "F4",
+	F5:   "F5",
+	F6:   "F6",
+	F7:   "F7",
+	M0:   "M0",
+	M1:   "M1",
+	M2:   "M2",
+	M3:   "M3",
+	M4:   "M4",
+	M5:   "M5",
+	M6:   "M6",
+	M7:   "M7",
+	X0:   "X0",
+	X1:   "X1",
+	X2:   "X2",
+	X3:   "X3",
+	X4:   "X4",
+	X5:   "X5",
+	X6:   "X6",
+	X7:   "X7",
+	X8:   "X8",
+	X9:   "X9",
+	X10:  "X10",
+	X11:  "X11",
+	X12:  "X12",
+	X13:  "X13",
+	X14:  "X14",
+	X15:  "X15",
+	CS:   "CS",
+	SS:   "SS",
+	DS:   "DS",
+	ES:   "ES",
+	FS:   "FS",
+	GS:   "GS",
+	GDTR: "GDTR",
+	IDTR: "IDTR",
+	LDTR: "LDTR",
+	MSW:  "MSW",
+	TASK: "TASK",
+	CR0:  "CR0",
+	CR1:  "CR1",
+	CR2:  "CR2",
+	CR3:  "CR3",
+	CR4:  "CR4",
+	CR5:  "CR5",
+	CR6:  "CR6",
+	CR7:  "CR7",
+	CR8:  "CR8",
+	CR9:  "CR9",
+	CR10: "CR10",
+	CR11: "CR11",
+	CR12: "CR12",
+	CR13: "CR13",
+	CR14: "CR14",
+	CR15: "CR15",
+	DR0:  "DR0",
+	DR1:  "DR1",
+	DR2:  "DR2",
+	DR3:  "DR3",
+	DR4:  "DR4",
+	DR5:  "DR5",
+	DR6:  "DR6",
+	DR7:  "DR7",
+	DR8:  "DR8",
+	DR9:  "DR9",
+	DR10: "DR10",
+	DR11: "DR11",
+	DR12: "DR12",
+	DR13: "DR13",
+	DR14: "DR14",
+	DR15: "DR15",
+	TR0:  "TR0",
+	TR1:  "TR1",
+	TR2:  "TR2",
+	TR3:  "TR3",
+	TR4:  "TR4",
+	TR5:  "TR5",
+	TR6:  "TR6",
+	TR7:  "TR7",
+}
diff --git a/x86/x86asm/inst_test.go b/x86/x86asm/inst_test.go
new file mode 100644
index 0000000..23ac523
--- /dev/null
+++ b/x86/x86asm/inst_test.go
@@ -0,0 +1,20 @@
+// Copyright 2014 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package x86asm
+
+import (
+	"strings"
+	"testing"
+)
+
+// TestRegString checks that every register from 1 through regMax has an
+// entry in regNames and that Reg.String does not fall back to the
+// numeric "Reg(n)" form for it.
+func TestRegString(t *testing.T) {
+	for r := Reg(1); r <= regMax; r++ {
+		if regNames[r] == "" {
+			t.Errorf("regNames[%d] is missing", int(r))
+			continue
+		}
+		s := r.String()
+		if strings.Contains(s, "Reg(") {
+			t.Errorf("Reg(%d).String() = %s, want proper name", int(r), s)
+		}
+	}
+}
diff --git a/x86/x86asm/intel.go b/x86/x86asm/intel.go
new file mode 100644
index 0000000..90af9dd
--- /dev/null
+++ b/x86/x86asm/intel.go
@@ -0,0 +1,518 @@
+// Copyright 2014 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package x86asm
+
+import (
+	"fmt"
+	"strings"
+)
+
+// IntelSyntax returns the Intel assembler syntax for the instruction, as defined by Intel's XED tool.
+//
+// inst is passed by value and its Prefix field is an array, so the
+// PrefixImplicit and PrefixIgnored adjustments made below affect only this
+// function's copy. The function first decides which prefixes are printed,
+// then builds the prefix text, then formats the arguments and mnemonic.
+func IntelSyntax(inst Inst) string {
+	// Collect the non-nil arguments; iargs may be rewritten or trimmed below.
+	var iargs []Arg
+	for _, a := range inst.Args {
+		if a == nil {
+			break
+		}
+		iargs = append(iargs, a)
+	}
+
+	// For these ops an address-size prefix is never treated as implicit.
+	// MOV takes part only when its first two opcode bytes are 0F 20
+	// through 0F 23.
+	switch inst.Op {
+	case INSB, INSD, INSW, OUTSB, OUTSD, OUTSW, LOOPNE, JCXZ, JECXZ, JRCXZ, LOOP, LOOPE, MOV, XLATB:
+		if inst.Op == MOV && (inst.Opcode>>16)&0xFFFC != 0x0F20 {
+			break
+		}
+		for i, p := range inst.Prefix {
+			if p&0xFF == PrefixAddrSize {
+				inst.Prefix[i] &^= PrefixImplicit
+			}
+		}
+	}
+
+	switch inst.Op {
+	case MOV:
+		// A 32- or 64-bit general register moved into a segment register
+		// is shown as the register at the same position in the AX..R15W
+		// range.
+		dst, _ := inst.Args[0].(Reg)
+		src, _ := inst.Args[1].(Reg)
+		if ES <= dst && dst <= GS && EAX <= src && src <= R15L {
+			src -= EAX - AX
+			iargs[1] = src
+		}
+		if ES <= dst && dst <= GS && RAX <= src && src <= R15 {
+			src -= RAX - AX
+			iargs[1] = src
+		}
+
+		// For first opcode bytes A0 through A3, address-size prefixes are
+		// treated as implicit.
+		if inst.Opcode>>24&^3 == 0xA0 {
+			for i, p := range inst.Prefix {
+				if p&0xFF == PrefixAddrSize {
+					inst.Prefix[i] |= PrefixImplicit
+				}
+			}
+		}
+	}
+
+	switch inst.Op {
+	case AAM, AAD:
+		// The immediate is sign-extended from 8 bits and then truncated to
+		// the operand size.
+		if imm, ok := iargs[0].(Imm); ok {
+			if inst.DataSize == 32 {
+				iargs[0] = Imm(uint32(int8(imm)))
+			} else if inst.DataSize == 16 {
+				iargs[0] = Imm(uint16(int8(imm)))
+			}
+		}
+
+	case PUSH:
+		// A pushed immediate is shown truncated to 32 bits, unsigned.
+		if imm, ok := iargs[0].(Imm); ok {
+			iargs[0] = Imm(uint32(imm))
+		}
+	}
+
+	// If any prefix is marked implicit, mark every prefix with the same
+	// byte value implicit as well.
+	for _, p := range inst.Prefix {
+		if p&PrefixImplicit != 0 {
+			for j, pj := range inst.Prefix {
+				if pj&0xFF == p&0xFF {
+					inst.Prefix[j] |= PrefixImplicit
+				}
+			}
+		}
+	}
+
+	// For a decoded instruction (Op != 0), Data16/Data32, the CS, DS, ES,
+	// and SS overrides, and REX prefixes are never printed as prefixes.
+	// FS and GS are not in this list.
+	if inst.Op != 0 {
+		for i, p := range inst.Prefix {
+			switch p &^ PrefixIgnored {
+			case PrefixData16, PrefixData32, PrefixCS, PrefixDS, PrefixES, PrefixSS:
+				inst.Prefix[i] |= PrefixImplicit
+			}
+			if p.IsREX() {
+				inst.Prefix[i] |= PrefixImplicit
+			}
+		}
+	}
+
+	// Branch hints are not printed on loop and JCXZ-family instructions.
+	if isLoop[inst.Op] || inst.Op == JCXZ || inst.Op == JECXZ || inst.Op == JRCXZ {
+		for i, p := range inst.Prefix {
+			if p == PrefixPT || p == PrefixPN {
+				inst.Prefix[i] |= PrefixImplicit
+			}
+		}
+	}
+
+	// For the ops below, a data-size prefix is always printed: both the
+	// Implicit and Ignored marks are cleared. NOP, RET, INT, and LRET take
+	// part only for the specific first opcode byte tested.
+	switch inst.Op {
+	case AAA, AAS, CBW, CDQE, CLC, CLD, CLI, CLTS, CMC, CPUID, CQO, CWD, DAA, DAS,
+		FDECSTP, FINCSTP, FNCLEX, FNINIT, FNOP, FWAIT, HLT,
+		ICEBP, INSB, INSD, INSW, INT, INTO, INVD, IRET, IRETQ,
+		LAHF, LEAVE, LRET, MONITOR, MWAIT, NOP, OUTSB, OUTSD, OUTSW,
+		PAUSE, POPA, POPF, POPFQ, PUSHA, PUSHF, PUSHFQ,
+		RDMSR, RDPMC, RDTSC, RDTSCP, RET, RSM,
+		SAHF, STC, STD, STI, SYSENTER, SYSEXIT, SYSRET,
+		UD2, WBINVD, WRMSR, XEND, XLATB, XTEST:
+
+		if inst.Op == NOP && inst.Opcode>>24 != 0x90 {
+			break
+		}
+		if inst.Op == RET && inst.Opcode>>24 != 0xC3 {
+			break
+		}
+		if inst.Op == INT && inst.Opcode>>24 != 0xCC {
+			break
+		}
+		if inst.Op == LRET && inst.Opcode>>24 != 0xcb {
+			break
+		}
+		for i, p := range inst.Prefix {
+			if p&0xFF == PrefixDataSize {
+				inst.Prefix[i] &^= PrefixImplicit | PrefixIgnored
+			}
+		}
+
+	case 0:
+		// ok
+	}
+
+	// Drop the arguments that are not shown for these ops.
+	switch inst.Op {
+	case INSB, INSD, INSW, OUTSB, OUTSD, OUTSW, MONITOR, MWAIT, XLATB:
+		iargs = nil
+
+	case STOSB, STOSW, STOSD, STOSQ:
+		iargs = iargs[:1]
+
+	case LODSB, LODSW, LODSD, LODSQ, SCASB, SCASW, SCASD, SCASQ:
+		iargs = iargs[1:]
+	}
+
+	// Prefixes recorded in prefixBits are printed once each, in a fixed
+	// order, after any prefixes that are appended to prefix directly.
+	const (
+		haveData16 = 1 << iota
+		haveData32
+		haveAddr16
+		haveAddr32
+		haveXacquire
+		haveXrelease
+		haveLock
+		haveHintTaken
+		haveHintNotTaken
+		haveBnd
+	)
+	var prefixBits uint32
+	prefix := ""
+	for _, p := range inst.Prefix {
+		if p == 0 {
+			break
+		}
+		// Any prefix with byte 0xF3 cancels a BND seen earlier, even when
+		// that prefix is itself implicit or ignored.
+		if p&0xFF == 0xF3 {
+			prefixBits &^= haveBnd
+		}
+		if p&(PrefixImplicit|PrefixIgnored) != 0 {
+			continue
+		}
+		switch p {
+		default:
+			prefix += strings.ToLower(p.String()) + " "
+		case PrefixCS, PrefixDS, PrefixES, PrefixFS, PrefixGS, PrefixSS:
+			// Segment overrides are printed as prefixes only when there is
+			// no instruction to attach them to.
+			if inst.Op == 0 {
+				prefix += strings.ToLower(p.String()) + " "
+			}
+		case PrefixREPN:
+			prefix += "repne "
+		case PrefixLOCK:
+			prefixBits |= haveLock
+		case PrefixData16, PrefixDataSize:
+			prefixBits |= haveData16
+		case PrefixData32:
+			prefixBits |= haveData32
+		case PrefixAddrSize, PrefixAddr16:
+			prefixBits |= haveAddr16
+		case PrefixAddr32:
+			prefixBits |= haveAddr32
+		case PrefixXACQUIRE:
+			prefixBits |= haveXacquire
+		case PrefixXRELEASE:
+			prefixBits |= haveXrelease
+		case PrefixPT:
+			prefixBits |= haveHintTaken
+		case PrefixPN:
+			prefixBits |= haveHintNotTaken
+		case PrefixBND:
+			prefixBits |= haveBnd
+		}
+	}
+	switch inst.Op {
+	case JMP:
+		// No bnd on a JMP whose first opcode byte is 0xEB.
+		if inst.Opcode>>24 == 0xEB {
+			prefixBits &^= haveBnd
+		}
+	case RET, LRET:
+		prefixBits &^= haveData16 | haveData32
+	}
+
+	if prefixBits&haveXacquire != 0 {
+		prefix += "xacquire "
+	}
+	if prefixBits&haveXrelease != 0 {
+		prefix += "xrelease "
+	}
+	if prefixBits&haveLock != 0 {
+		prefix += "lock "
+	}
+	if prefixBits&haveBnd != 0 {
+		prefix += "bnd "
+	}
+	if prefixBits&haveHintTaken != 0 {
+		prefix += "hint-taken "
+	}
+	if prefixBits&haveHintNotTaken != 0 {
+		prefix += "hint-not-taken "
+	}
+	if prefixBits&haveAddr16 != 0 {
+		prefix += "addr16 "
+	}
+	if prefixBits&haveAddr32 != 0 {
+		prefix += "addr32 "
+	}
+	if prefixBits&haveData16 != 0 {
+		prefix += "data16 "
+	}
+	if prefixBits&haveData32 != 0 {
+		prefix += "data32 "
+	}
+
+	// With no instruction, the result is just the prefixes (without the
+	// trailing space), or a placeholder when there are none.
+	if inst.Op == 0 {
+		if prefix == "" {
+			return "<no instruction>"
+		}
+		return prefix[:len(prefix)-1]
+	}
+
+	var args []string
+	for _, a := range iargs {
+		if a == nil {
+			break
+		}
+		args = append(args, intelArg(&inst, a))
+	}
+
+	// Op-specific adjustments to the formatted argument list; op is set
+	// here only when the mnemonic itself is overridden.
+	var op string
+	switch inst.Op {
+	case NOP:
+		// A NOP whose first opcode byte is 0x0F gets a register operand
+		// appended, sized by the operand size.
+		if inst.Opcode>>24 == 0x0F {
+			if inst.DataSize == 16 {
+				args = append(args, "ax")
+			} else {
+				args = append(args, "eax")
+			}
+		}
+
+	case BLENDVPD, BLENDVPS, PBLENDVB:
+		// Only the first two arguments are shown.
+		args = args[:2]
+
+	case INT:
+		if inst.Opcode>>24 == 0xCC {
+			args = nil
+			op = "int3"
+		}
+
+	case LCALL, LJMP:
+		// Two-argument far calls and jumps show their arguments swapped.
+		if len(args) == 2 {
+			args[0], args[1] = args[1], args[0]
+		}
+
+	case FCHS, FABS, FTST, FLDPI, FLDL2E, FLDLG2, F2XM1, FXAM, FLD1, FLDL2T, FSQRT, FRNDINT, FCOS, FSIN:
+		if len(args) == 0 {
+			args = append(args, "st0")
+		}
+
+	case FPTAN, FSINCOS, FUCOMPP, FCOMPP, FYL2X, FPATAN, FXTRACT, FPREM1, FPREM, FYL2XP1, FSCALE:
+		if len(args) == 0 {
+			args = []string{"st0", "st1"}
+		}
+
+	case FST, FSTP, FISTTP, FIST, FISTP, FBSTP:
+		if len(args) == 1 {
+			args = append(args, "st0")
+		}
+
+	case FLD, FXCH, FCOM, FCOMP, FIADD, FIMUL, FICOM, FICOMP, FISUBR, FIDIV, FUCOM, FUCOMP, FILD, FBLD, FADD, FMUL, FSUB, FSUBR, FISUB, FDIV, FDIVR, FIDIVR:
+		if len(args) == 1 {
+			args = []string{"st0", args[0]}
+		}
+
+	case MASKMOVDQU, MASKMOVQ, XLATB, OUTSB, OUTSW, OUTSD:
+		// Scan the prefixes from last to first for a segment override and
+		// append its name as an extra operand. Outside 64-bit mode a DS
+		// prefix stops the scan without adding anything; in 64-bit mode
+		// only FS and GS are appended.
+	FixSegment:
+		for i := len(inst.Prefix) - 1; i >= 0; i-- {
+			p := inst.Prefix[i] & 0xFF
+			switch p {
+			case PrefixCS, PrefixES, PrefixFS, PrefixGS, PrefixSS:
+				if inst.Mode != 64 || p == PrefixFS || p == PrefixGS {
+					args = append(args, strings.ToLower((inst.Prefix[i] & 0xFF).String()))
+					break FixSegment
+				}
+			case PrefixDS:
+				if inst.Mode != 64 {
+					break FixSegment
+				}
+			}
+		}
+	}
+
+	// Mnemonic: explicit override, then the intelOp table, then the
+	// lowercased Op name.
+	if op == "" {
+		op = intelOp[inst.Op]
+	}
+	if op == "" {
+		op = strings.ToLower(inst.Op.String())
+	}
+	if args != nil {
+		op += " " + strings.Join(args, ", ")
+	}
+	return prefix + op
+}
+
+// intelArg formats one argument of inst in Intel syntax. It reads inst but
+// does not modify it. Arguments not handled by a case below, and registers
+// without an intelReg entry, fall back to the lowercased result of the
+// argument's own String method.
+func intelArg(inst *Inst, arg Arg) string {
+	switch a := arg.(type) {
+	case Imm:
+		// In 32-bit mode the immediate is truncated to 32 bits. Otherwise
+		// it is printed signed when it fits in an int32 and as an unsigned
+		// 64-bit value when it does not.
+		if inst.Mode == 32 {
+			return fmt.Sprintf("%#x", uint32(a))
+		}
+		if Imm(int32(a)) == a {
+			return fmt.Sprintf("%#x", int64(a))
+		}
+		return fmt.Sprintf("%#x", uint64(a))
+	case Mem:
+		// An EIP base is shown as RIP.
+		if a.Base == EIP {
+			a.Base = RIP
+		}
+		// Size keyword: chosen from MemBytes first, then overridden for
+		// the specific ops listed in the second switch.
+		prefix := ""
+		switch inst.MemBytes {
+		case 1:
+			prefix = "byte "
+		case 2:
+			prefix = "word "
+		case 4:
+			prefix = "dword "
+		case 8:
+			prefix = "qword "
+		case 16:
+			prefix = "xmmword "
+		}
+		switch inst.Op {
+		case INVLPG:
+			prefix = "byte "
+		case STOSB, MOVSB, CMPSB, LODSB, SCASB:
+			prefix = "byte "
+		case STOSW, MOVSW, CMPSW, LODSW, SCASW:
+			prefix = "word "
+		case STOSD, MOVSD, CMPSD, LODSD, SCASD:
+			prefix = "dword "
+		case STOSQ, MOVSQ, CMPSQ, LODSQ, SCASQ:
+			prefix = "qword "
+		case LAR:
+			prefix = "word "
+		case BOUND:
+			if inst.Mode == 32 {
+				prefix = "qword "
+			} else {
+				prefix = "dword "
+			}
+		case PREFETCHW, PREFETCHNTA, PREFETCHT0, PREFETCHT1, PREFETCHT2, CLFLUSH:
+			prefix = "zmmword "
+		}
+		// Drop the segment when it is the one this switch treats as the
+		// default for the operand: ES for DI-based and DS for SI-based
+		// string operands, SS for SP- or BP-based operands, DS otherwise.
+		// LEA never shows a segment.
+		switch inst.Op {
+		case MOVSB, MOVSW, MOVSD, MOVSQ, CMPSB, CMPSW, CMPSD, CMPSQ, STOSB, STOSW, STOSD, STOSQ, SCASB, SCASW, SCASD, SCASQ, LODSB, LODSW, LODSD, LODSQ:
+			switch a.Base {
+			case DI, EDI, RDI:
+				if a.Segment == ES {
+					a.Segment = 0
+				}
+			case SI, ESI, RSI:
+				if a.Segment == DS {
+					a.Segment = 0
+				}
+			}
+		case LEA:
+			a.Segment = 0
+		default:
+			switch a.Base {
+			case SP, ESP, RSP, BP, EBP, RBP:
+				if a.Segment == SS {
+					a.Segment = 0
+				}
+			default:
+				if a.Segment == DS {
+					a.Segment = 0
+				}
+			}
+		}
+
+		// In 64-bit mode only FS and GS segments are shown.
+		if inst.Mode == 64 && a.Segment != FS && a.Segment != GS {
+			a.Segment = 0
+		}
+
+		prefix += "ptr "
+		if a.Segment != 0 {
+			prefix += strings.ToLower(a.Segment.String()) + ":"
+		}
+		prefix += "["
+		if a.Base != 0 {
+			prefix += intelArg(inst, a.Base)
+		}
+		// The index term is printed only when both Scale and Index are set.
+		if a.Scale != 0 && a.Index != 0 {
+			if a.Base != 0 {
+				prefix += "+"
+			}
+			prefix += fmt.Sprintf("%s*%d", intelArg(inst, a.Index), a.Scale)
+		}
+		if a.Disp != 0 {
+			// When nothing has been written inside the brackets yet and the
+			// displacement is non-negative or does not fit in an int32, it
+			// is printed as an unsigned value with no sign. Otherwise it is
+			// printed with an explicit sign.
+			if prefix[len(prefix)-1] == '[' && (a.Disp >= 0 || int64(int32(a.Disp)) != a.Disp) {
+				prefix += fmt.Sprintf("%#x", uint64(a.Disp))
+			} else {
+				prefix += fmt.Sprintf("%+#x", a.Disp)
+			}
+		}
+		prefix += "]"
+		return prefix
+	case Rel:
+		return fmt.Sprintf(".%+#x", int64(a))
+	case Reg:
+		// intelReg is shorter than the full register range, hence the
+		// bounds check before indexing.
+		if int(a) < len(intelReg) && intelReg[a] != "" {
+			return intelReg[a]
+		}
+	}
+	return strings.ToLower(arg.String())
+}
+
+// intelOp lists the ops whose Intel-syntax mnemonic differs from the
+// lowercased Op name. IntelSyntax falls back to the lowercased Op name
+// for any op that has no entry here.
+var intelOp = map[Op]string{
+	JAE:       "jnb",
+	JA:        "jnbe",
+	JGE:       "jnl",
+	JNE:       "jnz",
+	JG:        "jnle",
+	JE:        "jz",
+	SETAE:     "setnb",
+	SETA:      "setnbe",
+	SETGE:     "setnl",
+	SETNE:     "setnz",
+	SETG:      "setnle",
+	SETE:      "setz",
+	CMOVAE:    "cmovnb",
+	CMOVA:     "cmovnbe",
+	CMOVGE:    "cmovnl",
+	CMOVNE:    "cmovnz",
+	CMOVG:     "cmovnle",
+	CMOVE:     "cmovz",
+	LCALL:     "call far",
+	LJMP:      "jmp far",
+	LRET:      "ret far",
+	ICEBP:     "int1",
+	MOVSD_XMM: "movsd",
+	XLATB:     "xlat",
+}
+
+// intelReg gives the Intel-syntax spelling of the registers whose name
+// differs from the lowercased Reg name, indexed by Reg value. Registers
+// with no entry (empty string) use the lowercased Reg name instead. The
+// array is only as long as its highest key (X15), so it must be
+// bounds-checked before indexing, as intelArg does.
+var intelReg = [...]string{
+	F0:  "st0",
+	F1:  "st1",
+	F2:  "st2",
+	F3:  "st3",
+	F4:  "st4",
+	F5:  "st5",
+	F6:  "st6",
+	F7:  "st7",
+	M0:  "mmx0",
+	M1:  "mmx1",
+	M2:  "mmx2",
+	M3:  "mmx3",
+	M4:  "mmx4",
+	M5:  "mmx5",
+	M6:  "mmx6",
+	M7:  "mmx7",
+	X0:  "xmm0",
+	X1:  "xmm1",
+	X2:  "xmm2",
+	X3:  "xmm3",
+	X4:  "xmm4",
+	X5:  "xmm5",
+	X6:  "xmm6",
+	X7:  "xmm7",
+	X8:  "xmm8",
+	X9:  "xmm9",
+	X10: "xmm10",
+	X11: "xmm11",
+	X12: "xmm12",
+	X13: "xmm13",
+	X14: "xmm14",
+	X15: "xmm15",
+
+	// TODO: Maybe the constants are named wrong.
+	SPB: "spl",
+	BPB: "bpl",
+	SIB: "sil",
+	DIB: "dil",
+
+	R8L:  "r8d",
+	R9L:  "r9d",
+	R10L: "r10d",
+	R11L: "r11d",
+	R12L: "r12d",
+	R13L: "r13d",
+	R14L: "r14d",
+	R15L: "r15d",
+}
diff --git a/x86/x86asm/objdump_test.go b/x86/x86asm/objdump_test.go
new file mode 100644
index 0000000..3d4e146
--- /dev/null
+++ b/x86/x86asm/objdump_test.go
@@ -0,0 +1,385 @@
+// Copyright 2014 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package x86asm
+
+import (
+	"bytes"
+	"strings"
+	"testing"
+)
+
+func TestObjdump32Manual(t *testing.T)   { testObjdump32(t, hexCases(t, objdumpManualTests)) } // 32-bit mode (testObjdump32) over each case generator
+func TestObjdump32Testdata(t *testing.T) { testObjdump32(t, concat(basicPrefixes, testdataCases(t))) }
+func TestObjdump32ModRM(t *testing.T)    { testObjdump32(t, concat(basicPrefixes, enumModRM)) }
+func TestObjdump32OneByte(t *testing.T)  { testBasic(t, testObjdump32) }
+func TestObjdump320F(t *testing.T)       { testBasic(t, testObjdump32, 0x0F) }
+func TestObjdump320F38(t *testing.T)     { testBasic(t, testObjdump32, 0x0F, 0x38) }
+func TestObjdump320F3A(t *testing.T)     { testBasic(t, testObjdump32, 0x0F, 0x3A) }
+func TestObjdump32Prefix(t *testing.T)   { testPrefix(t, testObjdump32) }
+
+func TestObjdump64Manual(t *testing.T)   { testObjdump64(t, hexCases(t, objdumpManualTests)) } // 64-bit mode (testObjdump64) over the same generators
+func TestObjdump64Testdata(t *testing.T) { testObjdump64(t, concat(basicPrefixes, testdataCases(t))) }
+func TestObjdump64ModRM(t *testing.T)    { testObjdump64(t, concat(basicPrefixes, enumModRM)) }
+func TestObjdump64OneByte(t *testing.T)  { testBasic(t, testObjdump64) }
+func TestObjdump640F(t *testing.T)       { testBasic(t, testObjdump64, 0x0F) }
+func TestObjdump640F38(t *testing.T)     { testBasic(t, testObjdump64, 0x0F, 0x38) }
+func TestObjdump640F3A(t *testing.T)     { testBasic(t, testObjdump64, 0x0F, 0x3A) }
+func TestObjdump64Prefix(t *testing.T)   { testPrefix(t, testObjdump64) }
+
+func TestObjdump64REXTestdata(t *testing.T) { // 64-bit mode with rexPrefixes added; cases are passed through filter with isValidREX
+	testObjdump64(t, filter(concat3(basicPrefixes, rexPrefixes, testdataCases(t)), isValidREX))
+}
+func TestObjdump64REXModRM(t *testing.T) {
+	testObjdump64(t, concat3(basicPrefixes, rexPrefixes, enumModRM))
+}
+func TestObjdump64REXOneByte(t *testing.T) { testBasicREX(t, testObjdump64) }
+func TestObjdump64REX0F(t *testing.T)      { testBasicREX(t, testObjdump64, 0x0F) }
+func TestObjdump64REX0F38(t *testing.T)    { testBasicREX(t, testObjdump64, 0x0F, 0x38) }
+func TestObjdump64REX0F3A(t *testing.T)    { testBasicREX(t, testObjdump64, 0x0F, 0x3A) }
+func TestObjdump64REXPrefix(t *testing.T)  { testPrefixREX(t, testObjdump64) }
+
+// objdumpManualTests holds test cases that will be run by TestObjdump32Manual and TestObjdump64Manual.
+// If you are debugging a few cases that turned up in a longer run, it can be useful
+// to list them here and then use -run='Objdump(32|64)Manual', particularly with tracing enabled.
+var objdumpManualTests = `
+4883FE017413
+488DFC2500000000
+488D3D00000000
+`
+
+// allowedMismatchObjdump reports whether the mismatch between text (our output, size bytes long)
+// and dec (the objdump/libopcodes output, dec.nenc bytes long) should be allowed by the test.
+func allowedMismatchObjdump(text string, size int, inst *Inst, dec ExtInst) bool {
+	if size == 15 && dec.nenc == 15 && contains(text, "truncated") && contains(dec.text, "(bad)") { // 15-byte encodings: we say truncated, libopcodes says (bad)
+		return true
+	}
+
+	if i := strings.LastIndex(dec.text, " "); isPrefix(dec.text[i+1:]) && size == 1 && isPrefix(text) { // we decoded a lone prefix and the last word of libopcodes' text is a prefix too
+		return true
+	}
+
+	if size == dec.nenc && contains(dec.text, "movupd") && contains(dec.text, "data32") { // same length; libopcodes merely adds a "data32 " prefix to movupd
+		s := strings.Replace(dec.text, "data32 ", "", -1)
+		if text == s {
+			return true
+		}
+	}
+
+	// Simplify our invalid instruction text.
+	if text == "error: unrecognized instruction" {
+		text = "BAD"
+	}
+
+	// Treat the following libopcodes outputs as "(bad)":
+	// invalid instructions for which it prints a %? register (e.g., FF E8 11 22 33 44),
+	// invalid instructions for which it prints "internal disassembler error",
+	// and instructions which it marks 8087 only (e.g., DB E0)
+	// or 287 only (e.g., DB E4).
+	if contains(dec.text, "%?", "<internal disassembler error>", "(8087 only)", "(287 only)") {
+		dec.text = "(bad)"
+	}
+
+	// 0F 19 11, 0F 1C 11, 0F 1D 11, 0F 1E 11, 0F 1F 11: libopcodes says nop,
+	// but the Intel manuals say that the only NOP there is 0F 1F /0.
+	// Perhaps libopcodes is reporting an older encoding.
+	i := bytes.IndexByte(dec.enc[:], 0x0F)
+	if contains(dec.text, "nop") && i >= 0 && i+2 < len(dec.enc) && dec.enc[i+1]&^7 == 0x18 && (dec.enc[i+1] != 0x1F || (dec.enc[i+2]>>3)&7 != 0) {
+		dec.text = "(bad)"
+	}
+
+	// Any invalid instruction.
+	if text == "BAD" && contains(dec.text, "(bad)") {
+		return true
+	}
+
+	// Instructions libopcodes knows but we do not (e.g., 0F 19 11).
+	if (text == "BAD" || size == 1 && isPrefix(text)) && hasPrefix(dec.text, unsupported...) {
+		return true
+	}
+
+	// Instructions we know but libopcodes does not (e.g., 0F D0 11).
+	if (contains(dec.text, "(bad)") || dec.nenc == 1 && isPrefix(dec.text)) && hasPrefix(text, libopcodesUnsupported...) {
+		return true
+	}
+
+	// Libopcodes rejects F2 90 as NOP. Not sure why.
+	if (contains(dec.text, "(bad)") || dec.nenc == 1 && isPrefix(dec.text)) && inst.Opcode>>24 == 0x90 && countPrefix(inst, 0xF2) > 0 {
+		return true
+	}
+
+	// 0F 20 11, 0F 21 11, 0F 22 11, 0F 23 11, 0F 24 11:
+	// Moves into and out of some control registers seem to be unsupported by libopcodes.
+	// TODO(rsc): Are they invalid somehow?
+	if (contains(dec.text, "(bad)") || dec.nenc == 1 && isPrefix(dec.text)) && contains(text, "%cr", "%db", "%tr") {
+		return true
+	}
+
+	if contains(dec.text, "fwait") && dec.nenc == 1 && dec.enc[0] != 0x9B { // libopcodes reports a 1-byte fwait whose byte is not 9B
+		return true
+	}
+
+	// 9B D9 11: libopcodes reports FSTSW instead of FWAIT + FNSTSW.
+	// This is correct in that FSTSW is a pseudo-op for the pair, but it really
+	// is a pair of instructions: execution can stop between them.
+	// Our decoder chooses to separate them.
+	if (text == "fwait" || strings.HasSuffix(text, " fwait")) && dec.nenc >= len(strings.Fields(text)) && dec.enc[len(strings.Fields(text))-1] == 0x9B {
+		return true
+	}
+
+	// 0F 18 77 11:
+	// Invalid instructions for which libopcodes prints "nop/reserved".
+	// Perhaps libopcodes is reporting an older encoding.
+	if text == "BAD" && contains(dec.text, "nop/reserved") {
+		return true
+	}
+
+	// 0F C7 B0 11 22 33 44: libopcodes says vmptrld 0x44332211(%eax); we say rdrand %eax.
+	// TODO(rsc): Fix, since we are probably wrong, but we don't have vmptrld in the manual.
+	if contains(text, "rdrand") && contains(dec.text, "vmptrld", "vmxon", "vmclear") {
+		return true
+	}
+
+	// DD C8: libopcodes says FNOP but the Intel manual is clear FNOP is only D9 D0.
+	// Perhaps libopcodes is reporting an older encoding.
+	if text == "BAD" && contains(dec.text, "fnop") && (dec.enc[0] != 0xD9 || dec.enc[1] != 0xD0) {
+		return true
+	}
+
+	// 66 90: libopcodes says xchg %ax,%ax; we say 'data16 nop'.
+	// The 16-bit swap will preserve the high bits of the register,
+	// so they are the same.
+	if contains(text, "nop") && contains(dec.text, "xchg %ax,%ax") {
+		return true
+	}
+
+	// If there are multiple prefixes, allow libopcodes to use an alternate name.
+	if size == 1 && dec.nenc == 1 && prefixByte[text] > 0 && prefixByte[text] == prefixByte[dec.text] {
+		return true
+	}
+
+	// 26 9B: libopcodes reports "fwait"/1, ignoring segment prefix.
+	// https://sourceware.org/bugzilla/show_bug.cgi?id=16891
+	// F0 82: Decode="lock"/1 but libopcodes="lock (bad)"/2.
+	if size == 1 && dec.nenc >= 1 && prefixByte[text] == dec.enc[0] && contains(dec.text, "(bad)", "fwait", "fnop") {
+		return true
+	}
+
+	// libopcodes interprets 660f801122 as taking a rel16 but
+	// truncating the address at 16 bits. Not sure what is correct.
+	if contains(text, ".+0x2211", ".+0x11") && contains(dec.text, " .-") {
+		return true
+	}
+
+	// 66 F3 0F D6 C5, 66 F2 0F D6 C0: libopcodes reports use of XMM register instead of MMX register,
+	// but only when the instruction has a 66 prefix. Maybe they know something we don't.
+	if countPrefix(inst, 0x66) > 0 && contains(dec.text, "movdq2q", "movq2dq") && !contains(dec.text, "%mm") {
+		return true
+	}
+
+	// 0F 01 F8, 0F 05, 0F 07: these are 64-bit instructions but libopcodes accepts them.
+	if (text == "BAD" || size == 1 && isPrefix(text)) && contains(dec.text, "swapgs", "syscall", "sysret", "rdfsbase", "rdgsbase", "wrfsbase", "wrgsbase") {
+		return true
+	}
+
+	return false
+}
+
+// unsupported lists mnemonic prefixes (matched with hasPrefix) of instructions known to libopcodes (or xed) but not to us.
+// Most of these come from supplementary manuals of one form or another.
+var unsupported = strings.Fields(`
+	bndc
+	bndl
+	bndm
+	bnds
+	clac
+	clgi
+	femms
+	fldln
+	fldz
+	getsec
+	invlpga
+	kmov
+	montmul
+	pavg
+	pf2i
+	pfacc
+	pfadd
+	pfcmp
+	pfmax
+	pfmin
+	pfmul
+	pfna
+	pfpnac
+	pfrc
+	pfrs
+	pfsub
+	phadd
+	phsub
+	pi2f
+	pmulhr
+	prefetch
+	pswap
+	ptest
+	rdseed
+	sha1
+	sha256
+	skinit
+	stac
+	stgi
+	vadd
+	vand
+	vcmp
+	vcomis
+	vcvt
+	vcvt
+	vdiv
+	vhadd
+	vhsub
+	vld
+	vmax
+	vmcall
+	vmfunc
+	vmin
+	vmlaunch
+	vmload
+	vmmcall
+	vmov
+	vmov
+	vmov
+	vmptrld
+	vmptrst
+	vmread
+	vmresume
+	vmrun
+	vmsave
+	vmul
+	vmwrite
+	vmxoff
+	vor
+	vpack
+	vpadd
+	vpand
+	vpavg
+	vpcmp
+	vpcmp
+	vpins
+	vpmadd
+	vpmax
+	vpmin
+	vpmul
+	vpmul
+	vpor
+	vpsad
+	vpshuf
+	vpsll
+	vpsra
+	vpsrad
+	vpsrl
+	vpsub
+	vpunp
+	vpxor
+	vrcp
+	vrsqrt
+	vshuf
+	vsqrt
+	vsub
+	vucomis
+	vunp
+	vxor
+	vzero
+	xcrypt
+	xsha1
+	xsha256
+	xstore-rng
+	insertq
+	extrq
+	vmclear
+	invvpid
+	adox
+	vmxon
+	invept
+	adcx
+	vmclear
+	prefetchwt1
+	enclu
+	encls
+	salc
+	fstpnce
+	fdisi8087_nop
+	fsetpm287_nop
+	feni8087_nop
+	syscall
+	sysret
+`)
+
+// libopcodesUnsupported lists mnemonic prefixes (matched with hasPrefix) of instructions known to us but not to libopcodes (at least in binutils 2.24).
+var libopcodesUnsupported = strings.Fields(`
+	addsubps
+	aes
+	blend
+	cvttpd2dq
+	dpp
+	extract
+	haddps
+	hsubps
+	insert
+	invpcid
+	lddqu
+	movmsk
+	movnt
+	movq2dq
+	mps
+	pack
+	pblend
+	pclmul
+	pcmp
+	pext
+	phmin
+	pins
+	pmax
+	pmin
+	pmov
+	pmovmsk
+	pmul
+	popcnt
+	pslld
+	psllq
+	psllw
+	psrad
+	psraw
+	psrl
+	ptest
+	punpck
+	round
+	xrstor
+	xsavec
+	xsaves
+	comis
+	ucomis
+	movhps
+	movntps
+	rsqrt
+	rcpp
+	puncpck
+	bsf
+	movq2dq
+	cvttpd2dq
+	movq
+	hsubpd
+	movdqa
+	movhpd
+	addsubpd
+	movd
+	haddpd
+	cvtps2dq
+	bsr
+	cvtdq2ps
+	rdrand
+	maskmov
+	movq2dq
+	movlhps
+	movbe
+	movlpd
+`)
diff --git a/x86/x86asm/objdumpext_test.go b/x86/x86asm/objdumpext_test.go
new file mode 100644
index 0000000..37a5513
--- /dev/null
+++ b/x86/x86asm/objdumpext_test.go
@@ -0,0 +1,314 @@
+// Copyright 2014 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package x86asm
+
+import (
+	"bytes"
+	"debug/elf"
+	"encoding/binary"
+	"fmt"
+	"io"
+	"log"
+	"os"
+	"strconv"
+	"strings"
+	"testing"
+)
+
+// objdumpPath is the objdump binary run by objdump. Apologies for the developer-specific path, but we need objdump 2.24 + some committed patches that will land in 2.25.
+const objdumpPath = "/Users/rsc/bin/objdump2"
+
+func testObjdump32(t *testing.T, generate func(func([]byte))) { // runs the objdump comparison with arch = 32
+	testObjdumpArch(t, generate, 32)
+}
+
+func testObjdump64(t *testing.T, generate func(func([]byte))) { // runs the objdump comparison with arch = 64
+	testObjdumpArch(t, generate, 64)
+}
+
+// testObjdumpArch runs testExtDis against GNU objdump (see objdump) for arch 32 or 64.
+func testObjdumpArch(t *testing.T, generate func(func([]byte)), arch int) {
+	if testing.Short() {
+		t.Skip("skipping objdump test in short mode")
+	}
+	// objdumpPath is machine-specific; a missing binary says nothing about our decoder, so skip.
+	if _, err := os.Stat(objdumpPath); err != nil {
+		t.Skip(err)
+	}
+	testExtDis(t, "gnu", arch, objdump, generate, allowedMismatchObjdump)
+}
+
+func objdump(ext *ExtDis) error { // runs objdump on ext.File and sends one ExtInst on ext.Dec per expected address
+	// File already written with instructions; add ELF header.
+	if ext.Arch == 32 {
+		if err := writeELF32(ext.File, ext.Size); err != nil {
+			return err
+		}
+	} else {
+		if err := writeELF64(ext.File, ext.Size); err != nil {
+			return err
+		}
+	}
+
+	b, err := ext.Run(objdumpPath, "-d", "-z", ext.File.Name())
+	if err != nil {
+		return err
+	}
+
+	var (
+		nmatch  int
+		reading bool
+		next    uint32 = start
+		addr    uint32
+		encbuf  [32]byte
+		enc     []byte
+		text    string
+	)
+	flush := func() { // emits the pending instruction, but only if it sits at the next expected address
+		if addr == next {
+			switch text { // normalize objdump's repz/repnz spellings to rep/repn
+			case "repz":
+				text = "rep"
+			case "repnz":
+				text = "repn"
+			default:
+				text = strings.Replace(text, "repz ", "rep ", -1)
+				text = strings.Replace(text, "repnz ", "repn ", -1)
+			}
+			if m := pcrelw.FindStringSubmatch(text); m != nil { // rewrite the matched target as a 16-bit offset from the end of the instruction
+				targ, _ := strconv.ParseUint(m[2], 16, 64)
+				text = fmt.Sprintf("%s .%+#x", m[1], int16(uint32(targ)-uint32(uint16(addr))-uint32(len(enc))))
+			}
+			if m := pcrel.FindStringSubmatch(text); m != nil { // same, as a 32-bit offset
+				targ, _ := strconv.ParseUint(m[2], 16, 64)
+				text = fmt.Sprintf("%s .%+#x", m[1], int32(uint32(targ)-addr-uint32(len(enc))))
+			}
+			text = strings.Replace(text, "0x0(", "(", -1)
+			text = strings.Replace(text, "%st(0)", "%st", -1)
+
+			ext.Dec <- ExtInst{addr, encbuf, len(enc), text}
+			encbuf = [32]byte{}
+			enc = nil
+			next += 32 // expected addresses are 32 bytes apart (presumably one test case per 32-byte slot — confirm)
+		}
+	}
+	var textangle = []byte("<.text>:")
+	for {
+		line, err := b.ReadSlice('\n')
+		if err != nil {
+			if err == io.EOF {
+				break
+			}
+			return fmt.Errorf("reading objdump output: %v", err)
+		}
+		if bytes.Contains(line, textangle) { // disassembly lines follow the "<.text>:" marker
+			reading = true
+			continue
+		}
+		if !reading {
+			continue
+		}
+		if debug {
+			os.Stdout.Write(line)
+		}
+		if enc1 := parseContinuation(line, encbuf[:len(enc)]); enc1 != nil { // more hex bytes for the pending instruction
+			enc = enc1
+			continue
+		}
+		flush()
+		nmatch++
+		addr, enc, text = parseLine(line, encbuf[:0])
+		if addr > next {
+			return fmt.Errorf("address out of sync expected <= %#x at %q in:\n%s", next, line, line)
+		}
+	}
+	flush()
+	if next != start+uint32(ext.Size) {
+		return fmt.Errorf("not enough results found [%d %d]", next, start+ext.Size)
+	}
+	if err := ext.Wait(); err != nil {
+		return fmt.Errorf("exec: %v", err)
+	}
+
+	return nil
+}
+
+// parseLine splits an objdump line of the form "addr:\thex bytes\ttext [# note]" into the address,
+// the encoding bytes (appended to encstart) and the text run through fixSpace; malformed input is fatal.
+func parseLine(line []byte, encstart []byte) (addr uint32, enc []byte, text string) {
+	bad := func() { log.Fatalf("cannot parse disassembly: %q", line) }
+	colon := index(line, ":\t")
+	if colon < 0 {
+		bad()
+	}
+	n, err := strconv.ParseUint(string(trimSpace(line[:colon])), 16, 32)
+	if err != nil {
+		bad()
+	}
+	rest := line[colon+2:]
+	tab := bytes.IndexByte(rest, '\t')
+	if tab < 0 {
+		bad()
+	}
+	raw, ok := parseHex(rest[:tab], encstart)
+	if !ok {
+		bad()
+	}
+	asm := trimSpace(rest[tab:])
+	if hash := bytes.IndexByte(asm, '#'); hash >= 0 {
+		asm = trimSpace(asm[:hash])
+	}
+	return uint32(n), raw, string(fixSpace(asm))
+}
+
+// parseContinuation parses line as "addr:\t" followed only by hex bytes and returns enc extended with them.
+// It returns nil if the separator is missing; parseHex's result is returned unchecked (presumably nil on failure — confirm).
+func parseContinuation(line []byte, enc []byte) []byte {
+	colon := index(line, ":\t")
+	if colon < 0 {
+		return nil
+	}
+	more, _ := parseHex(line[colon+1:], enc)
+	return more
+}
+
+// writeELF32 writes an ELF32 header at the beginning of f,
+// describing a text segment that starts at start and extends for size bytes.
+// It returns any error from seeking or writing f instead of ignoring it.
+func writeELF32(f *os.File, size int) error {
+	if _, err := f.Seek(0, 0); err != nil {
+		return err
+	}
+	var hdr elf.Header32
+	var prog elf.Prog32
+	var sect elf.Section32
+	var buf bytes.Buffer
+	// Layout: ELF header, one program header at off1, three section headers from off2.
+	off1 := binary.Size(&hdr)
+	off2 := off1 + binary.Size(&prog)
+	off3 := off2 + binary.Size(&sect)
+	hdr = elf.Header32{
+		Ident:     [16]byte{0x7F, 'E', 'L', 'F', 1, byte(elf.ELFDATA2LSB), 1},
+		Type:      2,
+		Machine:   uint16(elf.EM_386),
+		Version:   1,
+		Entry:     start,
+		Phoff:     uint32(off1),
+		Shoff:     uint32(off2),
+		Flags:     0x05000002,
+		Ehsize:    uint16(off1),
+		Phentsize: uint16(off2 - off1),
+		Phnum:     1,
+		Shentsize: uint16(off3 - off2),
+		Shnum:     3,
+		Shstrndx:  2,
+	}
+	binary.Write(&buf, binary.LittleEndian, &hdr)
+	// The instructions are already in f at offset start; load them at address start.
+	prog = elf.Prog32{
+		Type:   1,
+		Off:    start,
+		Vaddr:  start,
+		Paddr:  start,
+		Filesz: uint32(size),
+		Memsz:  uint32(size),
+		Flags:  5,
+		Align:  start,
+	}
+	binary.Write(&buf, binary.LittleEndian, &prog)
+	binary.Write(&buf, binary.LittleEndian, &sect) // NULL section
+	sect = elf.Section32{
+		Name:      1,
+		Type:      uint32(elf.SHT_PROGBITS),
+		Addr:      start,
+		Off:       start,
+		Size:      uint32(size),
+		Flags:     uint32(elf.SHF_ALLOC | elf.SHF_EXECINSTR),
+		Addralign: 4,
+	}
+	binary.Write(&buf, binary.LittleEndian, &sect) // .text
+	sect = elf.Section32{
+		Name:      uint32(len("\x00.text\x00")),
+		Type:      uint32(elf.SHT_STRTAB),
+		Addr:      0,
+		Off:       uint32(off2 + (off3-off2)*3),
+		Size:      uint32(len("\x00.text\x00.shstrtab\x00")),
+		Addralign: 1,
+	}
+	binary.Write(&buf, binary.LittleEndian, &sect) // .shstrtab
+	// Section name strings, placed right after the three section headers.
+	buf.WriteString("\x00.text\x00.shstrtab\x00")
+	_, err := f.Write(buf.Bytes())
+	return err
+}
+
+// writeELF64 writes an ELF64 header at the beginning of f,
+// describing a text segment that starts at start and extends for size bytes.
+// It returns any error from seeking or writing f instead of ignoring it.
+func writeELF64(f *os.File, size int) error {
+	if _, err := f.Seek(0, 0); err != nil {
+		return err
+	}
+	var hdr elf.Header64
+	var prog elf.Prog64
+	var sect elf.Section64
+	var buf bytes.Buffer
+	// Layout: ELF header, one program header at off1, three section headers from off2.
+	off1 := binary.Size(&hdr)
+	off2 := off1 + binary.Size(&prog)
+	off3 := off2 + binary.Size(&sect)
+	hdr = elf.Header64{
+		Ident:     [16]byte{0x7F, 'E', 'L', 'F', 2, byte(elf.ELFDATA2LSB), 1},
+		Type:      2,
+		Machine:   uint16(elf.EM_X86_64),
+		Version:   1,
+		Entry:     start,
+		Phoff:     uint64(off1),
+		Shoff:     uint64(off2),
+		Flags:     0x05000002,
+		Ehsize:    uint16(off1),
+		Phentsize: uint16(off2 - off1),
+		Phnum:     1,
+		Shentsize: uint16(off3 - off2),
+		Shnum:     3,
+		Shstrndx:  2,
+	}
+	binary.Write(&buf, binary.LittleEndian, &hdr)
+	// The instructions are already in f at offset start; load them at address start.
+	prog = elf.Prog64{
+		Type:   1,
+		Off:    start,
+		Vaddr:  start,
+		Paddr:  start,
+		Filesz: uint64(size),
+		Memsz:  uint64(size),
+		Flags:  5,
+		Align:  start,
+	}
+	binary.Write(&buf, binary.LittleEndian, &prog)
+	binary.Write(&buf, binary.LittleEndian, &sect) // NULL section
+	sect = elf.Section64{
+		Name:      1,
+		Type:      uint32(elf.SHT_PROGBITS),
+		Addr:      start,
+		Off:       start,
+		Size:      uint64(size),
+		Flags:     uint64(elf.SHF_ALLOC | elf.SHF_EXECINSTR),
+		Addralign: 4,
+	}
+	binary.Write(&buf, binary.LittleEndian, &sect) // .text
+	sect = elf.Section64{
+		Name:      uint32(len("\x00.text\x00")),
+		Type:      uint32(elf.SHT_STRTAB),
+		Addr:      0,
+		Off:       uint64(off2 + (off3-off2)*3),
+		Size:      uint64(len("\x00.text\x00.shstrtab\x00")),
+		Addralign: 1,
+	}
+	binary.Write(&buf, binary.LittleEndian, &sect) // .shstrtab
+	// Section name strings, placed right after the three section headers.
+	buf.WriteString("\x00.text\x00.shstrtab\x00")
+	_, err := f.Write(buf.Bytes())
+	return err
+}
diff --git a/x86/x86asm/plan9ext_test.go b/x86/x86asm/plan9ext_test.go
new file mode 100644
index 0000000..21f5bfd
--- /dev/null
+++ b/x86/x86asm/plan9ext_test.go
@@ -0,0 +1,120 @@
+// Copyright 2014 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package x86asm
+
+import (
+	"bytes"
+	"fmt"
+	"io"
+	"log"
+	"os"
+	"strconv"
+	"testing"
+)
+
+const plan9Path = "testdata/libmach8db" // reference disassembler binary; plan9 runs it with -8 or -6 on the test file
+
+// testPlan9Arch runs testExtDis against the libmach8db disassembler (see plan9) for arch 32 or 64.
+func testPlan9Arch(t *testing.T, arch int, generate func(func([]byte))) {
+	if testing.Short() {
+		t.Skip("skipping libmach test in short mode")
+	}
+	// The reference binary may not be present; that is no decoder failure, so skip rather than fail.
+	if _, err := os.Stat(plan9Path); err != nil {
+		t.Skip(err)
+	}
+	testExtDis(t, "plan9", arch, plan9, generate, allowedMismatchPlan9)
+}
+
+func testPlan932(t *testing.T, generate func(func([]byte))) { // runs the libmach8db comparison with arch = 32
+	testPlan9Arch(t, 32, generate)
+}
+
+func testPlan964(t *testing.T, generate func(func([]byte))) { // runs the libmach8db comparison with arch = 64
+	testPlan9Arch(t, 64, generate)
+}
+
+// plan9 runs the plan9Path disassembler on ext.File (-8, or -6 when ext.Arch is 64)
+// and sends one ExtInst on ext.Dec for every output line whose address is the next
+// expected one; expected addresses advance by 32 and lines below them are skipped.
+func plan9(ext *ExtDis) error {
+	mode := "-8"
+	if ext.Arch == 64 {
+		mode = "-6"
+	}
+	out, err := ext.Run(plan9Path, mode, ext.File.Name())
+	if err != nil {
+		return err
+	}
+	var (
+		nmatch int
+		want   uint32 = start
+		addr   uint32
+		encbuf [32]byte
+		enc    []byte
+		text   string
+	)
+	for {
+		line, err := out.ReadSlice('\n')
+		if err == io.EOF {
+			break
+		}
+		if err != nil {
+			return fmt.Errorf("reading libmach8db output: %v", err)
+		}
+		if debug {
+			os.Stdout.Write(line)
+		}
+		nmatch++
+		addr, enc, text = parseLinePlan9(line, encbuf[:0])
+		switch {
+		case addr > want:
+			return fmt.Errorf("address out of sync expected <= %#x at %q in:\n%s", want, line, line)
+		case addr < want:
+			continue
+		}
+		// Rewrite a target matched by pcrelw/pcrel as an offset from the end of the instruction.
+		if m := pcrelw.FindStringSubmatch(text); m != nil {
+			targ, _ := strconv.ParseUint(m[2], 16, 64)
+			text = fmt.Sprintf("%s .%+#x", m[1], int16(uint32(targ)-uint32(uint16(addr))-uint32(len(enc))))
+		}
+		if m := pcrel.FindStringSubmatch(text); m != nil {
+			targ, _ := strconv.ParseUint(m[2], 16, 64)
+			text = fmt.Sprintf("%s .%+#x", m[1], int32(uint32(targ)-addr-uint32(len(enc))))
+		}
+		ext.Dec <- ExtInst{addr, encbuf, len(enc), text}
+		encbuf, enc = [32]byte{}, nil
+		want += 32
+	}
+	if want != start+uint32(ext.Size) {
+		return fmt.Errorf("not enough results found [%d %d]", want, start+ext.Size)
+	}
+	if err := ext.Wait(); err != nil {
+		return fmt.Errorf("exec: %v", err)
+	}
+	return nil
+}
+
+// parseLinePlan9 parses one line of libmach8db output, which has the form
+//	0xADDR HEXBYTES text...
+// with single spaces ending the first two fields. It returns the address,
+// the encoding bytes appended to encstart, and the text run through fixSpace.
+// Any line not of that form is a fatal error.
+func parseLinePlan9(line []byte, encstart []byte) (addr uint32, enc []byte, text string) {
+	bad := func() { log.Fatalf("cannot parse disassembly: %q", line) }
+	parts := bytes.SplitN(line, []byte(" "), 3)
+	if len(parts) < 3 || !bytes.HasPrefix(parts[0], []byte("0x")) {
+		bad()
+	}
+	n, err := strconv.ParseUint(string(trimSpace(parts[0][2:])), 16, 32)
+	if err != nil {
+		bad()
+	}
+	raw, ok := parseHex(parts[1], encstart)
+	if !ok {
+		bad()
+	}
+	return uint32(n), raw, string(fixSpace(parts[2]))
+}
diff --git a/x86/x86asm/plan9x.go b/x86/x86asm/plan9x.go
new file mode 100644
index 0000000..ccbdea4
--- /dev/null
+++ b/x86/x86asm/plan9x.go
@@ -0,0 +1,346 @@
+// Copyright 2014 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package x86asm
+
+import (
+	"fmt"
+	"strings"
+)
+
+// plan9Syntax formats inst in Go assembler syntax,
+// which was originally defined by Plan 9.
+// The pc is the program counter of the instruction; it is needed to turn
+// PC-relative operands into absolute addresses.
+// The symname function looks up an address in the symbol table of the program
+// being disassembled and returns the name and base address of the symbol
+// containing it, or "", 0 if there is none. A nil symname finds no symbols.
+func plan9Syntax(inst Inst, pc uint64, symname func(uint64) (string, uint64)) string {
+	if symname == nil {
+		symname = func(uint64) (string, uint64) { return "", 0 }
+	}
+
+	// Operands are listed in reverse order; nil argument slots are skipped.
+	var operands []string
+	for i := len(inst.Args) - 1; i >= 0; i-- {
+		if a := inst.Args[i]; a != nil {
+			operands = append(operands, plan9Arg(&inst, pc, symname, a))
+		}
+	}
+
+	// Take the last prefix found before the first empty slot or REX prefix.
+	var last Prefix
+	for _, p := range inst.Prefix {
+		if p == 0 || p.IsREX() {
+			break
+		}
+		last = p
+	}
+
+	// It is printed unless its low byte is 0, 0x66 or 0x67; REPN is spelled REPNE.
+	text := ""
+	if b := last & 0xFF; b == PrefixREPN {
+		text = "REPNE "
+	} else if b != 0 && b != 0x66 && b != 0x67 {
+		text = last.String() + " "
+	}
+
+	// Ops listed in plan9Suffix get a B, W, L or Q suffix for DataSize 8, 16, 32 or 64.
+	text += inst.Op.String()
+	if plan9Suffix[inst.Op] {
+		switch inst.DataSize {
+		case 8:
+			text += "B"
+		case 16:
+			text += "W"
+		case 32:
+			text += "L"
+		case 64:
+			text += "Q"
+		}
+	}
+
+	// Append the operands, if any.
+	if operands != nil {
+		text += " " + strings.Join(operands, ", ")
+	}
+	return text
+}
+
+// plan9Arg formats a single operand of inst in Go assembler syntax.
+// pc and symname have the same meaning as in plan9Syntax; with a zero pc,
+// relative operands are left in their default arg.String form.
+func plan9Arg(inst *Inst, pc uint64, symname func(uint64) (string, uint64), arg Arg) string {
+	switch a := arg.(type) {
+	case Reg:
+		return plan9Reg[a]
+	case Rel:
+		if pc == 0 {
+			break
+		}
+		// Name the target only when it is exactly the start of a symbol;
+		// otherwise print the raw address, so that a relative jump shows up
+		// as JMP 0x123 rather than JMP f+10(SB), which is easier to search for.
+		target := pc + uint64(inst.Len) + uint64(a)
+		if name, base := symname(target); name != "" && target == base {
+			return name + "(SB)"
+		}
+		return fmt.Sprintf("%#x", target)
+	case Imm:
+		val := uint64(a)
+		if name, base := symname(val); name != "" {
+			if val == base {
+				return "$" + name + "(SB)"
+			}
+			return fmt.Sprintf("$%s%+d(SB)", name, val-base)
+		}
+		switch {
+		case inst.Mode == 32:
+			return fmt.Sprintf("$%#x", uint32(a))
+		case Imm(int32(a)) == a:
+			return fmt.Sprintf("$%#x", int64(a))
+		}
+		return fmt.Sprintf("$%#x", val)
+	case Mem:
+		// A bare nonzero displacement is looked up as a possible symbol address.
+		if a.Segment == 0 && a.Disp != 0 && a.Base == 0 && (a.Index == 0 || a.Scale == 0) {
+			where := uint64(a.Disp)
+			if name, base := symname(where); name != "" {
+				if where == base {
+					return name + "(SB)"
+				}
+				return fmt.Sprintf("%s%+d(SB)", name, where-base)
+			}
+		}
+		var seg, reg, idx string
+		if a.Segment != 0 {
+			seg = plan9Reg[a.Segment] + ":"
+		}
+		disp := "0"
+		if a.Disp != 0 {
+			disp = fmt.Sprintf("%#x", a.Disp)
+		}
+		if a.Base != 0 {
+			reg = "(" + plan9Reg[a.Base] + ")"
+		}
+		if a.Index != 0 && a.Scale != 0 {
+			idx = fmt.Sprintf("(%s*%d)", plan9Reg[a.Index], a.Scale)
+		}
+		return seg + disp + reg + idx
+	}
+	return arg.String()
+}
+
+var plan9Suffix = [maxOp + 1]bool{ // ops to which plan9Syntax appends a B/W/L/Q suffix chosen by inst.DataSize
+	ADC:       true,
+	ADD:       true,
+	AND:       true,
+	BSF:       true,
+	BSR:       true,
+	BT:        true,
+	BTC:       true,
+	BTR:       true,
+	BTS:       true,
+	CMP:       true,
+	CMPXCHG:   true,
+	CVTSI2SD:  true,
+	CVTSI2SS:  true,
+	CVTSD2SI:  true,
+	CVTSS2SI:  true,
+	CVTTSD2SI: true,
+	CVTTSS2SI: true,
+	DEC:       true,
+	DIV:       true,
+	FLDENV:    true,
+	FRSTOR:    true,
+	IDIV:      true,
+	IMUL:      true,
+	IN:        true,
+	INC:       true,
+	LEA:       true,
+	MOV:       true,
+	MOVNTI:    true,
+	MUL:       true,
+	NEG:       true,
+	NOP:       true,
+	NOT:       true,
+	OR:        true,
+	OUT:       true,
+	POP:       true,
+	POPA:      true,
+	PUSH:      true,
+	PUSHA:     true,
+	RCL:       true,
+	RCR:       true,
+	ROL:       true,
+	ROR:       true,
+	SAR:       true,
+	SBB:       true,
+	SHL:       true,
+	SHLD:      true,
+	SHR:       true,
+	SHRD:      true,
+	SUB:       true,
+	TEST:      true,
+	XADD:      true,
+	XCHG:      true,
+	XOR:       true,
+}
+
+var plan9Reg = [...]string{ // Go assembler register names, indexed by Reg; the 16-, 32- and 64-bit forms of a general register share one name (AX, EAX and RAX all print as AX)
+	AL:   "AL",
+	CL:   "CL",
+	BL:   "BL",
+	DL:   "DL",
+	AH:   "AH",
+	CH:   "CH",
+	BH:   "BH",
+	DH:   "DH",
+	SPB:  "SP",
+	BPB:  "BP",
+	SIB:  "SI",
+	DIB:  "DI",
+	R8B:  "R8",
+	R9B:  "R9",
+	R10B: "R10",
+	R11B: "R11",
+	R12B: "R12",
+	R13B: "R13",
+	R14B: "R14",
+	R15B: "R15",
+	AX:   "AX",
+	CX:   "CX",
+	BX:   "BX",
+	DX:   "DX",
+	SP:   "SP",
+	BP:   "BP",
+	SI:   "SI",
+	DI:   "DI",
+	R8W:  "R8",
+	R9W:  "R9",
+	R10W: "R10",
+	R11W: "R11",
+	R12W: "R12",
+	R13W: "R13",
+	R14W: "R14",
+	R15W: "R15",
+	EAX:  "AX",
+	ECX:  "CX",
+	EDX:  "DX",
+	EBX:  "BX",
+	ESP:  "SP",
+	EBP:  "BP",
+	ESI:  "SI",
+	EDI:  "DI",
+	R8L:  "R8",
+	R9L:  "R9",
+	R10L: "R10",
+	R11L: "R11",
+	R12L: "R12",
+	R13L: "R13",
+	R14L: "R14",
+	R15L: "R15",
+	RAX:  "AX",
+	RCX:  "CX",
+	RDX:  "DX",
+	RBX:  "BX",
+	RSP:  "SP",
+	RBP:  "BP",
+	RSI:  "SI",
+	RDI:  "DI",
+	R8:   "R8",
+	R9:   "R9",
+	R10:  "R10",
+	R11:  "R11",
+	R12:  "R12",
+	R13:  "R13",
+	R14:  "R14",
+	R15:  "R15",
+	IP:   "IP",
+	EIP:  "IP",
+	RIP:  "IP",
+	F0:   "F0",
+	F1:   "F1",
+	F2:   "F2",
+	F3:   "F3",
+	F4:   "F4",
+	F5:   "F5",
+	F6:   "F6",
+	F7:   "F7",
+	M0:   "M0",
+	M1:   "M1",
+	M2:   "M2",
+	M3:   "M3",
+	M4:   "M4",
+	M5:   "M5",
+	M6:   "M6",
+	M7:   "M7",
+	X0:   "X0",
+	X1:   "X1",
+	X2:   "X2",
+	X3:   "X3",
+	X4:   "X4",
+	X5:   "X5",
+	X6:   "X6",
+	X7:   "X7",
+	X8:   "X8",
+	X9:   "X9",
+	X10:  "X10",
+	X11:  "X11",
+	X12:  "X12",
+	X13:  "X13",
+	X14:  "X14",
+	X15:  "X15",
+	CS:   "CS",
+	SS:   "SS",
+	DS:   "DS",
+	ES:   "ES",
+	FS:   "FS",
+	GS:   "GS",
+	GDTR: "GDTR",
+	IDTR: "IDTR",
+	LDTR: "LDTR",
+	MSW:  "MSW",
+	TASK: "TASK",
+	CR0:  "CR0",
+	CR1:  "CR1",
+	CR2:  "CR2",
+	CR3:  "CR3",
+	CR4:  "CR4",
+	CR5:  "CR5",
+	CR6:  "CR6",
+	CR7:  "CR7",
+	CR8:  "CR8",
+	CR9:  "CR9",
+	CR10: "CR10",
+	CR11: "CR11",
+	CR12: "CR12",
+	CR13: "CR13",
+	CR14: "CR14",
+	CR15: "CR15",
+	DR0:  "DR0",
+	DR1:  "DR1",
+	DR2:  "DR2",
+	DR3:  "DR3",
+	DR4:  "DR4",
+	DR5:  "DR5",
+	DR6:  "DR6",
+	DR7:  "DR7",
+	DR8:  "DR8",
+	DR9:  "DR9",
+	DR10: "DR10",
+	DR11: "DR11",
+	DR12: "DR12",
+	DR13: "DR13",
+	DR14: "DR14",
+	DR15: "DR15",
+	TR0:  "TR0",
+	TR1:  "TR1",
+	TR2:  "TR2",
+	TR3:  "TR3",
+	TR4:  "TR4",
+	TR5:  "TR5",
+	TR6:  "TR6",
+	TR7:  "TR7",
+}
diff --git a/x86/x86asm/plan9x_test.go b/x86/x86asm/plan9x_test.go
new file mode 100644
index 0000000..f2ea28c
--- /dev/null
+++ b/x86/x86asm/plan9x_test.go
@@ -0,0 +1,54 @@
+// Copyright 2014 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package x86asm
+
+import (
+	"strings"
+	"testing"
+)
+
+func TestPlan932Manual(t *testing.T)   { testPlan932(t, hexCases(t, plan9ManualTests)) } // 32-bit mode (testPlan932) over each case generator
+func TestPlan932Testdata(t *testing.T) { testPlan932(t, concat(basicPrefixes, testdataCases(t))) }
+func TestPlan932ModRM(t *testing.T)    { testPlan932(t, concat(basicPrefixes, enumModRM)) }
+func TestPlan932OneByte(t *testing.T)  { testBasic(t, testPlan932) }
+func TestPlan9320F(t *testing.T)       { testBasic(t, testPlan932, 0x0F) }
+func TestPlan9320F38(t *testing.T)     { testBasic(t, testPlan932, 0x0F, 0x38) }
+func TestPlan9320F3A(t *testing.T)     { testBasic(t, testPlan932, 0x0F, 0x3A) }
+func TestPlan932Prefix(t *testing.T)   { testPrefix(t, testPlan932) }
+
+func TestPlan964Manual(t *testing.T)   { testPlan964(t, hexCases(t, plan9ManualTests)) } // 64-bit mode (testPlan964) over the same generators
+func TestPlan964Testdata(t *testing.T) { testPlan964(t, concat(basicPrefixes, testdataCases(t))) }
+func TestPlan964ModRM(t *testing.T)    { testPlan964(t, concat(basicPrefixes, enumModRM)) }
+func TestPlan964OneByte(t *testing.T)  { testBasic(t, testPlan964) }
+func TestPlan9640F(t *testing.T)       { testBasic(t, testPlan964, 0x0F) }
+func TestPlan9640F38(t *testing.T)     { testBasic(t, testPlan964, 0x0F, 0x38) }
+func TestPlan9640F3A(t *testing.T)     { testBasic(t, testPlan964, 0x0F, 0x3A) }
+func TestPlan964Prefix(t *testing.T)   { testPrefix(t, testPlan964) }
+
+func TestPlan964REXTestdata(t *testing.T) { // 64-bit mode with rexPrefixes added; cases are passed through filter with isValidREX
+	testPlan964(t, filter(concat3(basicPrefixes, rexPrefixes, testdataCases(t)), isValidREX))
+}
+func TestPlan964REXModRM(t *testing.T)   { testPlan964(t, concat3(basicPrefixes, rexPrefixes, enumModRM)) }
+func TestPlan964REXOneByte(t *testing.T) { testBasicREX(t, testPlan964) }
+func TestPlan964REX0F(t *testing.T)      { testBasicREX(t, testPlan964, 0x0F) }
+func TestPlan964REX0F38(t *testing.T)    { testBasicREX(t, testPlan964, 0x0F, 0x38) }
+func TestPlan964REX0F3A(t *testing.T)    { testBasicREX(t, testPlan964, 0x0F, 0x3A) }
+func TestPlan964REXPrefix(t *testing.T)  { testPrefixREX(t, testPlan964) }
+
+// plan9ManualTests holds test cases that will be run by TestPlan932Manual and TestPlan964Manual.
+// If you are debugging a few cases that turned up in a longer run, it can be useful
+// to list them here and then use -run='Plan9(32|64)Manual', particularly with tracing enabled.
+var plan9ManualTests = `
+`
+
+// allowedMismatchPlan9 reports whether the mismatch between text and dec
+// should be allowed by the test. No mismatch is currently allowed.
+func allowedMismatchPlan9(text string, size int, inst *Inst, dec ExtInst) bool {
+	return false
+}
+
+// plan9Unsupported lists instructions known to us but not to plan9; the list is currently empty.
+var plan9Unsupported = strings.Fields(`
+`)
diff --git a/x86/x86asm/tables.go b/x86/x86asm/tables.go
new file mode 100644
index 0000000..3d08d5e
--- /dev/null
+++ b/x86/x86asm/tables.go
@@ -0,0 +1,9760 @@
+// DO NOT EDIT
+// generated by: x86map -fmt=decoder ../x86.csv
+
+package x86asm
+
+var decoder = [...]uint16{
+	uint16(xFail),
+	/*1*/ uint16(xCondByte), 243,
+	0x00, 490,
+	0x01, 496,
+	0x02, 525,
+	0x03, 531,
+	0x04, 560,
+	0x05, 566,
+	0x06, 595,
+	0x07, 602,
+	0x08, 609,
+	0x09, 615,
+	0x0A, 644,
+	0x0B, 650,
+	0x0C, 679,
+	0x0D, 685,
+	0x0E, 714,
+	0x0F, 721,
+	0x10, 8026,
+	0x11, 8032,
+	0x12, 8061,
+	0x13, 8067,
+	0x14, 8096,
+	0x15, 8102,
+	0x16, 8131,
+	0x17, 8138,
+	0x18, 8145,
+	0x19, 8151,
+	0x1A, 8180,
+	0x1B, 8186,
+	0x1C, 8215,
+	0x1D, 8221,
+	0x1E, 8250,
+	0x1F, 8257,
+	0x20, 8264,
+	0x21, 8270,
+	0x22, 8299,
+	0x23, 8305,
+	0x24, 8334,
+	0x25, 8340,
+	0x27, 8369,
+	0x28, 8375,
+	0x29, 8381,
+	0x2A, 8410,
+	0x2B, 8416,
+	0x2C, 8445,
+	0x2D, 8451,
+	0x2F, 8480,
+	0x30, 8486,
+	0x31, 8492,
+	0x32, 8521,
+	0x33, 8527,
+	0x34, 8556,
+	0x35, 8562,
+	0x37, 8591,
+	0x38, 8597,
+	0x39, 8603,
+	0x3A, 8632,
+	0x3B, 8638,
+	0x3C, 8667,
+	0x3D, 8673,
+	0x3F, 8702,
+	0x40, 8708,
+	0x41, 8708,
+	0x42, 8708,
+	0x43, 8708,
+	0x44, 8708,
+	0x45, 8708,
+	0x46, 8708,
+	0x47, 8708,
+	0x48, 8723,
+	0x49, 8723,
+	0x4a, 8723,
+	0x4b, 8723,
+	0x4c, 8723,
+	0x4d, 8723,
+	0x4e, 8723,
+	0x4f, 8723,
+	0x50, 8738,
+	0x51, 8738,
+	0x52, 8738,
+	0x53, 8738,
+	0x54, 8738,
+	0x55, 8738,
+	0x56, 8738,
+	0x57, 8738,
+	0x58, 8765,
+	0x59, 8765,
+	0x5a, 8765,
+	0x5b, 8765,
+	0x5c, 8765,
+	0x5d, 8765,
+	0x5e, 8765,
+	0x5f, 8765,
+	0x60, 8792,
+	0x61, 8805,
+	0x62, 8818,
+	0x63, 8837,
+	0x68, 8868,
+	0x69, 8887,
+	0x6A, 8922,
+	0x6B, 8927,
+	0x6C, 8962,
+	0x6D, 8965,
+	0x6E, 8978,
+	0x6F, 8981,
+	0x70, 8994,
+	0x71, 8999,
+	0x72, 9004,
+	0x73, 9009,
+	0x74, 9014,
+	0x75, 9019,
+	0x76, 9024,
+	0x77, 9029,
+	0x78, 9034,
+	0x79, 9039,
+	0x7A, 9044,
+	0x7B, 9049,
+	0x7C, 9054,
+	0x7D, 9059,
+	0x7E, 9064,
+	0x7F, 9069,
+	0x80, 9074,
+	0x81, 9131,
+	0x83, 9372,
+	0x84, 9613,
+	0x85, 9619,
+	0x86, 9648,
+	0x87, 9654,
+	0x88, 9683,
+	0x89, 9689,
+	0x8A, 9711,
+	0x8B, 9717,
+	0x8C, 9739,
+	0x8D, 9768,
+	0x8E, 9797,
+	0x8F, 9826,
+	0x90, 9862,
+	0x91, 9862,
+	0x92, 9862,
+	0x93, 9862,
+	0x94, 9862,
+	0x95, 9862,
+	0x96, 9862,
+	0x97, 9862,
+	0x98, 9888,
+	0x99, 9908,
+	0x9A, 9928,
+	0x9B, 9945,
+	0x9C, 9948,
+	0x9D, 9971,
+	0x9E, 9994,
+	0x9F, 9997,
+	0xA0, 10000,
+	0xA1, 10019,
+	0xA2, 10041,
+	0xA3, 10060,
+	0xA4, 10082,
+	0xA5, 10085,
+	0xA6, 10105,
+	0xA7, 10108,
+	0xA8, 10128,
+	0xA9, 10134,
+	0xAA, 10163,
+	0xAB, 10166,
+	0xAC, 10186,
+	0xAD, 10189,
+	0xAE, 10209,
+	0xAF, 10212,
+	0xb0, 10232,
+	0xb1, 10232,
+	0xb2, 10232,
+	0xb3, 10232,
+	0xb4, 10232,
+	0xb5, 10232,
+	0xb6, 10232,
+	0xb7, 10232,
+	0xb8, 10238,
+	0xb9, 10238,
+	0xba, 10238,
+	0xbb, 10238,
+	0xbc, 10238,
+	0xbd, 10238,
+	0xbe, 10238,
+	0xbf, 10238,
+	0xC0, 10267,
+	0xC1, 10318,
+	0xC2, 10516,
+	0xC3, 10521,
+	0xC4, 10524,
+	0xC5, 10543,
+	0xC6, 10562,
+	0xC7, 10586,
+	0xC8, 10647,
+	0xC9, 10654,
+	0xCA, 10677,
+	0xCB, 10682,
+	0xCC, 10685,
+	0xCD, 10689,
+	0xCE, 10694,
+	0xCF, 10700,
+	0xD0, 10720,
+	0xD1, 10764,
+	0xD2, 10955,
+	0xD3, 10999,
+	0xD4, 11190,
+	0xD5, 11198,
+	0xD7, 11206,
+	0xD8, 11219,
+	0xD9, 11428,
+	0xDA, 11637,
+	0xDB, 11769,
+	0xDC, 11940,
+	0xDD, 12109,
+	0xDE, 12248,
+	0xDF, 12422,
+	0xE0, 12533,
+	0xE1, 12538,
+	0xE2, 12543,
+	0xE3, 12548,
+	0xE4, 12574,
+	0xE5, 12580,
+	0xE6, 12602,
+	0xE7, 12608,
+	0xE8, 12630,
+	0xE9, 12661,
+	0xEA, 12692,
+	0xEB, 12709,
+	0xEC, 12714,
+	0xED, 12719,
+	0xEE, 12738,
+	0xEF, 12743,
+	0xF1, 12762,
+	0xF4, 12765,
+	0xF5, 12768,
+	0xF6, 12771,
+	0xF7, 12810,
+	0xF8, 12986,
+	0xF9, 12989,
+	0xFA, 12992,
+	0xFB, 12995,
+	0xFC, 12998,
+	0xFD, 13001,
+	0xFE, 13004,
+	0xFF, 13021,
+	uint16(xFail),
+	/*490*/ uint16(xSetOp), uint16(ADD),
+	/*492*/ uint16(xReadSlashR),
+	/*493*/ uint16(xArgRM8),
+	/*494*/ uint16(xArgR8),
+	/*495*/ uint16(xMatch),
+	/*496*/ uint16(xCondIs64), 499, 515,
+	/*499*/ uint16(xCondDataSize), 503, 509, 0,
+	/*503*/ uint16(xSetOp), uint16(ADD),
+	/*505*/ uint16(xReadSlashR),
+	/*506*/ uint16(xArgRM16),
+	/*507*/ uint16(xArgR16),
+	/*508*/ uint16(xMatch),
+	/*509*/ uint16(xSetOp), uint16(ADD),
+	/*511*/ uint16(xReadSlashR),
+	/*512*/ uint16(xArgRM32),
+	/*513*/ uint16(xArgR32),
+	/*514*/ uint16(xMatch),
+	/*515*/ uint16(xCondDataSize), 503, 509, 519,
+	/*519*/ uint16(xSetOp), uint16(ADD),
+	/*521*/ uint16(xReadSlashR),
+	/*522*/ uint16(xArgRM64),
+	/*523*/ uint16(xArgR64),
+	/*524*/ uint16(xMatch),
+	/*525*/ uint16(xSetOp), uint16(ADD),
+	/*527*/ uint16(xReadSlashR),
+	/*528*/ uint16(xArgR8),
+	/*529*/ uint16(xArgRM8),
+	/*530*/ uint16(xMatch),
+	/*531*/ uint16(xCondIs64), 534, 550,
+	/*534*/ uint16(xCondDataSize), 538, 544, 0,
+	/*538*/ uint16(xSetOp), uint16(ADD),
+	/*540*/ uint16(xReadSlashR),
+	/*541*/ uint16(xArgR16),
+	/*542*/ uint16(xArgRM16),
+	/*543*/ uint16(xMatch),
+	/*544*/ uint16(xSetOp), uint16(ADD),
+	/*546*/ uint16(xReadSlashR),
+	/*547*/ uint16(xArgR32),
+	/*548*/ uint16(xArgRM32),
+	/*549*/ uint16(xMatch),
+	/*550*/ uint16(xCondDataSize), 538, 544, 554,
+	/*554*/ uint16(xSetOp), uint16(ADD),
+	/*556*/ uint16(xReadSlashR),
+	/*557*/ uint16(xArgR64),
+	/*558*/ uint16(xArgRM64),
+	/*559*/ uint16(xMatch),
+	/*560*/ uint16(xSetOp), uint16(ADD),
+	/*562*/ uint16(xReadIb),
+	/*563*/ uint16(xArgAL),
+	/*564*/ uint16(xArgImm8u),
+	/*565*/ uint16(xMatch),
+	/*566*/ uint16(xCondIs64), 569, 585,
+	/*569*/ uint16(xCondDataSize), 573, 579, 0,
+	/*573*/ uint16(xSetOp), uint16(ADD),
+	/*575*/ uint16(xReadIw),
+	/*576*/ uint16(xArgAX),
+	/*577*/ uint16(xArgImm16),
+	/*578*/ uint16(xMatch),
+	/*579*/ uint16(xSetOp), uint16(ADD),
+	/*581*/ uint16(xReadId),
+	/*582*/ uint16(xArgEAX),
+	/*583*/ uint16(xArgImm32),
+	/*584*/ uint16(xMatch),
+	/*585*/ uint16(xCondDataSize), 573, 579, 589,
+	/*589*/ uint16(xSetOp), uint16(ADD),
+	/*591*/ uint16(xReadId),
+	/*592*/ uint16(xArgRAX),
+	/*593*/ uint16(xArgImm32),
+	/*594*/ uint16(xMatch),
+	/*595*/ uint16(xCondIs64), 598, 0,
+	/*598*/ uint16(xSetOp), uint16(PUSH),
+	/*600*/ uint16(xArgES),
+	/*601*/ uint16(xMatch),
+	/*602*/ uint16(xCondIs64), 605, 0,
+	/*605*/ uint16(xSetOp), uint16(POP),
+	/*607*/ uint16(xArgES),
+	/*608*/ uint16(xMatch),
+	/*609*/ uint16(xSetOp), uint16(OR),
+	/*611*/ uint16(xReadSlashR),
+	/*612*/ uint16(xArgRM8),
+	/*613*/ uint16(xArgR8),
+	/*614*/ uint16(xMatch),
+	/*615*/ uint16(xCondIs64), 618, 634,
+	/*618*/ uint16(xCondDataSize), 622, 628, 0,
+	/*622*/ uint16(xSetOp), uint16(OR),
+	/*624*/ uint16(xReadSlashR),
+	/*625*/ uint16(xArgRM16),
+	/*626*/ uint16(xArgR16),
+	/*627*/ uint16(xMatch),
+	/*628*/ uint16(xSetOp), uint16(OR),
+	/*630*/ uint16(xReadSlashR),
+	/*631*/ uint16(xArgRM32),
+	/*632*/ uint16(xArgR32),
+	/*633*/ uint16(xMatch),
+	/*634*/ uint16(xCondDataSize), 622, 628, 638,
+	/*638*/ uint16(xSetOp), uint16(OR),
+	/*640*/ uint16(xReadSlashR),
+	/*641*/ uint16(xArgRM64),
+	/*642*/ uint16(xArgR64),
+	/*643*/ uint16(xMatch),
+	/*644*/ uint16(xSetOp), uint16(OR),
+	/*646*/ uint16(xReadSlashR),
+	/*647*/ uint16(xArgR8),
+	/*648*/ uint16(xArgRM8),
+	/*649*/ uint16(xMatch),
+	/*650*/ uint16(xCondIs64), 653, 669,
+	/*653*/ uint16(xCondDataSize), 657, 663, 0,
+	/*657*/ uint16(xSetOp), uint16(OR),
+	/*659*/ uint16(xReadSlashR),
+	/*660*/ uint16(xArgR16),
+	/*661*/ uint16(xArgRM16),
+	/*662*/ uint16(xMatch),
+	/*663*/ uint16(xSetOp), uint16(OR),
+	/*665*/ uint16(xReadSlashR),
+	/*666*/ uint16(xArgR32),
+	/*667*/ uint16(xArgRM32),
+	/*668*/ uint16(xMatch),
+	/*669*/ uint16(xCondDataSize), 657, 663, 673,
+	/*673*/ uint16(xSetOp), uint16(OR),
+	/*675*/ uint16(xReadSlashR),
+	/*676*/ uint16(xArgR64),
+	/*677*/ uint16(xArgRM64),
+	/*678*/ uint16(xMatch),
+	/*679*/ uint16(xSetOp), uint16(OR),
+	/*681*/ uint16(xReadIb),
+	/*682*/ uint16(xArgAL),
+	/*683*/ uint16(xArgImm8u),
+	/*684*/ uint16(xMatch),
+	/*685*/ uint16(xCondIs64), 688, 704,
+	/*688*/ uint16(xCondDataSize), 692, 698, 0,
+	/*692*/ uint16(xSetOp), uint16(OR),
+	/*694*/ uint16(xReadIw),
+	/*695*/ uint16(xArgAX),
+	/*696*/ uint16(xArgImm16),
+	/*697*/ uint16(xMatch),
+	/*698*/ uint16(xSetOp), uint16(OR),
+	/*700*/ uint16(xReadId),
+	/*701*/ uint16(xArgEAX),
+	/*702*/ uint16(xArgImm32),
+	/*703*/ uint16(xMatch),
+	/*704*/ uint16(xCondDataSize), 692, 698, 708,
+	/*708*/ uint16(xSetOp), uint16(OR),
+	/*710*/ uint16(xReadId),
+	/*711*/ uint16(xArgRAX),
+	/*712*/ uint16(xArgImm32),
+	/*713*/ uint16(xMatch),
+	/*714*/ uint16(xCondIs64), 717, 0,
+	/*717*/ uint16(xSetOp), uint16(PUSH),
+	/*719*/ uint16(xArgCS),
+	/*720*/ uint16(xMatch),
+	/*721*/ uint16(xCondByte), 228,
+	0x00, 1180,
+	0x01, 1237,
+	0x02, 1345,
+	0x03, 1367,
+	0x05, 1389,
+	0x06, 1395,
+	0x07, 1398,
+	0x08, 1404,
+	0x09, 1407,
+	0x0B, 1410,
+	0x0D, 1413,
+	0x10, 1426,
+	0x11, 1460,
+	0x12, 1494,
+	0x13, 1537,
+	0x14, 1555,
+	0x15, 1573,
+	0x16, 1591,
+	0x17, 1626,
+	0x18, 1644,
+	0x1F, 1669,
+	0x20, 1690,
+	0x21, 1705,
+	0x22, 1720,
+	0x23, 1735,
+	0x24, 1750,
+	0x26, 1765,
+	0x28, 1780,
+	0x29, 1798,
+	0x2A, 1816,
+	0x2B, 1903,
+	0x2C, 1937,
+	0x2D, 2024,
+	0x2E, 2111,
+	0x2F, 2129,
+	0x30, 2147,
+	0x31, 2150,
+	0x32, 2153,
+	0x33, 2156,
+	0x34, 2159,
+	0x35, 2162,
+	0x38, 2172,
+	0x3A, 3073,
+	0x40, 3484,
+	0x41, 3513,
+	0x42, 3542,
+	0x43, 3571,
+	0x44, 3600,
+	0x45, 3629,
+	0x46, 3658,
+	0x47, 3687,
+	0x48, 3716,
+	0x49, 3745,
+	0x4A, 3774,
+	0x4B, 3803,
+	0x4C, 3832,
+	0x4D, 3861,
+	0x4E, 3890,
+	0x4F, 3919,
+	0x50, 3948,
+	0x51, 3966,
+	0x52, 4000,
+	0x53, 4018,
+	0x54, 4036,
+	0x55, 4054,
+	0x56, 4072,
+	0x57, 4090,
+	0x58, 4108,
+	0x59, 4142,
+	0x5A, 4176,
+	0x5B, 4210,
+	0x5C, 4236,
+	0x5D, 4270,
+	0x5E, 4304,
+	0x5F, 4338,
+	0x60, 4372,
+	0x61, 4390,
+	0x62, 4408,
+	0x63, 4426,
+	0x64, 4444,
+	0x65, 4462,
+	0x66, 4480,
+	0x67, 4498,
+	0x68, 4516,
+	0x69, 4534,
+	0x6A, 4552,
+	0x6B, 4570,
+	0x6C, 4588,
+	0x6D, 4598,
+	0x6E, 4608,
+	0x6F, 4675,
+	0x70, 4701,
+	0x71, 4743,
+	0x72, 4806,
+	0x73, 4869,
+	0x74, 4934,
+	0x75, 4952,
+	0x76, 4970,
+	0x77, 4988,
+	0x7C, 4991,
+	0x7D, 5009,
+	0x7E, 5027,
+	0x7F, 5104,
+	0x80, 5130,
+	0x81, 5161,
+	0x82, 5192,
+	0x83, 5223,
+	0x84, 5254,
+	0x85, 5285,
+	0x86, 5316,
+	0x87, 5347,
+	0x88, 5378,
+	0x89, 5409,
+	0x8A, 5440,
+	0x8B, 5471,
+	0x8C, 5502,
+	0x8D, 5533,
+	0x8E, 5564,
+	0x8F, 5595,
+	0x90, 5626,
+	0x91, 5631,
+	0x92, 5636,
+	0x93, 5641,
+	0x94, 5646,
+	0x95, 5651,
+	0x96, 5656,
+	0x97, 5661,
+	0x98, 5666,
+	0x99, 5671,
+	0x9A, 5676,
+	0x9B, 5681,
+	0x9C, 5686,
+	0x9D, 5691,
+	0x9E, 5696,
+	0x9F, 5701,
+	0xA0, 5706,
+	0xA1, 5710,
+	0xA2, 5737,
+	0xA3, 5740,
+	0xA4, 5769,
+	0xA5, 5804,
+	0xA8, 5836,
+	0xA9, 5840,
+	0xAA, 5867,
+	0xAB, 5870,
+	0xAC, 5899,
+	0xAD, 5934,
+	0xAE, 5966,
+	0xAF, 6224,
+	0xB0, 6253,
+	0xB1, 6259,
+	0xB2, 6288,
+	0xB3, 6317,
+	0xB4, 6346,
+	0xB5, 6375,
+	0xB6, 6404,
+	0xB7, 6433,
+	0xB8, 6462,
+	0xB9, 6499,
+	0xBA, 6502,
+	0xBB, 6627,
+	0xBC, 6656,
+	0xBD, 6723,
+	0xBE, 6790,
+	0xBF, 6819,
+	0xC0, 6848,
+	0xC1, 6854,
+	0xC2, 6883,
+	0xC3, 6925,
+	0xC4, 6954,
+	0xC5, 6976,
+	0xC6, 6998,
+	0xC7, 7020,
+	0xc8, 7149,
+	0xc9, 7149,
+	0xca, 7149,
+	0xcb, 7149,
+	0xcc, 7149,
+	0xcd, 7149,
+	0xce, 7149,
+	0xcf, 7149,
+	0xD0, 7172,
+	0xD1, 7190,
+	0xD2, 7208,
+	0xD3, 7226,
+	0xD4, 7244,
+	0xD5, 7262,
+	0xD6, 7280,
+	0xD7, 7306,
+	0xD8, 7324,
+	0xD9, 7342,
+	0xDA, 7360,
+	0xDB, 7378,
+	0xDC, 7396,
+	0xDD, 7414,
+	0xDE, 7432,
+	0xDF, 7450,
+	0xE0, 7468,
+	0xE1, 7486,
+	0xE2, 7504,
+	0xE3, 7522,
+	0xE4, 7540,
+	0xE5, 7558,
+	0xE6, 7576,
+	0xE7, 7602,
+	0xE8, 7620,
+	0xE9, 7638,
+	0xEA, 7656,
+	0xEB, 7674,
+	0xEC, 7692,
+	0xED, 7710,
+	0xEE, 7728,
+	0xEF, 7746,
+	0xF0, 7764,
+	0xF1, 7774,
+	0xF2, 7792,
+	0xF3, 7810,
+	0xF4, 7828,
+	0xF5, 7846,
+	0xF6, 7864,
+	0xF7, 7882,
+	0xF8, 7900,
+	0xF9, 7918,
+	0xFA, 7936,
+	0xFB, 7954,
+	0xFC, 7972,
+	0xFD, 7990,
+	0xFE, 8008,
+	uint16(xFail),
+	/*1180*/ uint16(xCondSlashR),
+	1189, // 0
+	1205, // 1
+	1221, // 2
+	1225, // 3
+	1229, // 4
+	1233, // 5
+	0,    // 6
+	0,    // 7
+	/*1189*/ uint16(xCondDataSize), 1193, 1197, 1201,
+	/*1193*/ uint16(xSetOp), uint16(SLDT),
+	/*1195*/ uint16(xArgRM16),
+	/*1196*/ uint16(xMatch),
+	/*1197*/ uint16(xSetOp), uint16(SLDT),
+	/*1199*/ uint16(xArgR32M16),
+	/*1200*/ uint16(xMatch),
+	/*1201*/ uint16(xSetOp), uint16(SLDT),
+	/*1203*/ uint16(xArgR64M16),
+	/*1204*/ uint16(xMatch),
+	/*1205*/ uint16(xCondDataSize), 1209, 1213, 1217,
+	/*1209*/ uint16(xSetOp), uint16(STR),
+	/*1211*/ uint16(xArgRM16),
+	/*1212*/ uint16(xMatch),
+	/*1213*/ uint16(xSetOp), uint16(STR),
+	/*1215*/ uint16(xArgR32M16),
+	/*1216*/ uint16(xMatch),
+	/*1217*/ uint16(xSetOp), uint16(STR),
+	/*1219*/ uint16(xArgR64M16),
+	/*1220*/ uint16(xMatch),
+	/*1221*/ uint16(xSetOp), uint16(LLDT),
+	/*1223*/ uint16(xArgRM16),
+	/*1224*/ uint16(xMatch),
+	/*1225*/ uint16(xSetOp), uint16(LTR),
+	/*1227*/ uint16(xArgRM16),
+	/*1228*/ uint16(xMatch),
+	/*1229*/ uint16(xSetOp), uint16(VERR),
+	/*1231*/ uint16(xArgRM16),
+	/*1232*/ uint16(xMatch),
+	/*1233*/ uint16(xSetOp), uint16(VERW),
+	/*1235*/ uint16(xArgRM16),
+	/*1236*/ uint16(xMatch),
+	/*1237*/ uint16(xCondByte), 8,
+	0xC8, 1318,
+	0xC9, 1321,
+	0xD0, 1324,
+	0xD1, 1327,
+	0xD5, 1330,
+	0xD6, 1333,
+	0xF8, 1336,
+	0xF9, 1342,
+	/*1255*/ uint16(xCondSlashR),
+	1264, // 0
+	1268, // 1
+	1272, // 2
+	1283, // 3
+	1294, // 4
+	0,    // 5
+	1310, // 6
+	1314, // 7
+	/*1264*/ uint16(xSetOp), uint16(SGDT),
+	/*1266*/ uint16(xArgM),
+	/*1267*/ uint16(xMatch),
+	/*1268*/ uint16(xSetOp), uint16(SIDT),
+	/*1270*/ uint16(xArgM),
+	/*1271*/ uint16(xMatch),
+	/*1272*/ uint16(xCondIs64), 1275, 1279,
+	/*1275*/ uint16(xSetOp), uint16(LGDT),
+	/*1277*/ uint16(xArgM16and32),
+	/*1278*/ uint16(xMatch),
+	/*1279*/ uint16(xSetOp), uint16(LGDT),
+	/*1281*/ uint16(xArgM16and64),
+	/*1282*/ uint16(xMatch),
+	/*1283*/ uint16(xCondIs64), 1286, 1290,
+	/*1286*/ uint16(xSetOp), uint16(LIDT),
+	/*1288*/ uint16(xArgM16and32),
+	/*1289*/ uint16(xMatch),
+	/*1290*/ uint16(xSetOp), uint16(LIDT),
+	/*1292*/ uint16(xArgM16and64),
+	/*1293*/ uint16(xMatch),
+	/*1294*/ uint16(xCondDataSize), 1298, 1302, 1306,
+	/*1298*/ uint16(xSetOp), uint16(SMSW),
+	/*1300*/ uint16(xArgRM16),
+	/*1301*/ uint16(xMatch),
+	/*1302*/ uint16(xSetOp), uint16(SMSW),
+	/*1304*/ uint16(xArgR32M16),
+	/*1305*/ uint16(xMatch),
+	/*1306*/ uint16(xSetOp), uint16(SMSW),
+	/*1308*/ uint16(xArgR64M16),
+	/*1309*/ uint16(xMatch),
+	/*1310*/ uint16(xSetOp), uint16(LMSW),
+	/*1312*/ uint16(xArgRM16),
+	/*1313*/ uint16(xMatch),
+	/*1314*/ uint16(xSetOp), uint16(INVLPG),
+	/*1316*/ uint16(xArgM),
+	/*1317*/ uint16(xMatch),
+	/*1318*/ uint16(xSetOp), uint16(MONITOR),
+	/*1320*/ uint16(xMatch),
+	/*1321*/ uint16(xSetOp), uint16(MWAIT),
+	/*1323*/ uint16(xMatch),
+	/*1324*/ uint16(xSetOp), uint16(XGETBV),
+	/*1326*/ uint16(xMatch),
+	/*1327*/ uint16(xSetOp), uint16(XSETBV),
+	/*1329*/ uint16(xMatch),
+	/*1330*/ uint16(xSetOp), uint16(XEND),
+	/*1332*/ uint16(xMatch),
+	/*1333*/ uint16(xSetOp), uint16(XTEST),
+	/*1335*/ uint16(xMatch),
+	/*1336*/ uint16(xCondIs64), 0, 1339,
+	/*1339*/ uint16(xSetOp), uint16(SWAPGS),
+	/*1341*/ uint16(xMatch),
+	/*1342*/ uint16(xSetOp), uint16(RDTSCP),
+	/*1344*/ uint16(xMatch),
+	/*1345*/ uint16(xCondDataSize), 1349, 1355, 1361,
+	/*1349*/ uint16(xSetOp), uint16(LAR),
+	/*1351*/ uint16(xReadSlashR),
+	/*1352*/ uint16(xArgR16),
+	/*1353*/ uint16(xArgRM16),
+	/*1354*/ uint16(xMatch),
+	/*1355*/ uint16(xSetOp), uint16(LAR),
+	/*1357*/ uint16(xReadSlashR),
+	/*1358*/ uint16(xArgR32),
+	/*1359*/ uint16(xArgR32M16),
+	/*1360*/ uint16(xMatch),
+	/*1361*/ uint16(xSetOp), uint16(LAR),
+	/*1363*/ uint16(xReadSlashR),
+	/*1364*/ uint16(xArgR64),
+	/*1365*/ uint16(xArgR64M16),
+	/*1366*/ uint16(xMatch),
+	/*1367*/ uint16(xCondDataSize), 1371, 1377, 1383,
+	/*1371*/ uint16(xSetOp), uint16(LSL),
+	/*1373*/ uint16(xReadSlashR),
+	/*1374*/ uint16(xArgR16),
+	/*1375*/ uint16(xArgRM16),
+	/*1376*/ uint16(xMatch),
+	/*1377*/ uint16(xSetOp), uint16(LSL),
+	/*1379*/ uint16(xReadSlashR),
+	/*1380*/ uint16(xArgR32),
+	/*1381*/ uint16(xArgR32M16),
+	/*1382*/ uint16(xMatch),
+	/*1383*/ uint16(xSetOp), uint16(LSL),
+	/*1385*/ uint16(xReadSlashR),
+	/*1386*/ uint16(xArgR64),
+	/*1387*/ uint16(xArgR32M16),
+	/*1388*/ uint16(xMatch),
+	/*1389*/ uint16(xCondIs64), 0, 1392,
+	/*1392*/ uint16(xSetOp), uint16(SYSCALL),
+	/*1394*/ uint16(xMatch),
+	/*1395*/ uint16(xSetOp), uint16(CLTS),
+	/*1397*/ uint16(xMatch),
+	/*1398*/ uint16(xCondIs64), 0, 1401,
+	/*1401*/ uint16(xSetOp), uint16(SYSRET),
+	/*1403*/ uint16(xMatch),
+	/*1404*/ uint16(xSetOp), uint16(INVD),
+	/*1406*/ uint16(xMatch),
+	/*1407*/ uint16(xSetOp), uint16(WBINVD),
+	/*1409*/ uint16(xMatch),
+	/*1410*/ uint16(xSetOp), uint16(UD2),
+	/*1412*/ uint16(xMatch),
+	/*1413*/ uint16(xCondSlashR),
+	0,    // 0
+	1422, // 1
+	0,    // 2
+	0,    // 3
+	0,    // 4
+	0,    // 5
+	0,    // 6
+	0,    // 7
+	/*1422*/ uint16(xSetOp), uint16(PREFETCHW),
+	/*1424*/ uint16(xArgM8),
+	/*1425*/ uint16(xMatch),
+	/*1426*/ uint16(xCondPrefix), 4,
+	0xF3, 1454,
+	0xF2, 1448,
+	0x66, 1442,
+	0x0, 1436,
+	/*1436*/ uint16(xSetOp), uint16(MOVUPS),
+	/*1438*/ uint16(xReadSlashR),
+	/*1439*/ uint16(xArgXmm1),
+	/*1440*/ uint16(xArgXmm2M128),
+	/*1441*/ uint16(xMatch),
+	/*1442*/ uint16(xSetOp), uint16(MOVUPD),
+	/*1444*/ uint16(xReadSlashR),
+	/*1445*/ uint16(xArgXmm1),
+	/*1446*/ uint16(xArgXmm2M128),
+	/*1447*/ uint16(xMatch),
+	/*1448*/ uint16(xSetOp), uint16(MOVSD_XMM),
+	/*1450*/ uint16(xReadSlashR),
+	/*1451*/ uint16(xArgXmm1),
+	/*1452*/ uint16(xArgXmm2M64),
+	/*1453*/ uint16(xMatch),
+	/*1454*/ uint16(xSetOp), uint16(MOVSS),
+	/*1456*/ uint16(xReadSlashR),
+	/*1457*/ uint16(xArgXmm1),
+	/*1458*/ uint16(xArgXmm2M32),
+	/*1459*/ uint16(xMatch),
+	/*1460*/ uint16(xCondPrefix), 4,
+	0xF3, 1488,
+	0xF2, 1482,
+	0x66, 1476,
+	0x0, 1470,
+	/*1470*/ uint16(xSetOp), uint16(MOVUPS),
+	/*1472*/ uint16(xReadSlashR),
+	/*1473*/ uint16(xArgXmm2M128),
+	/*1474*/ uint16(xArgXmm1),
+	/*1475*/ uint16(xMatch),
+	/*1476*/ uint16(xSetOp), uint16(MOVUPD),
+	/*1478*/ uint16(xReadSlashR),
+	/*1479*/ uint16(xArgXmm2M128),
+	/*1480*/ uint16(xArgXmm),
+	/*1481*/ uint16(xMatch),
+	/*1482*/ uint16(xSetOp), uint16(MOVSD_XMM),
+	/*1484*/ uint16(xReadSlashR),
+	/*1485*/ uint16(xArgXmm2M64),
+	/*1486*/ uint16(xArgXmm1),
+	/*1487*/ uint16(xMatch),
+	/*1488*/ uint16(xSetOp), uint16(MOVSS),
+	/*1490*/ uint16(xReadSlashR),
+	/*1491*/ uint16(xArgXmm2M32),
+	/*1492*/ uint16(xArgXmm),
+	/*1493*/ uint16(xMatch),
+	/*1494*/ uint16(xCondPrefix), 4,
+	0xF3, 1531,
+	0xF2, 1525,
+	0x66, 1519,
+	0x0, 1504,
+	/*1504*/ uint16(xCondIsMem), 1507, 1513,
+	/*1507*/ uint16(xSetOp), uint16(MOVHLPS),
+	/*1509*/ uint16(xReadSlashR),
+	/*1510*/ uint16(xArgXmm1),
+	/*1511*/ uint16(xArgXmm2),
+	/*1512*/ uint16(xMatch),
+	/*1513*/ uint16(xSetOp), uint16(MOVLPS),
+	/*1515*/ uint16(xReadSlashR),
+	/*1516*/ uint16(xArgXmm),
+	/*1517*/ uint16(xArgM64),
+	/*1518*/ uint16(xMatch),
+	/*1519*/ uint16(xSetOp), uint16(MOVLPD),
+	/*1521*/ uint16(xReadSlashR),
+	/*1522*/ uint16(xArgXmm),
+	/*1523*/ uint16(xArgXmm2M64),
+	/*1524*/ uint16(xMatch),
+	/*1525*/ uint16(xSetOp), uint16(MOVDDUP),
+	/*1527*/ uint16(xReadSlashR),
+	/*1528*/ uint16(xArgXmm1),
+	/*1529*/ uint16(xArgXmm2M64),
+	/*1530*/ uint16(xMatch),
+	/*1531*/ uint16(xSetOp), uint16(MOVSLDUP),
+	/*1533*/ uint16(xReadSlashR),
+	/*1534*/ uint16(xArgXmm1),
+	/*1535*/ uint16(xArgXmm2M128),
+	/*1536*/ uint16(xMatch),
+	/*1537*/ uint16(xCondPrefix), 2,
+	0x66, 1549,
+	0x0, 1543,
+	/*1543*/ uint16(xSetOp), uint16(MOVLPS),
+	/*1545*/ uint16(xReadSlashR),
+	/*1546*/ uint16(xArgM64),
+	/*1547*/ uint16(xArgXmm),
+	/*1548*/ uint16(xMatch),
+	/*1549*/ uint16(xSetOp), uint16(MOVLPD),
+	/*1551*/ uint16(xReadSlashR),
+	/*1552*/ uint16(xArgXmm2M64),
+	/*1553*/ uint16(xArgXmm),
+	/*1554*/ uint16(xMatch),
+	/*1555*/ uint16(xCondPrefix), 2,
+	0x66, 1567,
+	0x0, 1561,
+	/*1561*/ uint16(xSetOp), uint16(UNPCKLPS),
+	/*1563*/ uint16(xReadSlashR),
+	/*1564*/ uint16(xArgXmm1),
+	/*1565*/ uint16(xArgXmm2M128),
+	/*1566*/ uint16(xMatch),
+	/*1567*/ uint16(xSetOp), uint16(UNPCKLPD),
+	/*1569*/ uint16(xReadSlashR),
+	/*1570*/ uint16(xArgXmm1),
+	/*1571*/ uint16(xArgXmm2M128),
+	/*1572*/ uint16(xMatch),
+	/*1573*/ uint16(xCondPrefix), 2,
+	0x66, 1585,
+	0x0, 1579,
+	/*1579*/ uint16(xSetOp), uint16(UNPCKHPS),
+	/*1581*/ uint16(xReadSlashR),
+	/*1582*/ uint16(xArgXmm1),
+	/*1583*/ uint16(xArgXmm2M128),
+	/*1584*/ uint16(xMatch),
+	/*1585*/ uint16(xSetOp), uint16(UNPCKHPD),
+	/*1587*/ uint16(xReadSlashR),
+	/*1588*/ uint16(xArgXmm1),
+	/*1589*/ uint16(xArgXmm2M128),
+	/*1590*/ uint16(xMatch),
+	/*1591*/ uint16(xCondPrefix), 3,
+	0xF3, 1620,
+	0x66, 1614,
+	0x0, 1599,
+	/*1599*/ uint16(xCondIsMem), 1602, 1608,
+	/*1602*/ uint16(xSetOp), uint16(MOVLHPS),
+	/*1604*/ uint16(xReadSlashR),
+	/*1605*/ uint16(xArgXmm1),
+	/*1606*/ uint16(xArgXmm2),
+	/*1607*/ uint16(xMatch),
+	/*1608*/ uint16(xSetOp), uint16(MOVHPS),
+	/*1610*/ uint16(xReadSlashR),
+	/*1611*/ uint16(xArgXmm),
+	/*1612*/ uint16(xArgM64),
+	/*1613*/ uint16(xMatch),
+	/*1614*/ uint16(xSetOp), uint16(MOVHPD),
+	/*1616*/ uint16(xReadSlashR),
+	/*1617*/ uint16(xArgXmm),
+	/*1618*/ uint16(xArgXmm2M64),
+	/*1619*/ uint16(xMatch),
+	/*1620*/ uint16(xSetOp), uint16(MOVSHDUP),
+	/*1622*/ uint16(xReadSlashR),
+	/*1623*/ uint16(xArgXmm1),
+	/*1624*/ uint16(xArgXmm2M128),
+	/*1625*/ uint16(xMatch),
+	/*1626*/ uint16(xCondPrefix), 2,
+	0x66, 1638,
+	0x0, 1632,
+	/*1632*/ uint16(xSetOp), uint16(MOVHPS),
+	/*1634*/ uint16(xReadSlashR),
+	/*1635*/ uint16(xArgM64),
+	/*1636*/ uint16(xArgXmm),
+	/*1637*/ uint16(xMatch),
+	/*1638*/ uint16(xSetOp), uint16(MOVHPD),
+	/*1640*/ uint16(xReadSlashR),
+	/*1641*/ uint16(xArgXmm2M64),
+	/*1642*/ uint16(xArgXmm),
+	/*1643*/ uint16(xMatch),
+	/*1644*/ uint16(xCondSlashR),
+	1653, // 0
+	1657, // 1
+	1661, // 2
+	1665, // 3
+	0,    // 4
+	0,    // 5
+	0,    // 6
+	0,    // 7
+	/*1653*/ uint16(xSetOp), uint16(PREFETCHNTA),
+	/*1655*/ uint16(xArgM8),
+	/*1656*/ uint16(xMatch),
+	/*1657*/ uint16(xSetOp), uint16(PREFETCHT0),
+	/*1659*/ uint16(xArgM8),
+	/*1660*/ uint16(xMatch),
+	/*1661*/ uint16(xSetOp), uint16(PREFETCHT1),
+	/*1663*/ uint16(xArgM8),
+	/*1664*/ uint16(xMatch),
+	/*1665*/ uint16(xSetOp), uint16(PREFETCHT2),
+	/*1667*/ uint16(xArgM8),
+	/*1668*/ uint16(xMatch),
+	/*1669*/ uint16(xCondSlashR),
+	1678, // 0
+	0,    // 1
+	0,    // 2
+	0,    // 3
+	0,    // 4
+	0,    // 5
+	0,    // 6
+	0,    // 7
+	/*1678*/ uint16(xCondDataSize), 1682, 1686, 0,
+	/*1682*/ uint16(xSetOp), uint16(NOP),
+	/*1684*/ uint16(xArgRM16),
+	/*1685*/ uint16(xMatch),
+	/*1686*/ uint16(xSetOp), uint16(NOP),
+	/*1688*/ uint16(xArgRM32),
+	/*1689*/ uint16(xMatch),
+	/*1690*/ uint16(xCondIs64), 1693, 1699,
+	/*1693*/ uint16(xSetOp), uint16(MOV),
+	/*1695*/ uint16(xReadSlashR),
+	/*1696*/ uint16(xArgRmf32),
+	/*1697*/ uint16(xArgCR0dashCR7),
+	/*1698*/ uint16(xMatch),
+	/*1699*/ uint16(xSetOp), uint16(MOV),
+	/*1701*/ uint16(xReadSlashR),
+	/*1702*/ uint16(xArgRmf64),
+	/*1703*/ uint16(xArgCR0dashCR7),
+	/*1704*/ uint16(xMatch),
+	/*1705*/ uint16(xCondIs64), 1708, 1714,
+	/*1708*/ uint16(xSetOp), uint16(MOV),
+	/*1710*/ uint16(xReadSlashR),
+	/*1711*/ uint16(xArgRmf32),
+	/*1712*/ uint16(xArgDR0dashDR7),
+	/*1713*/ uint16(xMatch),
+	/*1714*/ uint16(xSetOp), uint16(MOV),
+	/*1716*/ uint16(xReadSlashR),
+	/*1717*/ uint16(xArgRmf64),
+	/*1718*/ uint16(xArgDR0dashDR7),
+	/*1719*/ uint16(xMatch),
+	/*1720*/ uint16(xCondIs64), 1723, 1729,
+	/*1723*/ uint16(xSetOp), uint16(MOV),
+	/*1725*/ uint16(xReadSlashR),
+	/*1726*/ uint16(xArgCR0dashCR7),
+	/*1727*/ uint16(xArgRmf32),
+	/*1728*/ uint16(xMatch),
+	/*1729*/ uint16(xSetOp), uint16(MOV),
+	/*1731*/ uint16(xReadSlashR),
+	/*1732*/ uint16(xArgCR0dashCR7),
+	/*1733*/ uint16(xArgRmf64),
+	/*1734*/ uint16(xMatch),
+	/*1735*/ uint16(xCondIs64), 1738, 1744,
+	/*1738*/ uint16(xSetOp), uint16(MOV),
+	/*1740*/ uint16(xReadSlashR),
+	/*1741*/ uint16(xArgDR0dashDR7),
+	/*1742*/ uint16(xArgRmf32),
+	/*1743*/ uint16(xMatch),
+	/*1744*/ uint16(xSetOp), uint16(MOV),
+	/*1746*/ uint16(xReadSlashR),
+	/*1747*/ uint16(xArgDR0dashDR7),
+	/*1748*/ uint16(xArgRmf64),
+	/*1749*/ uint16(xMatch),
+	/*1750*/ uint16(xCondIs64), 1753, 1759,
+	/*1753*/ uint16(xSetOp), uint16(MOV),
+	/*1755*/ uint16(xReadSlashR),
+	/*1756*/ uint16(xArgRmf32),
+	/*1757*/ uint16(xArgTR0dashTR7),
+	/*1758*/ uint16(xMatch),
+	/*1759*/ uint16(xSetOp), uint16(MOV),
+	/*1761*/ uint16(xReadSlashR),
+	/*1762*/ uint16(xArgRmf64),
+	/*1763*/ uint16(xArgTR0dashTR7),
+	/*1764*/ uint16(xMatch),
+	/*1765*/ uint16(xCondIs64), 1768, 1774,
+	/*1768*/ uint16(xSetOp), uint16(MOV),
+	/*1770*/ uint16(xReadSlashR),
+	/*1771*/ uint16(xArgTR0dashTR7),
+	/*1772*/ uint16(xArgRmf32),
+	/*1773*/ uint16(xMatch),
+	/*1774*/ uint16(xSetOp), uint16(MOV),
+	/*1776*/ uint16(xReadSlashR),
+	/*1777*/ uint16(xArgTR0dashTR7),
+	/*1778*/ uint16(xArgRmf64),
+	/*1779*/ uint16(xMatch),
+	/*1780*/ uint16(xCondPrefix), 2,
+	0x66, 1792,
+	0x0, 1786,
+	/*1786*/ uint16(xSetOp), uint16(MOVAPS),
+	/*1788*/ uint16(xReadSlashR),
+	/*1789*/ uint16(xArgXmm1),
+	/*1790*/ uint16(xArgXmm2M128),
+	/*1791*/ uint16(xMatch),
+	/*1792*/ uint16(xSetOp), uint16(MOVAPD),
+	/*1794*/ uint16(xReadSlashR),
+	/*1795*/ uint16(xArgXmm1),
+	/*1796*/ uint16(xArgXmm2M128),
+	/*1797*/ uint16(xMatch),
+	/*1798*/ uint16(xCondPrefix), 2,
+	0x66, 1810,
+	0x0, 1804,
+	/*1804*/ uint16(xSetOp), uint16(MOVAPS),
+	/*1806*/ uint16(xReadSlashR),
+	/*1807*/ uint16(xArgXmm2M128),
+	/*1808*/ uint16(xArgXmm1),
+	/*1809*/ uint16(xMatch),
+	/*1810*/ uint16(xSetOp), uint16(MOVAPD),
+	/*1812*/ uint16(xReadSlashR),
+	/*1813*/ uint16(xArgXmm2M128),
+	/*1814*/ uint16(xArgXmm1),
+	/*1815*/ uint16(xMatch),
+	/*1816*/ uint16(xCondIs64), 1819, 1873,
+	/*1819*/ uint16(xCondPrefix), 4,
+	0xF3, 1857,
+	0xF2, 1841,
+	0x66, 1835,
+	0x0, 1829,
+	/*1829*/ uint16(xSetOp), uint16(CVTPI2PS),
+	/*1831*/ uint16(xReadSlashR),
+	/*1832*/ uint16(xArgXmm),
+	/*1833*/ uint16(xArgMmM64),
+	/*1834*/ uint16(xMatch),
+	/*1835*/ uint16(xSetOp), uint16(CVTPI2PD),
+	/*1837*/ uint16(xReadSlashR),
+	/*1838*/ uint16(xArgXmm),
+	/*1839*/ uint16(xArgMmM64),
+	/*1840*/ uint16(xMatch),
+	/*1841*/ uint16(xCondDataSize), 1845, 1851, 0,
+	/*1845*/ uint16(xSetOp), uint16(CVTSI2SD),
+	/*1847*/ uint16(xReadSlashR),
+	/*1848*/ uint16(xArgXmm),
+	/*1849*/ uint16(xArgRM32),
+	/*1850*/ uint16(xMatch),
+	/*1851*/ uint16(xSetOp), uint16(CVTSI2SD),
+	/*1853*/ uint16(xReadSlashR),
+	/*1854*/ uint16(xArgXmm),
+	/*1855*/ uint16(xArgRM32),
+	/*1856*/ uint16(xMatch),
+	/*1857*/ uint16(xCondDataSize), 1861, 1867, 0,
+	/*1861*/ uint16(xSetOp), uint16(CVTSI2SS),
+	/*1863*/ uint16(xReadSlashR),
+	/*1864*/ uint16(xArgXmm),
+	/*1865*/ uint16(xArgRM32),
+	/*1866*/ uint16(xMatch),
+	/*1867*/ uint16(xSetOp), uint16(CVTSI2SS),
+	/*1869*/ uint16(xReadSlashR),
+	/*1870*/ uint16(xArgXmm),
+	/*1871*/ uint16(xArgRM32),
+	/*1872*/ uint16(xMatch),
+	/*1873*/ uint16(xCondPrefix), 4,
+	0xF3, 1893,
+	0xF2, 1883,
+	0x66, 1835,
+	0x0, 1829,
+	/*1883*/ uint16(xCondDataSize), 1845, 1851, 1887,
+	/*1887*/ uint16(xSetOp), uint16(CVTSI2SD),
+	/*1889*/ uint16(xReadSlashR),
+	/*1890*/ uint16(xArgXmm),
+	/*1891*/ uint16(xArgRM64),
+	/*1892*/ uint16(xMatch),
+	/*1893*/ uint16(xCondDataSize), 1861, 1867, 1897,
+	/*1897*/ uint16(xSetOp), uint16(CVTSI2SS),
+	/*1899*/ uint16(xReadSlashR),
+	/*1900*/ uint16(xArgXmm),
+	/*1901*/ uint16(xArgRM64),
+	/*1902*/ uint16(xMatch),
+	/*1903*/ uint16(xCondPrefix), 4,
+	0xF3, 1931,
+	0xF2, 1925,
+	0x66, 1919,
+	0x0, 1913,
+	/*1913*/ uint16(xSetOp), uint16(MOVNTPS),
+	/*1915*/ uint16(xReadSlashR),
+	/*1916*/ uint16(xArgM128),
+	/*1917*/ uint16(xArgXmm),
+	/*1918*/ uint16(xMatch),
+	/*1919*/ uint16(xSetOp), uint16(MOVNTPD),
+	/*1921*/ uint16(xReadSlashR),
+	/*1922*/ uint16(xArgM128),
+	/*1923*/ uint16(xArgXmm),
+	/*1924*/ uint16(xMatch),
+	/*1925*/ uint16(xSetOp), uint16(MOVNTSD),
+	/*1927*/ uint16(xReadSlashR),
+	/*1928*/ uint16(xArgM64),
+	/*1929*/ uint16(xArgXmm),
+	/*1930*/ uint16(xMatch),
+	/*1931*/ uint16(xSetOp), uint16(MOVNTSS),
+	/*1933*/ uint16(xReadSlashR),
+	/*1934*/ uint16(xArgM32),
+	/*1935*/ uint16(xArgXmm),
+	/*1936*/ uint16(xMatch),
+	/*1937*/ uint16(xCondIs64), 1940, 1994,
+	/*1940*/ uint16(xCondPrefix), 4,
+	0xF3, 1978,
+	0xF2, 1962,
+	0x66, 1956,
+	0x0, 1950,
+	/*1950*/ uint16(xSetOp), uint16(CVTTPS2PI),
+	/*1952*/ uint16(xReadSlashR),
+	/*1953*/ uint16(xArgMm),
+	/*1954*/ uint16(xArgXmmM64),
+	/*1955*/ uint16(xMatch),
+	/*1956*/ uint16(xSetOp), uint16(CVTTPD2PI),
+	/*1958*/ uint16(xReadSlashR),
+	/*1959*/ uint16(xArgMm),
+	/*1960*/ uint16(xArgXmmM128),
+	/*1961*/ uint16(xMatch),
+	/*1962*/ uint16(xCondDataSize), 1966, 1972, 0,
+	/*1966*/ uint16(xSetOp), uint16(CVTTSD2SI),
+	/*1968*/ uint16(xReadSlashR),
+	/*1969*/ uint16(xArgR32),
+	/*1970*/ uint16(xArgXmmM64),
+	/*1971*/ uint16(xMatch),
+	/*1972*/ uint16(xSetOp), uint16(CVTTSD2SI),
+	/*1974*/ uint16(xReadSlashR),
+	/*1975*/ uint16(xArgR32),
+	/*1976*/ uint16(xArgXmmM64),
+	/*1977*/ uint16(xMatch),
+	/*1978*/ uint16(xCondDataSize), 1982, 1988, 0,
+	/*1982*/ uint16(xSetOp), uint16(CVTTSS2SI),
+	/*1984*/ uint16(xReadSlashR),
+	/*1985*/ uint16(xArgR32),
+	/*1986*/ uint16(xArgXmmM32),
+	/*1987*/ uint16(xMatch),
+	/*1988*/ uint16(xSetOp), uint16(CVTTSS2SI),
+	/*1990*/ uint16(xReadSlashR),
+	/*1991*/ uint16(xArgR32),
+	/*1992*/ uint16(xArgXmmM32),
+	/*1993*/ uint16(xMatch),
+	/*1994*/ uint16(xCondPrefix), 4,
+	0xF3, 2014,
+	0xF2, 2004,
+	0x66, 1956,
+	0x0, 1950,
+	/*2004*/ uint16(xCondDataSize), 1966, 1972, 2008,
+	/*2008*/ uint16(xSetOp), uint16(CVTTSD2SI),
+	/*2010*/ uint16(xReadSlashR),
+	/*2011*/ uint16(xArgR64),
+	/*2012*/ uint16(xArgXmmM64),
+	/*2013*/ uint16(xMatch),
+	/*2014*/ uint16(xCondDataSize), 1982, 1988, 2018,
+	/*2018*/ uint16(xSetOp), uint16(CVTTSS2SI),
+	/*2020*/ uint16(xReadSlashR),
+	/*2021*/ uint16(xArgR64),
+	/*2022*/ uint16(xArgXmmM32),
+	/*2023*/ uint16(xMatch),
+	/*2024*/ uint16(xCondIs64), 2027, 2081,
+	/*2027*/ uint16(xCondPrefix), 4,
+	0xF3, 2065,
+	0xF2, 2049,
+	0x66, 2043,
+	0x0, 2037,
+	/*2037*/ uint16(xSetOp), uint16(CVTPS2PI),
+	/*2039*/ uint16(xReadSlashR),
+	/*2040*/ uint16(xArgMm),
+	/*2041*/ uint16(xArgXmmM64),
+	/*2042*/ uint16(xMatch),
+	/*2043*/ uint16(xSetOp), uint16(CVTPD2PI),
+	/*2045*/ uint16(xReadSlashR),
+	/*2046*/ uint16(xArgMm),
+	/*2047*/ uint16(xArgXmmM128),
+	/*2048*/ uint16(xMatch),
+	/*2049*/ uint16(xCondDataSize), 2053, 2059, 0,
+	/*2053*/ uint16(xSetOp), uint16(CVTSD2SI),
+	/*2055*/ uint16(xReadSlashR),
+	/*2056*/ uint16(xArgR32),
+	/*2057*/ uint16(xArgXmmM64),
+	/*2058*/ uint16(xMatch),
+	/*2059*/ uint16(xSetOp), uint16(CVTSD2SI),
+	/*2061*/ uint16(xReadSlashR),
+	/*2062*/ uint16(xArgR32),
+	/*2063*/ uint16(xArgXmmM64),
+	/*2064*/ uint16(xMatch),
+	/*2065*/ uint16(xCondDataSize), 2069, 2075, 0,
+	/*2069*/ uint16(xSetOp), uint16(CVTSS2SI),
+	/*2071*/ uint16(xReadSlashR),
+	/*2072*/ uint16(xArgR32),
+	/*2073*/ uint16(xArgXmmM32),
+	/*2074*/ uint16(xMatch),
+	/*2075*/ uint16(xSetOp), uint16(CVTSS2SI),
+	/*2077*/ uint16(xReadSlashR),
+	/*2078*/ uint16(xArgR32),
+	/*2079*/ uint16(xArgXmmM32),
+	/*2080*/ uint16(xMatch),
+	/*2081*/ uint16(xCondPrefix), 4,
+	0xF3, 2101,
+	0xF2, 2091,
+	0x66, 2043,
+	0x0, 2037,
+	/*2091*/ uint16(xCondDataSize), 2053, 2059, 2095,
+	/*2095*/ uint16(xSetOp), uint16(CVTSD2SI),
+	/*2097*/ uint16(xReadSlashR),
+	/*2098*/ uint16(xArgR64),
+	/*2099*/ uint16(xArgXmmM64),
+	/*2100*/ uint16(xMatch),
+	/*2101*/ uint16(xCondDataSize), 2069, 2075, 2105,
+	/*2105*/ uint16(xSetOp), uint16(CVTSS2SI),
+	/*2107*/ uint16(xReadSlashR),
+	/*2108*/ uint16(xArgR64),
+	/*2109*/ uint16(xArgXmmM32),
+	/*2110*/ uint16(xMatch),
+	/*2111*/ uint16(xCondPrefix), 2,
+	0x66, 2123,
+	0x0, 2117,
+	/*2117*/ uint16(xSetOp), uint16(UCOMISS),
+	/*2119*/ uint16(xReadSlashR),
+	/*2120*/ uint16(xArgXmm1),
+	/*2121*/ uint16(xArgXmm2M32),
+	/*2122*/ uint16(xMatch),
+	/*2123*/ uint16(xSetOp), uint16(UCOMISD),
+	/*2125*/ uint16(xReadSlashR),
+	/*2126*/ uint16(xArgXmm1),
+	/*2127*/ uint16(xArgXmm2M64),
+	/*2128*/ uint16(xMatch),
+	/*2129*/ uint16(xCondPrefix), 2,
+	0x66, 2141,
+	0x0, 2135,
+	/*2135*/ uint16(xSetOp), uint16(COMISS),
+	/*2137*/ uint16(xReadSlashR),
+	/*2138*/ uint16(xArgXmm1),
+	/*2139*/ uint16(xArgXmm2M32),
+	/*2140*/ uint16(xMatch),
+	/*2141*/ uint16(xSetOp), uint16(COMISD),
+	/*2143*/ uint16(xReadSlashR),
+	/*2144*/ uint16(xArgXmm1),
+	/*2145*/ uint16(xArgXmm2M64),
+	/*2146*/ uint16(xMatch),
+	/*2147*/ uint16(xSetOp), uint16(WRMSR),
+	/*2149*/ uint16(xMatch),
+	/*2150*/ uint16(xSetOp), uint16(RDTSC),
+	/*2152*/ uint16(xMatch),
+	/*2153*/ uint16(xSetOp), uint16(RDMSR),
+	/*2155*/ uint16(xMatch),
+	/*2156*/ uint16(xSetOp), uint16(RDPMC),
+	/*2158*/ uint16(xMatch),
+	/*2159*/ uint16(xSetOp), uint16(SYSENTER),
+	/*2161*/ uint16(xMatch),
+	/*2162*/ uint16(xCondDataSize), 2166, 2166, 2169,
+	/*2166*/ uint16(xSetOp), uint16(SYSEXIT),
+	/*2168*/ uint16(xMatch),
+	/*2169*/ uint16(xSetOp), uint16(SYSEXIT),
+	/*2171*/ uint16(xMatch),
+	/*2172*/ uint16(xCondByte), 54,
+	0x00, 2283,
+	0x01, 2301,
+	0x02, 2319,
+	0x03, 2337,
+	0x04, 2355,
+	0x05, 2373,
+	0x06, 2391,
+	0x07, 2409,
+	0x08, 2427,
+	0x09, 2445,
+	0x0A, 2463,
+	0x0B, 2481,
+	0x10, 2499,
+	0x14, 2510,
+	0x15, 2521,
+	0x17, 2532,
+	0x1C, 2542,
+	0x1D, 2560,
+	0x1E, 2578,
+	0x20, 2596,
+	0x21, 2606,
+	0x22, 2616,
+	0x23, 2626,
+	0x24, 2636,
+	0x25, 2646,
+	0x28, 2656,
+	0x29, 2666,
+	0x2A, 2676,
+	0x2B, 2686,
+	0x30, 2696,
+	0x31, 2706,
+	0x32, 2716,
+	0x33, 2726,
+	0x34, 2736,
+	0x35, 2746,
+	0x37, 2756,
+	0x38, 2766,
+	0x39, 2776,
+	0x3A, 2786,
+	0x3B, 2796,
+	0x3C, 2806,
+	0x3D, 2816,
+	0x3E, 2826,
+	0x3F, 2836,
+	0x40, 2846,
+	0x41, 2856,
+	0x82, 2866,
+	0xDB, 2889,
+	0xDC, 2899,
+	0xDD, 2909,
+	0xDE, 2919,
+	0xDF, 2929,
+	0xF0, 2939,
+	0xF1, 3006,
+	uint16(xFail),
+	/*2283*/ uint16(xCondPrefix), 2,
+	0x66, 2295,
+	0x0, 2289,
+	/*2289*/ uint16(xSetOp), uint16(PSHUFB),
+	/*2291*/ uint16(xReadSlashR),
+	/*2292*/ uint16(xArgMm1),
+	/*2293*/ uint16(xArgMm2M64),
+	/*2294*/ uint16(xMatch),
+	/*2295*/ uint16(xSetOp), uint16(PSHUFB),
+	/*2297*/ uint16(xReadSlashR),
+	/*2298*/ uint16(xArgXmm1),
+	/*2299*/ uint16(xArgXmm2M128),
+	/*2300*/ uint16(xMatch),
+	/*2301*/ uint16(xCondPrefix), 2,
+	0x66, 2313,
+	0x0, 2307,
+	/*2307*/ uint16(xSetOp), uint16(PHADDW),
+	/*2309*/ uint16(xReadSlashR),
+	/*2310*/ uint16(xArgMm1),
+	/*2311*/ uint16(xArgMm2M64),
+	/*2312*/ uint16(xMatch),
+	/*2313*/ uint16(xSetOp), uint16(PHADDW),
+	/*2315*/ uint16(xReadSlashR),
+	/*2316*/ uint16(xArgXmm1),
+	/*2317*/ uint16(xArgXmm2M128),
+	/*2318*/ uint16(xMatch),
+	/*2319*/ uint16(xCondPrefix), 2,
+	0x66, 2331,
+	0x0, 2325,
+	/*2325*/ uint16(xSetOp), uint16(PHADDD),
+	/*2327*/ uint16(xReadSlashR),
+	/*2328*/ uint16(xArgMm1),
+	/*2329*/ uint16(xArgMm2M64),
+	/*2330*/ uint16(xMatch),
+	/*2331*/ uint16(xSetOp), uint16(PHADDD),
+	/*2333*/ uint16(xReadSlashR),
+	/*2334*/ uint16(xArgXmm1),
+	/*2335*/ uint16(xArgXmm2M128),
+	/*2336*/ uint16(xMatch),
+	/*2337*/ uint16(xCondPrefix), 2,
+	0x66, 2349,
+	0x0, 2343,
+	/*2343*/ uint16(xSetOp), uint16(PHADDSW),
+	/*2345*/ uint16(xReadSlashR),
+	/*2346*/ uint16(xArgMm1),
+	/*2347*/ uint16(xArgMm2M64),
+	/*2348*/ uint16(xMatch),
+	/*2349*/ uint16(xSetOp), uint16(PHADDSW),
+	/*2351*/ uint16(xReadSlashR),
+	/*2352*/ uint16(xArgXmm1),
+	/*2353*/ uint16(xArgXmm2M128),
+	/*2354*/ uint16(xMatch),
+	/*2355*/ uint16(xCondPrefix), 2,
+	0x66, 2367,
+	0x0, 2361,
+	/*2361*/ uint16(xSetOp), uint16(PMADDUBSW),
+	/*2363*/ uint16(xReadSlashR),
+	/*2364*/ uint16(xArgMm1),
+	/*2365*/ uint16(xArgMm2M64),
+	/*2366*/ uint16(xMatch),
+	/*2367*/ uint16(xSetOp), uint16(PMADDUBSW),
+	/*2369*/ uint16(xReadSlashR),
+	/*2370*/ uint16(xArgXmm1),
+	/*2371*/ uint16(xArgXmm2M128),
+	/*2372*/ uint16(xMatch),
+	/*2373*/ uint16(xCondPrefix), 2,
+	0x66, 2385,
+	0x0, 2379,
+	/*2379*/ uint16(xSetOp), uint16(PHSUBW),
+	/*2381*/ uint16(xReadSlashR),
+	/*2382*/ uint16(xArgMm1),
+	/*2383*/ uint16(xArgMm2M64),
+	/*2384*/ uint16(xMatch),
+	/*2385*/ uint16(xSetOp), uint16(PHSUBW),
+	/*2387*/ uint16(xReadSlashR),
+	/*2388*/ uint16(xArgXmm1),
+	/*2389*/ uint16(xArgXmm2M128),
+	/*2390*/ uint16(xMatch),
+	/*2391*/ uint16(xCondPrefix), 2,
+	0x66, 2403,
+	0x0, 2397,
+	/*2397*/ uint16(xSetOp), uint16(PHSUBD),
+	/*2399*/ uint16(xReadSlashR),
+	/*2400*/ uint16(xArgMm1),
+	/*2401*/ uint16(xArgMm2M64),
+	/*2402*/ uint16(xMatch),
+	/*2403*/ uint16(xSetOp), uint16(PHSUBD),
+	/*2405*/ uint16(xReadSlashR),
+	/*2406*/ uint16(xArgXmm1),
+	/*2407*/ uint16(xArgXmm2M128),
+	/*2408*/ uint16(xMatch),
+	/*2409*/ uint16(xCondPrefix), 2,
+	0x66, 2421,
+	0x0, 2415,
+	/*2415*/ uint16(xSetOp), uint16(PHSUBSW),
+	/*2417*/ uint16(xReadSlashR),
+	/*2418*/ uint16(xArgMm1),
+	/*2419*/ uint16(xArgMm2M64),
+	/*2420*/ uint16(xMatch),
+	/*2421*/ uint16(xSetOp), uint16(PHSUBSW),
+	/*2423*/ uint16(xReadSlashR),
+	/*2424*/ uint16(xArgXmm1),
+	/*2425*/ uint16(xArgXmm2M128),
+	/*2426*/ uint16(xMatch),
+	/*2427*/ uint16(xCondPrefix), 2,
+	0x66, 2439,
+	0x0, 2433,
+	/*2433*/ uint16(xSetOp), uint16(PSIGNB),
+	/*2435*/ uint16(xReadSlashR),
+	/*2436*/ uint16(xArgMm1),
+	/*2437*/ uint16(xArgMm2M64),
+	/*2438*/ uint16(xMatch),
+	/*2439*/ uint16(xSetOp), uint16(PSIGNB),
+	/*2441*/ uint16(xReadSlashR),
+	/*2442*/ uint16(xArgXmm1),
+	/*2443*/ uint16(xArgXmm2M128),
+	/*2444*/ uint16(xMatch),
+	/*2445*/ uint16(xCondPrefix), 2,
+	0x66, 2457,
+	0x0, 2451,
+	/*2451*/ uint16(xSetOp), uint16(PSIGNW),
+	/*2453*/ uint16(xReadSlashR),
+	/*2454*/ uint16(xArgMm1),
+	/*2455*/ uint16(xArgMm2M64),
+	/*2456*/ uint16(xMatch),
+	/*2457*/ uint16(xSetOp), uint16(PSIGNW),
+	/*2459*/ uint16(xReadSlashR),
+	/*2460*/ uint16(xArgXmm1),
+	/*2461*/ uint16(xArgXmm2M128),
+	/*2462*/ uint16(xMatch),
+	/*2463*/ uint16(xCondPrefix), 2,
+	0x66, 2475,
+	0x0, 2469,
+	/*2469*/ uint16(xSetOp), uint16(PSIGND),
+	/*2471*/ uint16(xReadSlashR),
+	/*2472*/ uint16(xArgMm1),
+	/*2473*/ uint16(xArgMm2M64),
+	/*2474*/ uint16(xMatch),
+	/*2475*/ uint16(xSetOp), uint16(PSIGND),
+	/*2477*/ uint16(xReadSlashR),
+	/*2478*/ uint16(xArgXmm1),
+	/*2479*/ uint16(xArgXmm2M128),
+	/*2480*/ uint16(xMatch),
+	/*2481*/ uint16(xCondPrefix), 2,
+	0x66, 2493,
+	0x0, 2487,
+	/*2487*/ uint16(xSetOp), uint16(PMULHRSW),
+	/*2489*/ uint16(xReadSlashR),
+	/*2490*/ uint16(xArgMm1),
+	/*2491*/ uint16(xArgMm2M64),
+	/*2492*/ uint16(xMatch),
+	/*2493*/ uint16(xSetOp), uint16(PMULHRSW),
+	/*2495*/ uint16(xReadSlashR),
+	/*2496*/ uint16(xArgXmm1),
+	/*2497*/ uint16(xArgXmm2M128),
+	/*2498*/ uint16(xMatch),
+	/*2499*/ uint16(xCondPrefix), 1,
+	0x66, 2503,
+	/*2503*/ uint16(xSetOp), uint16(PBLENDVB),
+	/*2505*/ uint16(xReadSlashR),
+	/*2506*/ uint16(xArgXmm1),
+	/*2507*/ uint16(xArgXmm2M128),
+	/*2508*/ uint16(xArgXMM0),
+	/*2509*/ uint16(xMatch),
+	/*2510*/ uint16(xCondPrefix), 1,
+	0x66, 2514,
+	/*2514*/ uint16(xSetOp), uint16(BLENDVPS),
+	/*2516*/ uint16(xReadSlashR),
+	/*2517*/ uint16(xArgXmm1),
+	/*2518*/ uint16(xArgXmm2M128),
+	/*2519*/ uint16(xArgXMM0),
+	/*2520*/ uint16(xMatch),
+	/*2521*/ uint16(xCondPrefix), 1,
+	0x66, 2525,
+	/*2525*/ uint16(xSetOp), uint16(BLENDVPD),
+	/*2527*/ uint16(xReadSlashR),
+	/*2528*/ uint16(xArgXmm1),
+	/*2529*/ uint16(xArgXmm2M128),
+	/*2530*/ uint16(xArgXMM0),
+	/*2531*/ uint16(xMatch),
+	/*2532*/ uint16(xCondPrefix), 1,
+	0x66, 2536,
+	/*2536*/ uint16(xSetOp), uint16(PTEST),
+	/*2538*/ uint16(xReadSlashR),
+	/*2539*/ uint16(xArgXmm1),
+	/*2540*/ uint16(xArgXmm2M128),
+	/*2541*/ uint16(xMatch),
+	/*2542*/ uint16(xCondPrefix), 2,
+	0x66, 2554,
+	0x0, 2548,
+	/*2548*/ uint16(xSetOp), uint16(PABSB),
+	/*2550*/ uint16(xReadSlashR),
+	/*2551*/ uint16(xArgMm1),
+	/*2552*/ uint16(xArgMm2M64),
+	/*2553*/ uint16(xMatch),
+	/*2554*/ uint16(xSetOp), uint16(PABSB),
+	/*2556*/ uint16(xReadSlashR),
+	/*2557*/ uint16(xArgXmm1),
+	/*2558*/ uint16(xArgXmm2M128),
+	/*2559*/ uint16(xMatch),
+	/*2560*/ uint16(xCondPrefix), 2,
+	0x66, 2572,
+	0x0, 2566,
+	/*2566*/ uint16(xSetOp), uint16(PABSW),
+	/*2568*/ uint16(xReadSlashR),
+	/*2569*/ uint16(xArgMm1),
+	/*2570*/ uint16(xArgMm2M64),
+	/*2571*/ uint16(xMatch),
+	/*2572*/ uint16(xSetOp), uint16(PABSW),
+	/*2574*/ uint16(xReadSlashR),
+	/*2575*/ uint16(xArgXmm1),
+	/*2576*/ uint16(xArgXmm2M128),
+	/*2577*/ uint16(xMatch),
+	/*2578*/ uint16(xCondPrefix), 2,
+	0x66, 2590,
+	0x0, 2584,
+	/*2584*/ uint16(xSetOp), uint16(PABSD),
+	/*2586*/ uint16(xReadSlashR),
+	/*2587*/ uint16(xArgMm1),
+	/*2588*/ uint16(xArgMm2M64),
+	/*2589*/ uint16(xMatch),
+	/*2590*/ uint16(xSetOp), uint16(PABSD),
+	/*2592*/ uint16(xReadSlashR),
+	/*2593*/ uint16(xArgXmm1),
+	/*2594*/ uint16(xArgXmm2M128),
+	/*2595*/ uint16(xMatch),
+	/*2596*/ uint16(xCondPrefix), 1,
+	0x66, 2600,
+	/*2600*/ uint16(xSetOp), uint16(PMOVSXBW),
+	/*2602*/ uint16(xReadSlashR),
+	/*2603*/ uint16(xArgXmm1),
+	/*2604*/ uint16(xArgXmm2M64),
+	/*2605*/ uint16(xMatch),
+	/*2606*/ uint16(xCondPrefix), 1,
+	0x66, 2610,
+	/*2610*/ uint16(xSetOp), uint16(PMOVSXBD),
+	/*2612*/ uint16(xReadSlashR),
+	/*2613*/ uint16(xArgXmm1),
+	/*2614*/ uint16(xArgXmm2M32),
+	/*2615*/ uint16(xMatch),
+	/*2616*/ uint16(xCondPrefix), 1,
+	0x66, 2620,
+	/*2620*/ uint16(xSetOp), uint16(PMOVSXBQ),
+	/*2622*/ uint16(xReadSlashR),
+	/*2623*/ uint16(xArgXmm1),
+	/*2624*/ uint16(xArgXmm2M16),
+	/*2625*/ uint16(xMatch),
+	/*2626*/ uint16(xCondPrefix), 1,
+	0x66, 2630,
+	/*2630*/ uint16(xSetOp), uint16(PMOVSXWD),
+	/*2632*/ uint16(xReadSlashR),
+	/*2633*/ uint16(xArgXmm1),
+	/*2634*/ uint16(xArgXmm2M64),
+	/*2635*/ uint16(xMatch),
+	/*2636*/ uint16(xCondPrefix), 1,
+	0x66, 2640,
+	/*2640*/ uint16(xSetOp), uint16(PMOVSXWQ),
+	/*2642*/ uint16(xReadSlashR),
+	/*2643*/ uint16(xArgXmm1),
+	/*2644*/ uint16(xArgXmm2M32),
+	/*2645*/ uint16(xMatch),
+	/*2646*/ uint16(xCondPrefix), 1,
+	0x66, 2650,
+	/*2650*/ uint16(xSetOp), uint16(PMOVSXDQ),
+	/*2652*/ uint16(xReadSlashR),
+	/*2653*/ uint16(xArgXmm1),
+	/*2654*/ uint16(xArgXmm2M64),
+	/*2655*/ uint16(xMatch),
+	/*2656*/ uint16(xCondPrefix), 1,
+	0x66, 2660,
+	/*2660*/ uint16(xSetOp), uint16(PMULDQ),
+	/*2662*/ uint16(xReadSlashR),
+	/*2663*/ uint16(xArgXmm1),
+	/*2664*/ uint16(xArgXmm2M128),
+	/*2665*/ uint16(xMatch),
+	/*2666*/ uint16(xCondPrefix), 1,
+	0x66, 2670,
+	/*2670*/ uint16(xSetOp), uint16(PCMPEQQ),
+	/*2672*/ uint16(xReadSlashR),
+	/*2673*/ uint16(xArgXmm1),
+	/*2674*/ uint16(xArgXmm2M128),
+	/*2675*/ uint16(xMatch),
+	/*2676*/ uint16(xCondPrefix), 1,
+	0x66, 2680,
+	/*2680*/ uint16(xSetOp), uint16(MOVNTDQA),
+	/*2682*/ uint16(xReadSlashR),
+	/*2683*/ uint16(xArgXmm1),
+	/*2684*/ uint16(xArgM128),
+	/*2685*/ uint16(xMatch),
+	/*2686*/ uint16(xCondPrefix), 1,
+	0x66, 2690,
+	/*2690*/ uint16(xSetOp), uint16(PACKUSDW),
+	/*2692*/ uint16(xReadSlashR),
+	/*2693*/ uint16(xArgXmm1),
+	/*2694*/ uint16(xArgXmm2M128),
+	/*2695*/ uint16(xMatch),
+	/*2696*/ uint16(xCondPrefix), 1,
+	0x66, 2700,
+	/*2700*/ uint16(xSetOp), uint16(PMOVZXBW),
+	/*2702*/ uint16(xReadSlashR),
+	/*2703*/ uint16(xArgXmm1),
+	/*2704*/ uint16(xArgXmm2M64),
+	/*2705*/ uint16(xMatch),
+	/*2706*/ uint16(xCondPrefix), 1,
+	0x66, 2710,
+	/*2710*/ uint16(xSetOp), uint16(PMOVZXBD),
+	/*2712*/ uint16(xReadSlashR),
+	/*2713*/ uint16(xArgXmm1),
+	/*2714*/ uint16(xArgXmm2M32),
+	/*2715*/ uint16(xMatch),
+	/*2716*/ uint16(xCondPrefix), 1,
+	0x66, 2720,
+	/*2720*/ uint16(xSetOp), uint16(PMOVZXBQ),
+	/*2722*/ uint16(xReadSlashR),
+	/*2723*/ uint16(xArgXmm1),
+	/*2724*/ uint16(xArgXmm2M16),
+	/*2725*/ uint16(xMatch),
+	/*2726*/ uint16(xCondPrefix), 1,
+	0x66, 2730,
+	/*2730*/ uint16(xSetOp), uint16(PMOVZXWD),
+	/*2732*/ uint16(xReadSlashR),
+	/*2733*/ uint16(xArgXmm1),
+	/*2734*/ uint16(xArgXmm2M64),
+	/*2735*/ uint16(xMatch),
+	/*2736*/ uint16(xCondPrefix), 1,
+	0x66, 2740,
+	/*2740*/ uint16(xSetOp), uint16(PMOVZXWQ),
+	/*2742*/ uint16(xReadSlashR),
+	/*2743*/ uint16(xArgXmm1),
+	/*2744*/ uint16(xArgXmm2M32),
+	/*2745*/ uint16(xMatch),
+	/*2746*/ uint16(xCondPrefix), 1,
+	0x66, 2750,
+	/*2750*/ uint16(xSetOp), uint16(PMOVZXDQ),
+	/*2752*/ uint16(xReadSlashR),
+	/*2753*/ uint16(xArgXmm1),
+	/*2754*/ uint16(xArgXmm2M64),
+	/*2755*/ uint16(xMatch),
+	/*2756*/ uint16(xCondPrefix), 1,
+	0x66, 2760,
+	/*2760*/ uint16(xSetOp), uint16(PCMPGTQ),
+	/*2762*/ uint16(xReadSlashR),
+	/*2763*/ uint16(xArgXmm1),
+	/*2764*/ uint16(xArgXmm2M128),
+	/*2765*/ uint16(xMatch),
+	/*2766*/ uint16(xCondPrefix), 1,
+	0x66, 2770,
+	/*2770*/ uint16(xSetOp), uint16(PMINSB),
+	/*2772*/ uint16(xReadSlashR),
+	/*2773*/ uint16(xArgXmm1),
+	/*2774*/ uint16(xArgXmm2M128),
+	/*2775*/ uint16(xMatch),
+	/*2776*/ uint16(xCondPrefix), 1,
+	0x66, 2780,
+	/*2780*/ uint16(xSetOp), uint16(PMINSD),
+	/*2782*/ uint16(xReadSlashR),
+	/*2783*/ uint16(xArgXmm1),
+	/*2784*/ uint16(xArgXmm2M128),
+	/*2785*/ uint16(xMatch),
+	/*2786*/ uint16(xCondPrefix), 1,
+	0x66, 2790,
+	/*2790*/ uint16(xSetOp), uint16(PMINUW),
+	/*2792*/ uint16(xReadSlashR),
+	/*2793*/ uint16(xArgXmm1),
+	/*2794*/ uint16(xArgXmm2M128),
+	/*2795*/ uint16(xMatch),
+	/*2796*/ uint16(xCondPrefix), 1,
+	0x66, 2800,
+	/*2800*/ uint16(xSetOp), uint16(PMINUD),
+	/*2802*/ uint16(xReadSlashR),
+	/*2803*/ uint16(xArgXmm1),
+	/*2804*/ uint16(xArgXmm2M128),
+	/*2805*/ uint16(xMatch),
+	/*2806*/ uint16(xCondPrefix), 1,
+	0x66, 2810,
+	/*2810*/ uint16(xSetOp), uint16(PMAXSB),
+	/*2812*/ uint16(xReadSlashR),
+	/*2813*/ uint16(xArgXmm1),
+	/*2814*/ uint16(xArgXmm2M128),
+	/*2815*/ uint16(xMatch),
+	/*2816*/ uint16(xCondPrefix), 1,
+	0x66, 2820,
+	/*2820*/ uint16(xSetOp), uint16(PMAXSD),
+	/*2822*/ uint16(xReadSlashR),
+	/*2823*/ uint16(xArgXmm1),
+	/*2824*/ uint16(xArgXmm2M128),
+	/*2825*/ uint16(xMatch),
+	/*2826*/ uint16(xCondPrefix), 1,
+	0x66, 2830,
+	/*2830*/ uint16(xSetOp), uint16(PMAXUW),
+	/*2832*/ uint16(xReadSlashR),
+	/*2833*/ uint16(xArgXmm1),
+	/*2834*/ uint16(xArgXmm2M128),
+	/*2835*/ uint16(xMatch),
+	/*2836*/ uint16(xCondPrefix), 1,
+	0x66, 2840,
+	/*2840*/ uint16(xSetOp), uint16(PMAXUD),
+	/*2842*/ uint16(xReadSlashR),
+	/*2843*/ uint16(xArgXmm1),
+	/*2844*/ uint16(xArgXmm2M128),
+	/*2845*/ uint16(xMatch),
+	/*2846*/ uint16(xCondPrefix), 1,
+	0x66, 2850,
+	/*2850*/ uint16(xSetOp), uint16(PMULLD),
+	/*2852*/ uint16(xReadSlashR),
+	/*2853*/ uint16(xArgXmm1),
+	/*2854*/ uint16(xArgXmm2M128),
+	/*2855*/ uint16(xMatch),
+	/*2856*/ uint16(xCondPrefix), 1,
+	0x66, 2860,
+	/*2860*/ uint16(xSetOp), uint16(PHMINPOSUW),
+	/*2862*/ uint16(xReadSlashR),
+	/*2863*/ uint16(xArgXmm1),
+	/*2864*/ uint16(xArgXmm2M128),
+	/*2865*/ uint16(xMatch),
+	/*2866*/ uint16(xCondIs64), 2869, 2879,
+	/*2869*/ uint16(xCondPrefix), 1,
+	0x66, 2873,
+	/*2873*/ uint16(xSetOp), uint16(INVPCID),
+	/*2875*/ uint16(xReadSlashR),
+	/*2876*/ uint16(xArgR32),
+	/*2877*/ uint16(xArgM128),
+	/*2878*/ uint16(xMatch),
+	/*2879*/ uint16(xCondPrefix), 1,
+	0x66, 2883,
+	/*2883*/ uint16(xSetOp), uint16(INVPCID),
+	/*2885*/ uint16(xReadSlashR),
+	/*2886*/ uint16(xArgR64),
+	/*2887*/ uint16(xArgM128),
+	/*2888*/ uint16(xMatch),
+	/*2889*/ uint16(xCondPrefix), 1,
+	0x66, 2893,
+	/*2893*/ uint16(xSetOp), uint16(AESIMC),
+	/*2895*/ uint16(xReadSlashR),
+	/*2896*/ uint16(xArgXmm1),
+	/*2897*/ uint16(xArgXmm2M128),
+	/*2898*/ uint16(xMatch),
+	/*2899*/ uint16(xCondPrefix), 1,
+	0x66, 2903,
+	/*2903*/ uint16(xSetOp), uint16(AESENC),
+	/*2905*/ uint16(xReadSlashR),
+	/*2906*/ uint16(xArgXmm1),
+	/*2907*/ uint16(xArgXmm2M128),
+	/*2908*/ uint16(xMatch),
+	/*2909*/ uint16(xCondPrefix), 1,
+	0x66, 2913,
+	/*2913*/ uint16(xSetOp), uint16(AESENCLAST),
+	/*2915*/ uint16(xReadSlashR),
+	/*2916*/ uint16(xArgXmm1),
+	/*2917*/ uint16(xArgXmm2M128),
+	/*2918*/ uint16(xMatch),
+	/*2919*/ uint16(xCondPrefix), 1,
+	0x66, 2923,
+	/*2923*/ uint16(xSetOp), uint16(AESDEC),
+	/*2925*/ uint16(xReadSlashR),
+	/*2926*/ uint16(xArgXmm1),
+	/*2927*/ uint16(xArgXmm2M128),
+	/*2928*/ uint16(xMatch),
+	/*2929*/ uint16(xCondPrefix), 1,
+	0x66, 2933,
+	/*2933*/ uint16(xSetOp), uint16(AESDECLAST),
+	/*2935*/ uint16(xReadSlashR),
+	/*2936*/ uint16(xArgXmm1),
+	/*2937*/ uint16(xArgXmm2M128),
+	/*2938*/ uint16(xMatch),
+	/*2939*/ uint16(xCondIs64), 2942, 2980,
+	/*2942*/ uint16(xCondPrefix), 2,
+	0xF2, 2964,
+	0x0, 2948,
+	/*2948*/ uint16(xCondDataSize), 2952, 2958, 0,
+	/*2952*/ uint16(xSetOp), uint16(MOVBE),
+	/*2954*/ uint16(xReadSlashR),
+	/*2955*/ uint16(xArgR16),
+	/*2956*/ uint16(xArgM16),
+	/*2957*/ uint16(xMatch),
+	/*2958*/ uint16(xSetOp), uint16(MOVBE),
+	/*2960*/ uint16(xReadSlashR),
+	/*2961*/ uint16(xArgR32),
+	/*2962*/ uint16(xArgM32),
+	/*2963*/ uint16(xMatch),
+	/*2964*/ uint16(xCondDataSize), 2968, 2974, 0,
+	/*2968*/ uint16(xSetOp), uint16(CRC32),
+	/*2970*/ uint16(xReadSlashR),
+	/*2971*/ uint16(xArgR32),
+	/*2972*/ uint16(xArgRM8),
+	/*2973*/ uint16(xMatch),
+	/*2974*/ uint16(xSetOp), uint16(CRC32),
+	/*2976*/ uint16(xReadSlashR),
+	/*2977*/ uint16(xArgR32),
+	/*2978*/ uint16(xArgRM8),
+	/*2979*/ uint16(xMatch),
+	/*2980*/ uint16(xCondPrefix), 2,
+	0xF2, 2996,
+	0x0, 2986,
+	/*2986*/ uint16(xCondDataSize), 2952, 2958, 2990,
+	/*2990*/ uint16(xSetOp), uint16(MOVBE),
+	/*2992*/ uint16(xReadSlashR),
+	/*2993*/ uint16(xArgR64),
+	/*2994*/ uint16(xArgM64),
+	/*2995*/ uint16(xMatch),
+	/*2996*/ uint16(xCondDataSize), 2968, 2974, 3000,
+	/*3000*/ uint16(xSetOp), uint16(CRC32),
+	/*3002*/ uint16(xReadSlashR),
+	/*3003*/ uint16(xArgR64),
+	/*3004*/ uint16(xArgRM8),
+	/*3005*/ uint16(xMatch),
+	/*3006*/ uint16(xCondIs64), 3009, 3047,
+	/*3009*/ uint16(xCondPrefix), 2,
+	0xF2, 3031,
+	0x0, 3015,
+	/*3015*/ uint16(xCondDataSize), 3019, 3025, 0,
+	/*3019*/ uint16(xSetOp), uint16(MOVBE),
+	/*3021*/ uint16(xReadSlashR),
+	/*3022*/ uint16(xArgM16),
+	/*3023*/ uint16(xArgR16),
+	/*3024*/ uint16(xMatch),
+	/*3025*/ uint16(xSetOp), uint16(MOVBE),
+	/*3027*/ uint16(xReadSlashR),
+	/*3028*/ uint16(xArgM32),
+	/*3029*/ uint16(xArgR32),
+	/*3030*/ uint16(xMatch),
+	/*3031*/ uint16(xCondDataSize), 3035, 3041, 0,
+	/*3035*/ uint16(xSetOp), uint16(CRC32),
+	/*3037*/ uint16(xReadSlashR),
+	/*3038*/ uint16(xArgR32),
+	/*3039*/ uint16(xArgRM16),
+	/*3040*/ uint16(xMatch),
+	/*3041*/ uint16(xSetOp), uint16(CRC32),
+	/*3043*/ uint16(xReadSlashR),
+	/*3044*/ uint16(xArgR32),
+	/*3045*/ uint16(xArgRM32),
+	/*3046*/ uint16(xMatch),
+	/*3047*/ uint16(xCondPrefix), 2,
+	0xF2, 3063,
+	0x0, 3053,
+	/*3053*/ uint16(xCondDataSize), 3019, 3025, 3057,
+	/*3057*/ uint16(xSetOp), uint16(MOVBE),
+	/*3059*/ uint16(xReadSlashR),
+	/*3060*/ uint16(xArgM64),
+	/*3061*/ uint16(xArgR64),
+	/*3062*/ uint16(xMatch),
+	/*3063*/ uint16(xCondDataSize), 3035, 3041, 3067,
+	/*3067*/ uint16(xSetOp), uint16(CRC32),
+	/*3069*/ uint16(xReadSlashR),
+	/*3070*/ uint16(xArgR64),
+	/*3071*/ uint16(xArgRM64),
+	/*3072*/ uint16(xMatch),
+	/*3073*/ uint16(xCondByte), 24,
+	0x08, 3124,
+	0x09, 3136,
+	0x0A, 3148,
+	0x0B, 3160,
+	0x0C, 3172,
+	0x0D, 3184,
+	0x0E, 3196,
+	0x0F, 3208,
+	0x14, 3230,
+	0x15, 3242,
+	0x16, 3254,
+	0x17, 3297,
+	0x20, 3309,
+	0x21, 3321,
+	0x22, 3333,
+	0x40, 3376,
+	0x41, 3388,
+	0x42, 3400,
+	0x44, 3412,
+	0x60, 3424,
+	0x61, 3436,
+	0x62, 3448,
+	0x63, 3460,
+	0xDF, 3472,
+	uint16(xFail),
+	/*3124*/ uint16(xCondPrefix), 1,
+	0x66, 3128,
+	/*3128*/ uint16(xSetOp), uint16(ROUNDPS),
+	/*3130*/ uint16(xReadSlashR),
+	/*3131*/ uint16(xReadIb),
+	/*3132*/ uint16(xArgXmm1),
+	/*3133*/ uint16(xArgXmm2M128),
+	/*3134*/ uint16(xArgImm8u),
+	/*3135*/ uint16(xMatch),
+	/*3136*/ uint16(xCondPrefix), 1,
+	0x66, 3140,
+	/*3140*/ uint16(xSetOp), uint16(ROUNDPD),
+	/*3142*/ uint16(xReadSlashR),
+	/*3143*/ uint16(xReadIb),
+	/*3144*/ uint16(xArgXmm1),
+	/*3145*/ uint16(xArgXmm2M128),
+	/*3146*/ uint16(xArgImm8u),
+	/*3147*/ uint16(xMatch),
+	/*3148*/ uint16(xCondPrefix), 1,
+	0x66, 3152,
+	/*3152*/ uint16(xSetOp), uint16(ROUNDSS),
+	/*3154*/ uint16(xReadSlashR),
+	/*3155*/ uint16(xReadIb),
+	/*3156*/ uint16(xArgXmm1),
+	/*3157*/ uint16(xArgXmm2M32),
+	/*3158*/ uint16(xArgImm8u),
+	/*3159*/ uint16(xMatch),
+	/*3160*/ uint16(xCondPrefix), 1,
+	0x66, 3164,
+	/*3164*/ uint16(xSetOp), uint16(ROUNDSD),
+	/*3166*/ uint16(xReadSlashR),
+	/*3167*/ uint16(xReadIb),
+	/*3168*/ uint16(xArgXmm1),
+	/*3169*/ uint16(xArgXmm2M64),
+	/*3170*/ uint16(xArgImm8u),
+	/*3171*/ uint16(xMatch),
+	/*3172*/ uint16(xCondPrefix), 1,
+	0x66, 3176,
+	/*3176*/ uint16(xSetOp), uint16(BLENDPS),
+	/*3178*/ uint16(xReadSlashR),
+	/*3179*/ uint16(xReadIb),
+	/*3180*/ uint16(xArgXmm1),
+	/*3181*/ uint16(xArgXmm2M128),
+	/*3182*/ uint16(xArgImm8u),
+	/*3183*/ uint16(xMatch),
+	/*3184*/ uint16(xCondPrefix), 1,
+	0x66, 3188,
+	/*3188*/ uint16(xSetOp), uint16(BLENDPD),
+	/*3190*/ uint16(xReadSlashR),
+	/*3191*/ uint16(xReadIb),
+	/*3192*/ uint16(xArgXmm1),
+	/*3193*/ uint16(xArgXmm2M128),
+	/*3194*/ uint16(xArgImm8u),
+	/*3195*/ uint16(xMatch),
+	/*3196*/ uint16(xCondPrefix), 1,
+	0x66, 3200,
+	/*3200*/ uint16(xSetOp), uint16(PBLENDW),
+	/*3202*/ uint16(xReadSlashR),
+	/*3203*/ uint16(xReadIb),
+	/*3204*/ uint16(xArgXmm1),
+	/*3205*/ uint16(xArgXmm2M128),
+	/*3206*/ uint16(xArgImm8u),
+	/*3207*/ uint16(xMatch),
+	/*3208*/ uint16(xCondPrefix), 2,
+	0x66, 3222,
+	0x0, 3214,
+	/*3214*/ uint16(xSetOp), uint16(PALIGNR),
+	/*3216*/ uint16(xReadSlashR),
+	/*3217*/ uint16(xReadIb),
+	/*3218*/ uint16(xArgMm1),
+	/*3219*/ uint16(xArgMm2M64),
+	/*3220*/ uint16(xArgImm8u),
+	/*3221*/ uint16(xMatch),
+	/*3222*/ uint16(xSetOp), uint16(PALIGNR),
+	/*3224*/ uint16(xReadSlashR),
+	/*3225*/ uint16(xReadIb),
+	/*3226*/ uint16(xArgXmm1),
+	/*3227*/ uint16(xArgXmm2M128),
+	/*3228*/ uint16(xArgImm8u),
+	/*3229*/ uint16(xMatch),
+	/*3230*/ uint16(xCondPrefix), 1,
+	0x66, 3234,
+	/*3234*/ uint16(xSetOp), uint16(PEXTRB),
+	/*3236*/ uint16(xReadSlashR),
+	/*3237*/ uint16(xReadIb),
+	/*3238*/ uint16(xArgR32M8),
+	/*3239*/ uint16(xArgXmm1),
+	/*3240*/ uint16(xArgImm8u),
+	/*3241*/ uint16(xMatch),
+	/*3242*/ uint16(xCondPrefix), 1,
+	0x66, 3246,
+	/*3246*/ uint16(xSetOp), uint16(PEXTRW),
+	/*3248*/ uint16(xReadSlashR),
+	/*3249*/ uint16(xReadIb),
+	/*3250*/ uint16(xArgR32M16),
+	/*3251*/ uint16(xArgXmm1),
+	/*3252*/ uint16(xArgImm8u),
+	/*3253*/ uint16(xMatch),
+	/*3254*/ uint16(xCondIs64), 3257, 3281,
+	/*3257*/ uint16(xCondPrefix), 1,
+	0x66, 3261,
+	/*3261*/ uint16(xCondDataSize), 3265, 3273, 0,
+	/*3265*/ uint16(xSetOp), uint16(PEXTRD),
+	/*3267*/ uint16(xReadSlashR),
+	/*3268*/ uint16(xReadIb),
+	/*3269*/ uint16(xArgRM32),
+	/*3270*/ uint16(xArgXmm1),
+	/*3271*/ uint16(xArgImm8u),
+	/*3272*/ uint16(xMatch),
+	/*3273*/ uint16(xSetOp), uint16(PEXTRD),
+	/*3275*/ uint16(xReadSlashR),
+	/*3276*/ uint16(xReadIb),
+	/*3277*/ uint16(xArgRM32),
+	/*3278*/ uint16(xArgXmm1),
+	/*3279*/ uint16(xArgImm8u),
+	/*3280*/ uint16(xMatch),
+	/*3281*/ uint16(xCondPrefix), 1,
+	0x66, 3285,
+	/*3285*/ uint16(xCondDataSize), 3265, 3273, 3289,
+	/*3289*/ uint16(xSetOp), uint16(PEXTRQ),
+	/*3291*/ uint16(xReadSlashR),
+	/*3292*/ uint16(xReadIb),
+	/*3293*/ uint16(xArgRM64),
+	/*3294*/ uint16(xArgXmm1),
+	/*3295*/ uint16(xArgImm8u),
+	/*3296*/ uint16(xMatch),
+	/*3297*/ uint16(xCondPrefix), 1,
+	0x66, 3301,
+	/*3301*/ uint16(xSetOp), uint16(EXTRACTPS),
+	/*3303*/ uint16(xReadSlashR),
+	/*3304*/ uint16(xReadIb),
+	/*3305*/ uint16(xArgRM32),
+	/*3306*/ uint16(xArgXmm1),
+	/*3307*/ uint16(xArgImm8u),
+	/*3308*/ uint16(xMatch),
+	/*3309*/ uint16(xCondPrefix), 1,
+	0x66, 3313,
+	/*3313*/ uint16(xSetOp), uint16(PINSRB),
+	/*3315*/ uint16(xReadSlashR),
+	/*3316*/ uint16(xReadIb),
+	/*3317*/ uint16(xArgXmm1),
+	/*3318*/ uint16(xArgR32M8),
+	/*3319*/ uint16(xArgImm8u),
+	/*3320*/ uint16(xMatch),
+	/*3321*/ uint16(xCondPrefix), 1,
+	0x66, 3325,
+	/*3325*/ uint16(xSetOp), uint16(INSERTPS),
+	/*3327*/ uint16(xReadSlashR),
+	/*3328*/ uint16(xReadIb),
+	/*3329*/ uint16(xArgXmm1),
+	/*3330*/ uint16(xArgXmm2M32),
+	/*3331*/ uint16(xArgImm8u),
+	/*3332*/ uint16(xMatch),
+	/*3333*/ uint16(xCondIs64), 3336, 3360,
+	/*3336*/ uint16(xCondPrefix), 1,
+	0x66, 3340,
+	/*3340*/ uint16(xCondDataSize), 3344, 3352, 0,
+	/*3344*/ uint16(xSetOp), uint16(PINSRD),
+	/*3346*/ uint16(xReadSlashR),
+	/*3347*/ uint16(xReadIb),
+	/*3348*/ uint16(xArgXmm1),
+	/*3349*/ uint16(xArgRM32),
+	/*3350*/ uint16(xArgImm8u),
+	/*3351*/ uint16(xMatch),
+	/*3352*/ uint16(xSetOp), uint16(PINSRD),
+	/*3354*/ uint16(xReadSlashR),
+	/*3355*/ uint16(xReadIb),
+	/*3356*/ uint16(xArgXmm1),
+	/*3357*/ uint16(xArgRM32),
+	/*3358*/ uint16(xArgImm8u),
+	/*3359*/ uint16(xMatch),
+	/*3360*/ uint16(xCondPrefix), 1,
+	0x66, 3364,
+	/*3364*/ uint16(xCondDataSize), 3344, 3352, 3368,
+	/*3368*/ uint16(xSetOp), uint16(PINSRQ),
+	/*3370*/ uint16(xReadSlashR),
+	/*3371*/ uint16(xReadIb),
+	/*3372*/ uint16(xArgXmm1),
+	/*3373*/ uint16(xArgRM64),
+	/*3374*/ uint16(xArgImm8u),
+	/*3375*/ uint16(xMatch),
+	/*3376*/ uint16(xCondPrefix), 1,
+	0x66, 3380,
+	/*3380*/ uint16(xSetOp), uint16(DPPS),
+	/*3382*/ uint16(xReadSlashR),
+	/*3383*/ uint16(xReadIb),
+	/*3384*/ uint16(xArgXmm1),
+	/*3385*/ uint16(xArgXmm2M128),
+	/*3386*/ uint16(xArgImm8u),
+	/*3387*/ uint16(xMatch),
+	/*3388*/ uint16(xCondPrefix), 1,
+	0x66, 3392,
+	/*3392*/ uint16(xSetOp), uint16(DPPD),
+	/*3394*/ uint16(xReadSlashR),
+	/*3395*/ uint16(xReadIb),
+	/*3396*/ uint16(xArgXmm1),
+	/*3397*/ uint16(xArgXmm2M128),
+	/*3398*/ uint16(xArgImm8u),
+	/*3399*/ uint16(xMatch),
+	/*3400*/ uint16(xCondPrefix), 1,
+	0x66, 3404,
+	/*3404*/ uint16(xSetOp), uint16(MPSADBW),
+	/*3406*/ uint16(xReadSlashR),
+	/*3407*/ uint16(xReadIb),
+	/*3408*/ uint16(xArgXmm1),
+	/*3409*/ uint16(xArgXmm2M128),
+	/*3410*/ uint16(xArgImm8u),
+	/*3411*/ uint16(xMatch),
+	/*3412*/ uint16(xCondPrefix), 1,
+	0x66, 3416,
+	/*3416*/ uint16(xSetOp), uint16(PCLMULQDQ),
+	/*3418*/ uint16(xReadSlashR),
+	/*3419*/ uint16(xReadIb),
+	/*3420*/ uint16(xArgXmm1),
+	/*3421*/ uint16(xArgXmm2M128),
+	/*3422*/ uint16(xArgImm8u),
+	/*3423*/ uint16(xMatch),
+	/*3424*/ uint16(xCondPrefix), 1,
+	0x66, 3428,
+	/*3428*/ uint16(xSetOp), uint16(PCMPESTRM),
+	/*3430*/ uint16(xReadSlashR),
+	/*3431*/ uint16(xReadIb),
+	/*3432*/ uint16(xArgXmm1),
+	/*3433*/ uint16(xArgXmm2M128),
+	/*3434*/ uint16(xArgImm8u),
+	/*3435*/ uint16(xMatch),
+	/*3436*/ uint16(xCondPrefix), 1,
+	0x66, 3440,
+	/*3440*/ uint16(xSetOp), uint16(PCMPESTRI),
+	/*3442*/ uint16(xReadSlashR),
+	/*3443*/ uint16(xReadIb),
+	/*3444*/ uint16(xArgXmm1),
+	/*3445*/ uint16(xArgXmm2M128),
+	/*3446*/ uint16(xArgImm8u),
+	/*3447*/ uint16(xMatch),
+	/*3448*/ uint16(xCondPrefix), 1,
+	0x66, 3452,
+	/*3452*/ uint16(xSetOp), uint16(PCMPISTRM),
+	/*3454*/ uint16(xReadSlashR),
+	/*3455*/ uint16(xReadIb),
+	/*3456*/ uint16(xArgXmm1),
+	/*3457*/ uint16(xArgXmm2M128),
+	/*3458*/ uint16(xArgImm8u),
+	/*3459*/ uint16(xMatch),
+	/*3460*/ uint16(xCondPrefix), 1,
+	0x66, 3464,
+	/*3464*/ uint16(xSetOp), uint16(PCMPISTRI),
+	/*3466*/ uint16(xReadSlashR),
+	/*3467*/ uint16(xReadIb),
+	/*3468*/ uint16(xArgXmm1),
+	/*3469*/ uint16(xArgXmm2M128),
+	/*3470*/ uint16(xArgImm8u),
+	/*3471*/ uint16(xMatch),
+	/*3472*/ uint16(xCondPrefix), 1,
+	0x66, 3476,
+	/*3476*/ uint16(xSetOp), uint16(AESKEYGENASSIST),
+	/*3478*/ uint16(xReadSlashR),
+	/*3479*/ uint16(xReadIb),
+	/*3480*/ uint16(xArgXmm1),
+	/*3481*/ uint16(xArgXmm2M128),
+	/*3482*/ uint16(xArgImm8u),
+	/*3483*/ uint16(xMatch),
+	/*3484*/ uint16(xCondIs64), 3487, 3503,
+	/*3487*/ uint16(xCondDataSize), 3491, 3497, 0,
+	/*3491*/ uint16(xSetOp), uint16(CMOVO),
+	/*3493*/ uint16(xReadSlashR),
+	/*3494*/ uint16(xArgR16),
+	/*3495*/ uint16(xArgRM16),
+	/*3496*/ uint16(xMatch),
+	/*3497*/ uint16(xSetOp), uint16(CMOVO),
+	/*3499*/ uint16(xReadSlashR),
+	/*3500*/ uint16(xArgR32),
+	/*3501*/ uint16(xArgRM32),
+	/*3502*/ uint16(xMatch),
+	/*3503*/ uint16(xCondDataSize), 3491, 3497, 3507,
+	/*3507*/ uint16(xSetOp), uint16(CMOVO),
+	/*3509*/ uint16(xReadSlashR),
+	/*3510*/ uint16(xArgR64),
+	/*3511*/ uint16(xArgRM64),
+	/*3512*/ uint16(xMatch),
+	/*3513*/ uint16(xCondIs64), 3516, 3532,
+	/*3516*/ uint16(xCondDataSize), 3520, 3526, 0,
+	/*3520*/ uint16(xSetOp), uint16(CMOVNO),
+	/*3522*/ uint16(xReadSlashR),
+	/*3523*/ uint16(xArgR16),
+	/*3524*/ uint16(xArgRM16),
+	/*3525*/ uint16(xMatch),
+	/*3526*/ uint16(xSetOp), uint16(CMOVNO),
+	/*3528*/ uint16(xReadSlashR),
+	/*3529*/ uint16(xArgR32),
+	/*3530*/ uint16(xArgRM32),
+	/*3531*/ uint16(xMatch),
+	/*3532*/ uint16(xCondDataSize), 3520, 3526, 3536,
+	/*3536*/ uint16(xSetOp), uint16(CMOVNO),
+	/*3538*/ uint16(xReadSlashR),
+	/*3539*/ uint16(xArgR64),
+	/*3540*/ uint16(xArgRM64),
+	/*3541*/ uint16(xMatch),
+	/*3542*/ uint16(xCondIs64), 3545, 3561,
+	/*3545*/ uint16(xCondDataSize), 3549, 3555, 0,
+	/*3549*/ uint16(xSetOp), uint16(CMOVB),
+	/*3551*/ uint16(xReadSlashR),
+	/*3552*/ uint16(xArgR16),
+	/*3553*/ uint16(xArgRM16),
+	/*3554*/ uint16(xMatch),
+	/*3555*/ uint16(xSetOp), uint16(CMOVB),
+	/*3557*/ uint16(xReadSlashR),
+	/*3558*/ uint16(xArgR32),
+	/*3559*/ uint16(xArgRM32),
+	/*3560*/ uint16(xMatch),
+	/*3561*/ uint16(xCondDataSize), 3549, 3555, 3565,
+	/*3565*/ uint16(xSetOp), uint16(CMOVB),
+	/*3567*/ uint16(xReadSlashR),
+	/*3568*/ uint16(xArgR64),
+	/*3569*/ uint16(xArgRM64),
+	/*3570*/ uint16(xMatch),
+	/*3571*/ uint16(xCondIs64), 3574, 3590,
+	/*3574*/ uint16(xCondDataSize), 3578, 3584, 0,
+	/*3578*/ uint16(xSetOp), uint16(CMOVAE),
+	/*3580*/ uint16(xReadSlashR),
+	/*3581*/ uint16(xArgR16),
+	/*3582*/ uint16(xArgRM16),
+	/*3583*/ uint16(xMatch),
+	/*3584*/ uint16(xSetOp), uint16(CMOVAE),
+	/*3586*/ uint16(xReadSlashR),
+	/*3587*/ uint16(xArgR32),
+	/*3588*/ uint16(xArgRM32),
+	/*3589*/ uint16(xMatch),
+	/*3590*/ uint16(xCondDataSize), 3578, 3584, 3594,
+	/*3594*/ uint16(xSetOp), uint16(CMOVAE),
+	/*3596*/ uint16(xReadSlashR),
+	/*3597*/ uint16(xArgR64),
+	/*3598*/ uint16(xArgRM64),
+	/*3599*/ uint16(xMatch),
+	/*3600*/ uint16(xCondIs64), 3603, 3619,
+	/*3603*/ uint16(xCondDataSize), 3607, 3613, 0,
+	/*3607*/ uint16(xSetOp), uint16(CMOVE),
+	/*3609*/ uint16(xReadSlashR),
+	/*3610*/ uint16(xArgR16),
+	/*3611*/ uint16(xArgRM16),
+	/*3612*/ uint16(xMatch),
+	/*3613*/ uint16(xSetOp), uint16(CMOVE),
+	/*3615*/ uint16(xReadSlashR),
+	/*3616*/ uint16(xArgR32),
+	/*3617*/ uint16(xArgRM32),
+	/*3618*/ uint16(xMatch),
+	/*3619*/ uint16(xCondDataSize), 3607, 3613, 3623,
+	/*3623*/ uint16(xSetOp), uint16(CMOVE),
+	/*3625*/ uint16(xReadSlashR),
+	/*3626*/ uint16(xArgR64),
+	/*3627*/ uint16(xArgRM64),
+	/*3628*/ uint16(xMatch),
+	/*3629*/ uint16(xCondIs64), 3632, 3648,
+	/*3632*/ uint16(xCondDataSize), 3636, 3642, 0,
+	/*3636*/ uint16(xSetOp), uint16(CMOVNE),
+	/*3638*/ uint16(xReadSlashR),
+	/*3639*/ uint16(xArgR16),
+	/*3640*/ uint16(xArgRM16),
+	/*3641*/ uint16(xMatch),
+	/*3642*/ uint16(xSetOp), uint16(CMOVNE),
+	/*3644*/ uint16(xReadSlashR),
+	/*3645*/ uint16(xArgR32),
+	/*3646*/ uint16(xArgRM32),
+	/*3647*/ uint16(xMatch),
+	/*3648*/ uint16(xCondDataSize), 3636, 3642, 3652,
+	/*3652*/ uint16(xSetOp), uint16(CMOVNE),
+	/*3654*/ uint16(xReadSlashR),
+	/*3655*/ uint16(xArgR64),
+	/*3656*/ uint16(xArgRM64),
+	/*3657*/ uint16(xMatch),
+	/*3658*/ uint16(xCondIs64), 3661, 3677,
+	/*3661*/ uint16(xCondDataSize), 3665, 3671, 0,
+	/*3665*/ uint16(xSetOp), uint16(CMOVBE),
+	/*3667*/ uint16(xReadSlashR),
+	/*3668*/ uint16(xArgR16),
+	/*3669*/ uint16(xArgRM16),
+	/*3670*/ uint16(xMatch),
+	/*3671*/ uint16(xSetOp), uint16(CMOVBE),
+	/*3673*/ uint16(xReadSlashR),
+	/*3674*/ uint16(xArgR32),
+	/*3675*/ uint16(xArgRM32),
+	/*3676*/ uint16(xMatch),
+	/*3677*/ uint16(xCondDataSize), 3665, 3671, 3681,
+	/*3681*/ uint16(xSetOp), uint16(CMOVBE),
+	/*3683*/ uint16(xReadSlashR),
+	/*3684*/ uint16(xArgR64),
+	/*3685*/ uint16(xArgRM64),
+	/*3686*/ uint16(xMatch),
+	/*3687*/ uint16(xCondIs64), 3690, 3706,
+	/*3690*/ uint16(xCondDataSize), 3694, 3700, 0,
+	/*3694*/ uint16(xSetOp), uint16(CMOVA),
+	/*3696*/ uint16(xReadSlashR),
+	/*3697*/ uint16(xArgR16),
+	/*3698*/ uint16(xArgRM16),
+	/*3699*/ uint16(xMatch),
+	/*3700*/ uint16(xSetOp), uint16(CMOVA),
+	/*3702*/ uint16(xReadSlashR),
+	/*3703*/ uint16(xArgR32),
+	/*3704*/ uint16(xArgRM32),
+	/*3705*/ uint16(xMatch),
+	/*3706*/ uint16(xCondDataSize), 3694, 3700, 3710,
+	/*3710*/ uint16(xSetOp), uint16(CMOVA),
+	/*3712*/ uint16(xReadSlashR),
+	/*3713*/ uint16(xArgR64),
+	/*3714*/ uint16(xArgRM64),
+	/*3715*/ uint16(xMatch),
+	/*3716*/ uint16(xCondIs64), 3719, 3735,
+	/*3719*/ uint16(xCondDataSize), 3723, 3729, 0,
+	/*3723*/ uint16(xSetOp), uint16(CMOVS),
+	/*3725*/ uint16(xReadSlashR),
+	/*3726*/ uint16(xArgR16),
+	/*3727*/ uint16(xArgRM16),
+	/*3728*/ uint16(xMatch),
+	/*3729*/ uint16(xSetOp), uint16(CMOVS),
+	/*3731*/ uint16(xReadSlashR),
+	/*3732*/ uint16(xArgR32),
+	/*3733*/ uint16(xArgRM32),
+	/*3734*/ uint16(xMatch),
+	/*3735*/ uint16(xCondDataSize), 3723, 3729, 3739,
+	/*3739*/ uint16(xSetOp), uint16(CMOVS),
+	/*3741*/ uint16(xReadSlashR),
+	/*3742*/ uint16(xArgR64),
+	/*3743*/ uint16(xArgRM64),
+	/*3744*/ uint16(xMatch),
+	/*3745*/ uint16(xCondIs64), 3748, 3764,
+	/*3748*/ uint16(xCondDataSize), 3752, 3758, 0,
+	/*3752*/ uint16(xSetOp), uint16(CMOVNS),
+	/*3754*/ uint16(xReadSlashR),
+	/*3755*/ uint16(xArgR16),
+	/*3756*/ uint16(xArgRM16),
+	/*3757*/ uint16(xMatch),
+	/*3758*/ uint16(xSetOp), uint16(CMOVNS),
+	/*3760*/ uint16(xReadSlashR),
+	/*3761*/ uint16(xArgR32),
+	/*3762*/ uint16(xArgRM32),
+	/*3763*/ uint16(xMatch),
+	/*3764*/ uint16(xCondDataSize), 3752, 3758, 3768,
+	/*3768*/ uint16(xSetOp), uint16(CMOVNS),
+	/*3770*/ uint16(xReadSlashR),
+	/*3771*/ uint16(xArgR64),
+	/*3772*/ uint16(xArgRM64),
+	/*3773*/ uint16(xMatch),
+	/*3774*/ uint16(xCondIs64), 3777, 3793,
+	/*3777*/ uint16(xCondDataSize), 3781, 3787, 0,
+	/*3781*/ uint16(xSetOp), uint16(CMOVP),
+	/*3783*/ uint16(xReadSlashR),
+	/*3784*/ uint16(xArgR16),
+	/*3785*/ uint16(xArgRM16),
+	/*3786*/ uint16(xMatch),
+	/*3787*/ uint16(xSetOp), uint16(CMOVP),
+	/*3789*/ uint16(xReadSlashR),
+	/*3790*/ uint16(xArgR32),
+	/*3791*/ uint16(xArgRM32),
+	/*3792*/ uint16(xMatch),
+	/*3793*/ uint16(xCondDataSize), 3781, 3787, 3797,
+	/*3797*/ uint16(xSetOp), uint16(CMOVP),
+	/*3799*/ uint16(xReadSlashR),
+	/*3800*/ uint16(xArgR64),
+	/*3801*/ uint16(xArgRM64),
+	/*3802*/ uint16(xMatch),
+	/*3803*/ uint16(xCondIs64), 3806, 3822,
+	/*3806*/ uint16(xCondDataSize), 3810, 3816, 0,
+	/*3810*/ uint16(xSetOp), uint16(CMOVNP),
+	/*3812*/ uint16(xReadSlashR),
+	/*3813*/ uint16(xArgR16),
+	/*3814*/ uint16(xArgRM16),
+	/*3815*/ uint16(xMatch),
+	/*3816*/ uint16(xSetOp), uint16(CMOVNP),
+	/*3818*/ uint16(xReadSlashR),
+	/*3819*/ uint16(xArgR32),
+	/*3820*/ uint16(xArgRM32),
+	/*3821*/ uint16(xMatch),
+	/*3822*/ uint16(xCondDataSize), 3810, 3816, 3826,
+	/*3826*/ uint16(xSetOp), uint16(CMOVNP),
+	/*3828*/ uint16(xReadSlashR),
+	/*3829*/ uint16(xArgR64),
+	/*3830*/ uint16(xArgRM64),
+	/*3831*/ uint16(xMatch),
+	/*3832*/ uint16(xCondIs64), 3835, 3851,
+	/*3835*/ uint16(xCondDataSize), 3839, 3845, 0,
+	/*3839*/ uint16(xSetOp), uint16(CMOVL),
+	/*3841*/ uint16(xReadSlashR),
+	/*3842*/ uint16(xArgR16),
+	/*3843*/ uint16(xArgRM16),
+	/*3844*/ uint16(xMatch),
+	/*3845*/ uint16(xSetOp), uint16(CMOVL),
+	/*3847*/ uint16(xReadSlashR),
+	/*3848*/ uint16(xArgR32),
+	/*3849*/ uint16(xArgRM32),
+	/*3850*/ uint16(xMatch),
+	/*3851*/ uint16(xCondDataSize), 3839, 3845, 3855,
+	/*3855*/ uint16(xSetOp), uint16(CMOVL),
+	/*3857*/ uint16(xReadSlashR),
+	/*3858*/ uint16(xArgR64),
+	/*3859*/ uint16(xArgRM64),
+	/*3860*/ uint16(xMatch),
+	/*3861*/ uint16(xCondIs64), 3864, 3880,
+	/*3864*/ uint16(xCondDataSize), 3868, 3874, 0,
+	/*3868*/ uint16(xSetOp), uint16(CMOVGE),
+	/*3870*/ uint16(xReadSlashR),
+	/*3871*/ uint16(xArgR16),
+	/*3872*/ uint16(xArgRM16),
+	/*3873*/ uint16(xMatch),
+	/*3874*/ uint16(xSetOp), uint16(CMOVGE),
+	/*3876*/ uint16(xReadSlashR),
+	/*3877*/ uint16(xArgR32),
+	/*3878*/ uint16(xArgRM32),
+	/*3879*/ uint16(xMatch),
+	/*3880*/ uint16(xCondDataSize), 3868, 3874, 3884,
+	/*3884*/ uint16(xSetOp), uint16(CMOVGE),
+	/*3886*/ uint16(xReadSlashR),
+	/*3887*/ uint16(xArgR64),
+	/*3888*/ uint16(xArgRM64),
+	/*3889*/ uint16(xMatch),
+	/*3890*/ uint16(xCondIs64), 3893, 3909,
+	/*3893*/ uint16(xCondDataSize), 3897, 3903, 0,
+	/*3897*/ uint16(xSetOp), uint16(CMOVLE),
+	/*3899*/ uint16(xReadSlashR),
+	/*3900*/ uint16(xArgR16),
+	/*3901*/ uint16(xArgRM16),
+	/*3902*/ uint16(xMatch),
+	/*3903*/ uint16(xSetOp), uint16(CMOVLE),
+	/*3905*/ uint16(xReadSlashR),
+	/*3906*/ uint16(xArgR32),
+	/*3907*/ uint16(xArgRM32),
+	/*3908*/ uint16(xMatch),
+	/*3909*/ uint16(xCondDataSize), 3897, 3903, 3913,
+	/*3913*/ uint16(xSetOp), uint16(CMOVLE),
+	/*3915*/ uint16(xReadSlashR),
+	/*3916*/ uint16(xArgR64),
+	/*3917*/ uint16(xArgRM64),
+	/*3918*/ uint16(xMatch),
+	/*3919*/ uint16(xCondIs64), 3922, 3938,
+	/*3922*/ uint16(xCondDataSize), 3926, 3932, 0,
+	/*3926*/ uint16(xSetOp), uint16(CMOVG),
+	/*3928*/ uint16(xReadSlashR),
+	/*3929*/ uint16(xArgR16),
+	/*3930*/ uint16(xArgRM16),
+	/*3931*/ uint16(xMatch),
+	/*3932*/ uint16(xSetOp), uint16(CMOVG),
+	/*3934*/ uint16(xReadSlashR),
+	/*3935*/ uint16(xArgR32),
+	/*3936*/ uint16(xArgRM32),
+	/*3937*/ uint16(xMatch),
+	/*3938*/ uint16(xCondDataSize), 3926, 3932, 3942,
+	/*3942*/ uint16(xSetOp), uint16(CMOVG),
+	/*3944*/ uint16(xReadSlashR),
+	/*3945*/ uint16(xArgR64),
+	/*3946*/ uint16(xArgRM64),
+	/*3947*/ uint16(xMatch),
+	/*3948*/ uint16(xCondPrefix), 2,
+	0x66, 3960,
+	0x0, 3954,
+	/*3954*/ uint16(xSetOp), uint16(MOVMSKPS),
+	/*3956*/ uint16(xReadSlashR),
+	/*3957*/ uint16(xArgR32),
+	/*3958*/ uint16(xArgXmm2),
+	/*3959*/ uint16(xMatch),
+	/*3960*/ uint16(xSetOp), uint16(MOVMSKPD),
+	/*3962*/ uint16(xReadSlashR),
+	/*3963*/ uint16(xArgR32),
+	/*3964*/ uint16(xArgXmm2),
+	/*3965*/ uint16(xMatch),
+	/*3966*/ uint16(xCondPrefix), 4,
+	0xF3, 3994,
+	0xF2, 3988,
+	0x66, 3982,
+	0x0, 3976,
+	/*3976*/ uint16(xSetOp), uint16(SQRTPS),
+	/*3978*/ uint16(xReadSlashR),
+	/*3979*/ uint16(xArgXmm1),
+	/*3980*/ uint16(xArgXmm2M128),
+	/*3981*/ uint16(xMatch),
+	/*3982*/ uint16(xSetOp), uint16(SQRTPD),
+	/*3984*/ uint16(xReadSlashR),
+	/*3985*/ uint16(xArgXmm1),
+	/*3986*/ uint16(xArgXmm2M128),
+	/*3987*/ uint16(xMatch),
+	/*3988*/ uint16(xSetOp), uint16(SQRTSD),
+	/*3990*/ uint16(xReadSlashR),
+	/*3991*/ uint16(xArgXmm1),
+	/*3992*/ uint16(xArgXmm2M64),
+	/*3993*/ uint16(xMatch),
+	/*3994*/ uint16(xSetOp), uint16(SQRTSS),
+	/*3996*/ uint16(xReadSlashR),
+	/*3997*/ uint16(xArgXmm1),
+	/*3998*/ uint16(xArgXmm2M32),
+	/*3999*/ uint16(xMatch),
+	/*4000*/ uint16(xCondPrefix), 2,
+	0xF3, 4012,
+	0x0, 4006,
+	/*4006*/ uint16(xSetOp), uint16(RSQRTPS),
+	/*4008*/ uint16(xReadSlashR),
+	/*4009*/ uint16(xArgXmm1),
+	/*4010*/ uint16(xArgXmm2M128),
+	/*4011*/ uint16(xMatch),
+	/*4012*/ uint16(xSetOp), uint16(RSQRTSS),
+	/*4014*/ uint16(xReadSlashR),
+	/*4015*/ uint16(xArgXmm1),
+	/*4016*/ uint16(xArgXmm2M32),
+	/*4017*/ uint16(xMatch),
+	/*4018*/ uint16(xCondPrefix), 2,
+	0xF3, 4030,
+	0x0, 4024,
+	/*4024*/ uint16(xSetOp), uint16(RCPPS),
+	/*4026*/ uint16(xReadSlashR),
+	/*4027*/ uint16(xArgXmm1),
+	/*4028*/ uint16(xArgXmm2M128),
+	/*4029*/ uint16(xMatch),
+	/*4030*/ uint16(xSetOp), uint16(RCPSS),
+	/*4032*/ uint16(xReadSlashR),
+	/*4033*/ uint16(xArgXmm1),
+	/*4034*/ uint16(xArgXmm2M32),
+	/*4035*/ uint16(xMatch),
+	/*4036*/ uint16(xCondPrefix), 2,
+	0x66, 4048,
+	0x0, 4042,
+	/*4042*/ uint16(xSetOp), uint16(ANDPS),
+	/*4044*/ uint16(xReadSlashR),
+	/*4045*/ uint16(xArgXmm1),
+	/*4046*/ uint16(xArgXmm2M128),
+	/*4047*/ uint16(xMatch),
+	/*4048*/ uint16(xSetOp), uint16(ANDPD),
+	/*4050*/ uint16(xReadSlashR),
+	/*4051*/ uint16(xArgXmm1),
+	/*4052*/ uint16(xArgXmm2M128),
+	/*4053*/ uint16(xMatch),
+	/*4054*/ uint16(xCondPrefix), 2,
+	0x66, 4066,
+	0x0, 4060,
+	/*4060*/ uint16(xSetOp), uint16(ANDNPS),
+	/*4062*/ uint16(xReadSlashR),
+	/*4063*/ uint16(xArgXmm1),
+	/*4064*/ uint16(xArgXmm2M128),
+	/*4065*/ uint16(xMatch),
+	/*4066*/ uint16(xSetOp), uint16(ANDNPD),
+	/*4068*/ uint16(xReadSlashR),
+	/*4069*/ uint16(xArgXmm1),
+	/*4070*/ uint16(xArgXmm2M128),
+	/*4071*/ uint16(xMatch),
+	/*4072*/ uint16(xCondPrefix), 2,
+	0x66, 4084,
+	0x0, 4078,
+	/*4078*/ uint16(xSetOp), uint16(ORPS),
+	/*4080*/ uint16(xReadSlashR),
+	/*4081*/ uint16(xArgXmm1),
+	/*4082*/ uint16(xArgXmm2M128),
+	/*4083*/ uint16(xMatch),
+	/*4084*/ uint16(xSetOp), uint16(ORPD),
+	/*4086*/ uint16(xReadSlashR),
+	/*4087*/ uint16(xArgXmm1),
+	/*4088*/ uint16(xArgXmm2M128),
+	/*4089*/ uint16(xMatch),
+	/*4090*/ uint16(xCondPrefix), 2,
+	0x66, 4102,
+	0x0, 4096,
+	/*4096*/ uint16(xSetOp), uint16(XORPS),
+	/*4098*/ uint16(xReadSlashR),
+	/*4099*/ uint16(xArgXmm1),
+	/*4100*/ uint16(xArgXmm2M128),
+	/*4101*/ uint16(xMatch),
+	/*4102*/ uint16(xSetOp), uint16(XORPD),
+	/*4104*/ uint16(xReadSlashR),
+	/*4105*/ uint16(xArgXmm1),
+	/*4106*/ uint16(xArgXmm2M128),
+	/*4107*/ uint16(xMatch),
+	/*4108*/ uint16(xCondPrefix), 4,
+	0xF3, 4136,
+	0xF2, 4130,
+	0x66, 4124,
+	0x0, 4118,
+	/*4118*/ uint16(xSetOp), uint16(ADDPS),
+	/*4120*/ uint16(xReadSlashR),
+	/*4121*/ uint16(xArgXmm1),
+	/*4122*/ uint16(xArgXmm2M128),
+	/*4123*/ uint16(xMatch),
+	/*4124*/ uint16(xSetOp), uint16(ADDPD),
+	/*4126*/ uint16(xReadSlashR),
+	/*4127*/ uint16(xArgXmm1),
+	/*4128*/ uint16(xArgXmm2M128),
+	/*4129*/ uint16(xMatch),
+	/*4130*/ uint16(xSetOp), uint16(ADDSD),
+	/*4132*/ uint16(xReadSlashR),
+	/*4133*/ uint16(xArgXmm1),
+	/*4134*/ uint16(xArgXmm2M64),
+	/*4135*/ uint16(xMatch),
+	/*4136*/ uint16(xSetOp), uint16(ADDSS),
+	/*4138*/ uint16(xReadSlashR),
+	/*4139*/ uint16(xArgXmm1),
+	/*4140*/ uint16(xArgXmm2M32),
+	/*4141*/ uint16(xMatch),
+	/*4142*/ uint16(xCondPrefix), 4,
+	0xF3, 4170,
+	0xF2, 4164,
+	0x66, 4158,
+	0x0, 4152,
+	/*4152*/ uint16(xSetOp), uint16(MULPS),
+	/*4154*/ uint16(xReadSlashR),
+	/*4155*/ uint16(xArgXmm1),
+	/*4156*/ uint16(xArgXmm2M128),
+	/*4157*/ uint16(xMatch),
+	/*4158*/ uint16(xSetOp), uint16(MULPD),
+	/*4160*/ uint16(xReadSlashR),
+	/*4161*/ uint16(xArgXmm1),
+	/*4162*/ uint16(xArgXmm2M128),
+	/*4163*/ uint16(xMatch),
+	/*4164*/ uint16(xSetOp), uint16(MULSD),
+	/*4166*/ uint16(xReadSlashR),
+	/*4167*/ uint16(xArgXmm1),
+	/*4168*/ uint16(xArgXmm2M64),
+	/*4169*/ uint16(xMatch),
+	/*4170*/ uint16(xSetOp), uint16(MULSS),
+	/*4172*/ uint16(xReadSlashR),
+	/*4173*/ uint16(xArgXmm1),
+	/*4174*/ uint16(xArgXmm2M32),
+	/*4175*/ uint16(xMatch),
+	/*4176*/ uint16(xCondPrefix), 4,
+	0xF3, 4204,
+	0xF2, 4198,
+	0x66, 4192,
+	0x0, 4186,
+	/*4186*/ uint16(xSetOp), uint16(CVTPS2PD),
+	/*4188*/ uint16(xReadSlashR),
+	/*4189*/ uint16(xArgXmm1),
+	/*4190*/ uint16(xArgXmm2M64),
+	/*4191*/ uint16(xMatch),
+	/*4192*/ uint16(xSetOp), uint16(CVTPD2PS),
+	/*4194*/ uint16(xReadSlashR),
+	/*4195*/ uint16(xArgXmm1),
+	/*4196*/ uint16(xArgXmm2M128),
+	/*4197*/ uint16(xMatch),
+	/*4198*/ uint16(xSetOp), uint16(CVTSD2SS),
+	/*4200*/ uint16(xReadSlashR),
+	/*4201*/ uint16(xArgXmm1),
+	/*4202*/ uint16(xArgXmm2M64),
+	/*4203*/ uint16(xMatch),
+	/*4204*/ uint16(xSetOp), uint16(CVTSS2SD),
+	/*4206*/ uint16(xReadSlashR),
+	/*4207*/ uint16(xArgXmm1),
+	/*4208*/ uint16(xArgXmm2M32),
+	/*4209*/ uint16(xMatch),
+	/*4210*/ uint16(xCondPrefix), 3,
+	0xF3, 4230,
+	0x66, 4224,
+	0x0, 4218,
+	/*4218*/ uint16(xSetOp), uint16(CVTDQ2PS),
+	/*4220*/ uint16(xReadSlashR),
+	/*4221*/ uint16(xArgXmm1),
+	/*4222*/ uint16(xArgXmm2M128),
+	/*4223*/ uint16(xMatch),
+	/*4224*/ uint16(xSetOp), uint16(CVTPS2DQ),
+	/*4226*/ uint16(xReadSlashR),
+	/*4227*/ uint16(xArgXmm1),
+	/*4228*/ uint16(xArgXmm2M128),
+	/*4229*/ uint16(xMatch),
+	/*4230*/ uint16(xSetOp), uint16(CVTTPS2DQ),
+	/*4232*/ uint16(xReadSlashR),
+	/*4233*/ uint16(xArgXmm1),
+	/*4234*/ uint16(xArgXmm2M128),
+	/*4235*/ uint16(xMatch),
+	/*4236*/ uint16(xCondPrefix), 4,
+	0xF3, 4264,
+	0xF2, 4258,
+	0x66, 4252,
+	0x0, 4246,
+	/*4246*/ uint16(xSetOp), uint16(SUBPS),
+	/*4248*/ uint16(xReadSlashR),
+	/*4249*/ uint16(xArgXmm1),
+	/*4250*/ uint16(xArgXmm2M128),
+	/*4251*/ uint16(xMatch),
+	/*4252*/ uint16(xSetOp), uint16(SUBPD),
+	/*4254*/ uint16(xReadSlashR),
+	/*4255*/ uint16(xArgXmm1),
+	/*4256*/ uint16(xArgXmm2M128),
+	/*4257*/ uint16(xMatch),
+	/*4258*/ uint16(xSetOp), uint16(SUBSD),
+	/*4260*/ uint16(xReadSlashR),
+	/*4261*/ uint16(xArgXmm1),
+	/*4262*/ uint16(xArgXmm2M64),
+	/*4263*/ uint16(xMatch),
+	/*4264*/ uint16(xSetOp), uint16(SUBSS),
+	/*4266*/ uint16(xReadSlashR),
+	/*4267*/ uint16(xArgXmm1),
+	/*4268*/ uint16(xArgXmm2M32),
+	/*4269*/ uint16(xMatch),
+	/*4270*/ uint16(xCondPrefix), 4,
+	0xF3, 4298,
+	0xF2, 4292,
+	0x66, 4286,
+	0x0, 4280,
+	/*4280*/ uint16(xSetOp), uint16(MINPS),
+	/*4282*/ uint16(xReadSlashR),
+	/*4283*/ uint16(xArgXmm1),
+	/*4284*/ uint16(xArgXmm2M128),
+	/*4285*/ uint16(xMatch),
+	/*4286*/ uint16(xSetOp), uint16(MINPD),
+	/*4288*/ uint16(xReadSlashR),
+	/*4289*/ uint16(xArgXmm1),
+	/*4290*/ uint16(xArgXmm2M128),
+	/*4291*/ uint16(xMatch),
+	/*4292*/ uint16(xSetOp), uint16(MINSD),
+	/*4294*/ uint16(xReadSlashR),
+	/*4295*/ uint16(xArgXmm1),
+	/*4296*/ uint16(xArgXmm2M64),
+	/*4297*/ uint16(xMatch),
+	/*4298*/ uint16(xSetOp), uint16(MINSS),
+	/*4300*/ uint16(xReadSlashR),
+	/*4301*/ uint16(xArgXmm1),
+	/*4302*/ uint16(xArgXmm2M32),
+	/*4303*/ uint16(xMatch),
+	/*4304*/ uint16(xCondPrefix), 4,
+	0xF3, 4332,
+	0xF2, 4326,
+	0x66, 4320,
+	0x0, 4314,
+	/*4314*/ uint16(xSetOp), uint16(DIVPS),
+	/*4316*/ uint16(xReadSlashR),
+	/*4317*/ uint16(xArgXmm1),
+	/*4318*/ uint16(xArgXmm2M128),
+	/*4319*/ uint16(xMatch),
+	/*4320*/ uint16(xSetOp), uint16(DIVPD),
+	/*4322*/ uint16(xReadSlashR),
+	/*4323*/ uint16(xArgXmm1),
+	/*4324*/ uint16(xArgXmm2M128),
+	/*4325*/ uint16(xMatch),
+	/*4326*/ uint16(xSetOp), uint16(DIVSD),
+	/*4328*/ uint16(xReadSlashR),
+	/*4329*/ uint16(xArgXmm1),
+	/*4330*/ uint16(xArgXmm2M64),
+	/*4331*/ uint16(xMatch),
+	/*4332*/ uint16(xSetOp), uint16(DIVSS),
+	/*4334*/ uint16(xReadSlashR),
+	/*4335*/ uint16(xArgXmm1),
+	/*4336*/ uint16(xArgXmm2M32),
+	/*4337*/ uint16(xMatch),
+	/*4338*/ uint16(xCondPrefix), 4,
+	0xF3, 4366,
+	0xF2, 4360,
+	0x66, 4354,
+	0x0, 4348,
+	/*4348*/ uint16(xSetOp), uint16(MAXPS),
+	/*4350*/ uint16(xReadSlashR),
+	/*4351*/ uint16(xArgXmm1),
+	/*4352*/ uint16(xArgXmm2M128),
+	/*4353*/ uint16(xMatch),
+	/*4354*/ uint16(xSetOp), uint16(MAXPD),
+	/*4356*/ uint16(xReadSlashR),
+	/*4357*/ uint16(xArgXmm1),
+	/*4358*/ uint16(xArgXmm2M128),
+	/*4359*/ uint16(xMatch),
+	/*4360*/ uint16(xSetOp), uint16(MAXSD),
+	/*4362*/ uint16(xReadSlashR),
+	/*4363*/ uint16(xArgXmm1),
+	/*4364*/ uint16(xArgXmm2M64),
+	/*4365*/ uint16(xMatch),
+	/*4366*/ uint16(xSetOp), uint16(MAXSS),
+	/*4368*/ uint16(xReadSlashR),
+	/*4369*/ uint16(xArgXmm1),
+	/*4370*/ uint16(xArgXmm2M32),
+	/*4371*/ uint16(xMatch),
+	/*4372*/ uint16(xCondPrefix), 2,
+	0x66, 4384,
+	0x0, 4378,
+	/*4378*/ uint16(xSetOp), uint16(PUNPCKLBW),
+	/*4380*/ uint16(xReadSlashR),
+	/*4381*/ uint16(xArgMm),
+	/*4382*/ uint16(xArgMmM32),
+	/*4383*/ uint16(xMatch),
+	/*4384*/ uint16(xSetOp), uint16(PUNPCKLBW),
+	/*4386*/ uint16(xReadSlashR),
+	/*4387*/ uint16(xArgXmm1),
+	/*4388*/ uint16(xArgXmm2M128),
+	/*4389*/ uint16(xMatch),
+	/*4390*/ uint16(xCondPrefix), 2,
+	0x66, 4402,
+	0x0, 4396,
+	/*4396*/ uint16(xSetOp), uint16(PUNPCKLWD),
+	/*4398*/ uint16(xReadSlashR),
+	/*4399*/ uint16(xArgMm),
+	/*4400*/ uint16(xArgMmM32),
+	/*4401*/ uint16(xMatch),
+	/*4402*/ uint16(xSetOp), uint16(PUNPCKLWD),
+	/*4404*/ uint16(xReadSlashR),
+	/*4405*/ uint16(xArgXmm1),
+	/*4406*/ uint16(xArgXmm2M128),
+	/*4407*/ uint16(xMatch),
+	/*4408*/ uint16(xCondPrefix), 2,
+	0x66, 4420,
+	0x0, 4414,
+	/*4414*/ uint16(xSetOp), uint16(PUNPCKLDQ),
+	/*4416*/ uint16(xReadSlashR),
+	/*4417*/ uint16(xArgMm),
+	/*4418*/ uint16(xArgMmM32),
+	/*4419*/ uint16(xMatch),
+	/*4420*/ uint16(xSetOp), uint16(PUNPCKLDQ),
+	/*4422*/ uint16(xReadSlashR),
+	/*4423*/ uint16(xArgXmm1),
+	/*4424*/ uint16(xArgXmm2M128),
+	/*4425*/ uint16(xMatch),
+	/*4426*/ uint16(xCondPrefix), 2,
+	0x66, 4438,
+	0x0, 4432,
+	/*4432*/ uint16(xSetOp), uint16(PACKSSWB),
+	/*4434*/ uint16(xReadSlashR),
+	/*4435*/ uint16(xArgMm1),
+	/*4436*/ uint16(xArgMm2M64),
+	/*4437*/ uint16(xMatch),
+	/*4438*/ uint16(xSetOp), uint16(PACKSSWB),
+	/*4440*/ uint16(xReadSlashR),
+	/*4441*/ uint16(xArgXmm1),
+	/*4442*/ uint16(xArgXmm2M128),
+	/*4443*/ uint16(xMatch),
+	/*4444*/ uint16(xCondPrefix), 2,
+	0x66, 4456,
+	0x0, 4450,
+	/*4450*/ uint16(xSetOp), uint16(PCMPGTB),
+	/*4452*/ uint16(xReadSlashR),
+	/*4453*/ uint16(xArgMm),
+	/*4454*/ uint16(xArgMmM64),
+	/*4455*/ uint16(xMatch),
+	/*4456*/ uint16(xSetOp), uint16(PCMPGTB),
+	/*4458*/ uint16(xReadSlashR),
+	/*4459*/ uint16(xArgXmm1),
+	/*4460*/ uint16(xArgXmm2M128),
+	/*4461*/ uint16(xMatch),
+	/*4462*/ uint16(xCondPrefix), 2,
+	0x66, 4474,
+	0x0, 4468,
+	/*4468*/ uint16(xSetOp), uint16(PCMPGTW),
+	/*4470*/ uint16(xReadSlashR),
+	/*4471*/ uint16(xArgMm),
+	/*4472*/ uint16(xArgMmM64),
+	/*4473*/ uint16(xMatch),
+	/*4474*/ uint16(xSetOp), uint16(PCMPGTW),
+	/*4476*/ uint16(xReadSlashR),
+	/*4477*/ uint16(xArgXmm1),
+	/*4478*/ uint16(xArgXmm2M128),
+	/*4479*/ uint16(xMatch),
+	/*4480*/ uint16(xCondPrefix), 2,
+	0x66, 4492,
+	0x0, 4486,
+	/*4486*/ uint16(xSetOp), uint16(PCMPGTD),
+	/*4488*/ uint16(xReadSlashR),
+	/*4489*/ uint16(xArgMm),
+	/*4490*/ uint16(xArgMmM64),
+	/*4491*/ uint16(xMatch),
+	/*4492*/ uint16(xSetOp), uint16(PCMPGTD),
+	/*4494*/ uint16(xReadSlashR),
+	/*4495*/ uint16(xArgXmm1),
+	/*4496*/ uint16(xArgXmm2M128),
+	/*4497*/ uint16(xMatch),
+	/*4498*/ uint16(xCondPrefix), 2,
+	0x66, 4510,
+	0x0, 4504,
+	/*4504*/ uint16(xSetOp), uint16(PACKUSWB),
+	/*4506*/ uint16(xReadSlashR),
+	/*4507*/ uint16(xArgMm),
+	/*4508*/ uint16(xArgMmM64),
+	/*4509*/ uint16(xMatch),
+	/*4510*/ uint16(xSetOp), uint16(PACKUSWB),
+	/*4512*/ uint16(xReadSlashR),
+	/*4513*/ uint16(xArgXmm1),
+	/*4514*/ uint16(xArgXmm2M128),
+	/*4515*/ uint16(xMatch),
+	/*4516*/ uint16(xCondPrefix), 2,
+	0x66, 4528,
+	0x0, 4522,
+	/*4522*/ uint16(xSetOp), uint16(PUNPCKHBW),
+	/*4524*/ uint16(xReadSlashR),
+	/*4525*/ uint16(xArgMm),
+	/*4526*/ uint16(xArgMmM64),
+	/*4527*/ uint16(xMatch),
+	/*4528*/ uint16(xSetOp), uint16(PUNPCKHBW),
+	/*4530*/ uint16(xReadSlashR),
+	/*4531*/ uint16(xArgXmm1),
+	/*4532*/ uint16(xArgXmm2M128),
+	/*4533*/ uint16(xMatch),
+	/*4534*/ uint16(xCondPrefix), 2,
+	0x66, 4546,
+	0x0, 4540,
+	/*4540*/ uint16(xSetOp), uint16(PUNPCKHWD),
+	/*4542*/ uint16(xReadSlashR),
+	/*4543*/ uint16(xArgMm),
+	/*4544*/ uint16(xArgMmM64),
+	/*4545*/ uint16(xMatch),
+	/*4546*/ uint16(xSetOp), uint16(PUNPCKHWD),
+	/*4548*/ uint16(xReadSlashR),
+	/*4549*/ uint16(xArgXmm1),
+	/*4550*/ uint16(xArgXmm2M128),
+	/*4551*/ uint16(xMatch),
+	/*4552*/ uint16(xCondPrefix), 2,
+	0x66, 4564,
+	0x0, 4558,
+	/*4558*/ uint16(xSetOp), uint16(PUNPCKHDQ),
+	/*4560*/ uint16(xReadSlashR),
+	/*4561*/ uint16(xArgMm),
+	/*4562*/ uint16(xArgMmM64),
+	/*4563*/ uint16(xMatch),
+	/*4564*/ uint16(xSetOp), uint16(PUNPCKHDQ),
+	/*4566*/ uint16(xReadSlashR),
+	/*4567*/ uint16(xArgXmm1),
+	/*4568*/ uint16(xArgXmm2M128),
+	/*4569*/ uint16(xMatch),
+	/*4570*/ uint16(xCondPrefix), 2,
+	0x66, 4582,
+	0x0, 4576,
+	/*4576*/ uint16(xSetOp), uint16(PACKSSDW),
+	/*4578*/ uint16(xReadSlashR),
+	/*4579*/ uint16(xArgMm1),
+	/*4580*/ uint16(xArgMm2M64),
+	/*4581*/ uint16(xMatch),
+	/*4582*/ uint16(xSetOp), uint16(PACKSSDW),
+	/*4584*/ uint16(xReadSlashR),
+	/*4585*/ uint16(xArgXmm1),
+	/*4586*/ uint16(xArgXmm2M128),
+	/*4587*/ uint16(xMatch),
+	/*4588*/ uint16(xCondPrefix), 1,
+	0x66, 4592,
+	/*4592*/ uint16(xSetOp), uint16(PUNPCKLQDQ),
+	/*4594*/ uint16(xReadSlashR),
+	/*4595*/ uint16(xArgXmm1),
+	/*4596*/ uint16(xArgXmm2M128),
+	/*4597*/ uint16(xMatch),
+	/*4598*/ uint16(xCondPrefix), 1,
+	0x66, 4602,
+	/*4602*/ uint16(xSetOp), uint16(PUNPCKHQDQ),
+	/*4604*/ uint16(xReadSlashR),
+	/*4605*/ uint16(xArgXmm1),
+	/*4606*/ uint16(xArgXmm2M128),
+	/*4607*/ uint16(xMatch),
+	/*4608*/ uint16(xCondIs64), 4611, 4649,
+	/*4611*/ uint16(xCondPrefix), 2,
+	0x66, 4633,
+	0x0, 4617,
+	/*4617*/ uint16(xCondDataSize), 4621, 4627, 0,
+	/*4621*/ uint16(xSetOp), uint16(MOVD),
+	/*4623*/ uint16(xReadSlashR),
+	/*4624*/ uint16(xArgMm),
+	/*4625*/ uint16(xArgRM32),
+	/*4626*/ uint16(xMatch),
+	/*4627*/ uint16(xSetOp), uint16(MOVD),
+	/*4629*/ uint16(xReadSlashR),
+	/*4630*/ uint16(xArgMm),
+	/*4631*/ uint16(xArgRM32),
+	/*4632*/ uint16(xMatch),
+	/*4633*/ uint16(xCondDataSize), 4637, 4643, 0,
+	/*4637*/ uint16(xSetOp), uint16(MOVD),
+	/*4639*/ uint16(xReadSlashR),
+	/*4640*/ uint16(xArgXmm),
+	/*4641*/ uint16(xArgRM32),
+	/*4642*/ uint16(xMatch),
+	/*4643*/ uint16(xSetOp), uint16(MOVD),
+	/*4645*/ uint16(xReadSlashR),
+	/*4646*/ uint16(xArgXmm),
+	/*4647*/ uint16(xArgRM32),
+	/*4648*/ uint16(xMatch),
+	/*4649*/ uint16(xCondPrefix), 2,
+	0x66, 4665,
+	0x0, 4655,
+	/*4655*/ uint16(xCondDataSize), 4621, 4627, 4659,
+	/*4659*/ uint16(xSetOp), uint16(MOVQ),
+	/*4661*/ uint16(xReadSlashR),
+	/*4662*/ uint16(xArgMm),
+	/*4663*/ uint16(xArgRM64),
+	/*4664*/ uint16(xMatch),
+	/*4665*/ uint16(xCondDataSize), 4637, 4643, 4669,
+	/*4669*/ uint16(xSetOp), uint16(MOVQ),
+	/*4671*/ uint16(xReadSlashR),
+	/*4672*/ uint16(xArgXmm),
+	/*4673*/ uint16(xArgRM64),
+	/*4674*/ uint16(xMatch),
+	/*4675*/ uint16(xCondPrefix), 3,
+	0xF3, 4695,
+	0x66, 4689,
+	0x0, 4683,
+	/*4683*/ uint16(xSetOp), uint16(MOVQ),
+	/*4685*/ uint16(xReadSlashR),
+	/*4686*/ uint16(xArgMm),
+	/*4687*/ uint16(xArgMmM64),
+	/*4688*/ uint16(xMatch),
+	/*4689*/ uint16(xSetOp), uint16(MOVDQA),
+	/*4691*/ uint16(xReadSlashR),
+	/*4692*/ uint16(xArgXmm1),
+	/*4693*/ uint16(xArgXmm2M128),
+	/*4694*/ uint16(xMatch),
+	/*4695*/ uint16(xSetOp), uint16(MOVDQU),
+	/*4697*/ uint16(xReadSlashR),
+	/*4698*/ uint16(xArgXmm1),
+	/*4699*/ uint16(xArgXmm2M128),
+	/*4700*/ uint16(xMatch),
+	/*4701*/ uint16(xCondPrefix), 4,
+	0xF3, 4735,
+	0xF2, 4727,
+	0x66, 4719,
+	0x0, 4711,
+	/*4711*/ uint16(xSetOp), uint16(PSHUFW),
+	/*4713*/ uint16(xReadSlashR),
+	/*4714*/ uint16(xReadIb),
+	/*4715*/ uint16(xArgMm1),
+	/*4716*/ uint16(xArgMm2M64),
+	/*4717*/ uint16(xArgImm8u),
+	/*4718*/ uint16(xMatch),
+	/*4719*/ uint16(xSetOp), uint16(PSHUFD),
+	/*4721*/ uint16(xReadSlashR),
+	/*4722*/ uint16(xReadIb),
+	/*4723*/ uint16(xArgXmm1),
+	/*4724*/ uint16(xArgXmm2M128),
+	/*4725*/ uint16(xArgImm8u),
+	/*4726*/ uint16(xMatch),
+	/*4727*/ uint16(xSetOp), uint16(PSHUFLW),
+	/*4729*/ uint16(xReadSlashR),
+	/*4730*/ uint16(xReadIb),
+	/*4731*/ uint16(xArgXmm1),
+	/*4732*/ uint16(xArgXmm2M128),
+	/*4733*/ uint16(xArgImm8u),
+	/*4734*/ uint16(xMatch),
+	/*4735*/ uint16(xSetOp), uint16(PSHUFHW),
+	/*4737*/ uint16(xReadSlashR),
+	/*4738*/ uint16(xReadIb),
+	/*4739*/ uint16(xArgXmm1),
+	/*4740*/ uint16(xArgXmm2M128),
+	/*4741*/ uint16(xArgImm8u),
+	/*4742*/ uint16(xMatch),
+	/*4743*/ uint16(xCondSlashR),
+	0,    // 0
+	0,    // 1
+	4752, // 2
+	0,    // 3
+	4770, // 4
+	0,    // 5
+	4788, // 6
+	0,    // 7
+	/*4752*/ uint16(xCondPrefix), 2,
+	0x66, 4764,
+	0x0, 4758,
+	/*4758*/ uint16(xSetOp), uint16(PSRLW),
+	/*4760*/ uint16(xReadIb),
+	/*4761*/ uint16(xArgMm2),
+	/*4762*/ uint16(xArgImm8u),
+	/*4763*/ uint16(xMatch),
+	/*4764*/ uint16(xSetOp), uint16(PSRLW),
+	/*4766*/ uint16(xReadIb),
+	/*4767*/ uint16(xArgXmm2),
+	/*4768*/ uint16(xArgImm8u),
+	/*4769*/ uint16(xMatch),
+	/*4770*/ uint16(xCondPrefix), 2,
+	0x66, 4782,
+	0x0, 4776,
+	/*4776*/ uint16(xSetOp), uint16(PSRAW),
+	/*4778*/ uint16(xReadIb),
+	/*4779*/ uint16(xArgMm2),
+	/*4780*/ uint16(xArgImm8u),
+	/*4781*/ uint16(xMatch),
+	/*4782*/ uint16(xSetOp), uint16(PSRAW),
+	/*4784*/ uint16(xReadIb),
+	/*4785*/ uint16(xArgXmm2),
+	/*4786*/ uint16(xArgImm8u),
+	/*4787*/ uint16(xMatch),
+	/*4788*/ uint16(xCondPrefix), 2,
+	0x66, 4800,
+	0x0, 4794,
+	/*4794*/ uint16(xSetOp), uint16(PSLLW),
+	/*4796*/ uint16(xReadIb),
+	/*4797*/ uint16(xArgMm2),
+	/*4798*/ uint16(xArgImm8u),
+	/*4799*/ uint16(xMatch),
+	/*4800*/ uint16(xSetOp), uint16(PSLLW),
+	/*4802*/ uint16(xReadIb),
+	/*4803*/ uint16(xArgXmm2),
+	/*4804*/ uint16(xArgImm8u),
+	/*4805*/ uint16(xMatch),
+	/*4806*/ uint16(xCondSlashR),
+	0,    // 0
+	0,    // 1
+	4815, // 2
+	0,    // 3
+	4833, // 4
+	0,    // 5
+	4851, // 6
+	0,    // 7
+	/*4815*/ uint16(xCondPrefix), 2,
+	0x66, 4827,
+	0x0, 4821,
+	/*4821*/ uint16(xSetOp), uint16(PSRLD),
+	/*4823*/ uint16(xReadIb),
+	/*4824*/ uint16(xArgMm2),
+	/*4825*/ uint16(xArgImm8u),
+	/*4826*/ uint16(xMatch),
+	/*4827*/ uint16(xSetOp), uint16(PSRLD),
+	/*4829*/ uint16(xReadIb),
+	/*4830*/ uint16(xArgXmm2),
+	/*4831*/ uint16(xArgImm8u),
+	/*4832*/ uint16(xMatch),
+	/*4833*/ uint16(xCondPrefix), 2,
+	0x66, 4845,
+	0x0, 4839,
+	/*4839*/ uint16(xSetOp), uint16(PSRAD),
+	/*4841*/ uint16(xReadIb),
+	/*4842*/ uint16(xArgMm2),
+	/*4843*/ uint16(xArgImm8u),
+	/*4844*/ uint16(xMatch),
+	/*4845*/ uint16(xSetOp), uint16(PSRAD),
+	/*4847*/ uint16(xReadIb),
+	/*4848*/ uint16(xArgXmm2),
+	/*4849*/ uint16(xArgImm8u),
+	/*4850*/ uint16(xMatch),
+	/*4851*/ uint16(xCondPrefix), 2,
+	0x66, 4863,
+	0x0, 4857,
+	/*4857*/ uint16(xSetOp), uint16(PSLLD),
+	/*4859*/ uint16(xReadIb),
+	/*4860*/ uint16(xArgMm2),
+	/*4861*/ uint16(xArgImm8u),
+	/*4862*/ uint16(xMatch),
+	/*4863*/ uint16(xSetOp), uint16(PSLLD),
+	/*4865*/ uint16(xReadIb),
+	/*4866*/ uint16(xArgXmm2),
+	/*4867*/ uint16(xArgImm8u),
+	/*4868*/ uint16(xMatch),
+	/*4869*/ uint16(xCondSlashR),
+	0,    // 0
+	0,    // 1
+	4878, // 2
+	4896, // 3
+	0,    // 4
+	0,    // 5
+	4906, // 6
+	4924, // 7
+	/*4878*/ uint16(xCondPrefix), 2,
+	0x66, 4890,
+	0x0, 4884,
+	/*4884*/ uint16(xSetOp), uint16(PSRLQ),
+	/*4886*/ uint16(xReadIb),
+	/*4887*/ uint16(xArgMm2),
+	/*4888*/ uint16(xArgImm8u),
+	/*4889*/ uint16(xMatch),
+	/*4890*/ uint16(xSetOp), uint16(PSRLQ),
+	/*4892*/ uint16(xReadIb),
+	/*4893*/ uint16(xArgXmm2),
+	/*4894*/ uint16(xArgImm8u),
+	/*4895*/ uint16(xMatch),
+	/*4896*/ uint16(xCondPrefix), 1,
+	0x66, 4900,
+	/*4900*/ uint16(xSetOp), uint16(PSRLDQ),
+	/*4902*/ uint16(xReadIb),
+	/*4903*/ uint16(xArgXmm2),
+	/*4904*/ uint16(xArgImm8u),
+	/*4905*/ uint16(xMatch),
+	/*4906*/ uint16(xCondPrefix), 2,
+	0x66, 4918,
+	0x0, 4912,
+	/*4912*/ uint16(xSetOp), uint16(PSLLQ),
+	/*4914*/ uint16(xReadIb),
+	/*4915*/ uint16(xArgMm2),
+	/*4916*/ uint16(xArgImm8u),
+	/*4917*/ uint16(xMatch),
+	/*4918*/ uint16(xSetOp), uint16(PSLLQ),
+	/*4920*/ uint16(xReadIb),
+	/*4921*/ uint16(xArgXmm2),
+	/*4922*/ uint16(xArgImm8u),
+	/*4923*/ uint16(xMatch),
+	/*4924*/ uint16(xCondPrefix), 1,
+	0x66, 4928,
+	/*4928*/ uint16(xSetOp), uint16(PSLLDQ),
+	/*4930*/ uint16(xReadIb),
+	/*4931*/ uint16(xArgXmm2),
+	/*4932*/ uint16(xArgImm8u),
+	/*4933*/ uint16(xMatch),
+	/*4934*/ uint16(xCondPrefix), 2,
+	0x66, 4946,
+	0x0, 4940,
+	/*4940*/ uint16(xSetOp), uint16(PCMPEQB),
+	/*4942*/ uint16(xReadSlashR),
+	/*4943*/ uint16(xArgMm),
+	/*4944*/ uint16(xArgMmM64),
+	/*4945*/ uint16(xMatch),
+	/*4946*/ uint16(xSetOp), uint16(PCMPEQB),
+	/*4948*/ uint16(xReadSlashR),
+	/*4949*/ uint16(xArgXmm1),
+	/*4950*/ uint16(xArgXmm2M128),
+	/*4951*/ uint16(xMatch),
+	/*4952*/ uint16(xCondPrefix), 2,
+	0x66, 4964,
+	0x0, 4958,
+	/*4958*/ uint16(xSetOp), uint16(PCMPEQW),
+	/*4960*/ uint16(xReadSlashR),
+	/*4961*/ uint16(xArgMm),
+	/*4962*/ uint16(xArgMmM64),
+	/*4963*/ uint16(xMatch),
+	/*4964*/ uint16(xSetOp), uint16(PCMPEQW),
+	/*4966*/ uint16(xReadSlashR),
+	/*4967*/ uint16(xArgXmm1),
+	/*4968*/ uint16(xArgXmm2M128),
+	/*4969*/ uint16(xMatch),
+	/*4970*/ uint16(xCondPrefix), 2,
+	0x66, 4982,
+	0x0, 4976,
+	/*4976*/ uint16(xSetOp), uint16(PCMPEQD),
+	/*4978*/ uint16(xReadSlashR),
+	/*4979*/ uint16(xArgMm),
+	/*4980*/ uint16(xArgMmM64),
+	/*4981*/ uint16(xMatch),
+	/*4982*/ uint16(xSetOp), uint16(PCMPEQD),
+	/*4984*/ uint16(xReadSlashR),
+	/*4985*/ uint16(xArgXmm1),
+	/*4986*/ uint16(xArgXmm2M128),
+	/*4987*/ uint16(xMatch),
+	/*4988*/ uint16(xSetOp), uint16(EMMS),
+	/*4990*/ uint16(xMatch),
+	/*4991*/ uint16(xCondPrefix), 2,
+	0xF2, 5003,
+	0x66, 4997,
+	/*4997*/ uint16(xSetOp), uint16(HADDPD),
+	/*4999*/ uint16(xReadSlashR),
+	/*5000*/ uint16(xArgXmm1),
+	/*5001*/ uint16(xArgXmm2M128),
+	/*5002*/ uint16(xMatch),
+	/*5003*/ uint16(xSetOp), uint16(HADDPS),
+	/*5005*/ uint16(xReadSlashR),
+	/*5006*/ uint16(xArgXmm1),
+	/*5007*/ uint16(xArgXmm2M128),
+	/*5008*/ uint16(xMatch),
+	/*5009*/ uint16(xCondPrefix), 2,
+	0xF2, 5021,
+	0x66, 5015,
+	/*5015*/ uint16(xSetOp), uint16(HSUBPD),
+	/*5017*/ uint16(xReadSlashR),
+	/*5018*/ uint16(xArgXmm1),
+	/*5019*/ uint16(xArgXmm2M128),
+	/*5020*/ uint16(xMatch),
+	/*5021*/ uint16(xSetOp), uint16(HSUBPS),
+	/*5023*/ uint16(xReadSlashR),
+	/*5024*/ uint16(xArgXmm1),
+	/*5025*/ uint16(xArgXmm2M128),
+	/*5026*/ uint16(xMatch),
+	/*5027*/ uint16(xCondIs64), 5030, 5076,
+	/*5030*/ uint16(xCondPrefix), 3,
+	0xF3, 5070,
+	0x66, 5054,
+	0x0, 5038,
+	/*5038*/ uint16(xCondDataSize), 5042, 5048, 0,
+	/*5042*/ uint16(xSetOp), uint16(MOVD),
+	/*5044*/ uint16(xReadSlashR),
+	/*5045*/ uint16(xArgRM32),
+	/*5046*/ uint16(xArgMm),
+	/*5047*/ uint16(xMatch),
+	/*5048*/ uint16(xSetOp), uint16(MOVD),
+	/*5050*/ uint16(xReadSlashR),
+	/*5051*/ uint16(xArgRM32),
+	/*5052*/ uint16(xArgMm),
+	/*5053*/ uint16(xMatch),
+	/*5054*/ uint16(xCondDataSize), 5058, 5064, 0,
+	/*5058*/ uint16(xSetOp), uint16(MOVD),
+	/*5060*/ uint16(xReadSlashR),
+	/*5061*/ uint16(xArgRM32),
+	/*5062*/ uint16(xArgXmm),
+	/*5063*/ uint16(xMatch),
+	/*5064*/ uint16(xSetOp), uint16(MOVD),
+	/*5066*/ uint16(xReadSlashR),
+	/*5067*/ uint16(xArgRM32),
+	/*5068*/ uint16(xArgXmm),
+	/*5069*/ uint16(xMatch),
+	/*5070*/ uint16(xSetOp), uint16(MOVQ),
+	/*5072*/ uint16(xReadSlashR),
+	/*5073*/ uint16(xArgXmm1),
+	/*5074*/ uint16(xArgXmm2M64),
+	/*5075*/ uint16(xMatch),
+	/*5076*/ uint16(xCondPrefix), 3,
+	0xF3, 5070,
+	0x66, 5094,
+	0x0, 5084,
+	/*5084*/ uint16(xCondDataSize), 5042, 5048, 5088,
+	/*5088*/ uint16(xSetOp), uint16(MOVQ),
+	/*5090*/ uint16(xReadSlashR),
+	/*5091*/ uint16(xArgRM64),
+	/*5092*/ uint16(xArgMm),
+	/*5093*/ uint16(xMatch),
+	/*5094*/ uint16(xCondDataSize), 5058, 5064, 5098,
+	/*5098*/ uint16(xSetOp), uint16(MOVQ),
+	/*5100*/ uint16(xReadSlashR),
+	/*5101*/ uint16(xArgRM64),
+	/*5102*/ uint16(xArgXmm),
+	/*5103*/ uint16(xMatch),
+	/*5104*/ uint16(xCondPrefix), 3,
+	0xF3, 5124,
+	0x66, 5118,
+	0x0, 5112,
+	/*5112*/ uint16(xSetOp), uint16(MOVQ),
+	/*5114*/ uint16(xReadSlashR),
+	/*5115*/ uint16(xArgMmM64),
+	/*5116*/ uint16(xArgMm),
+	/*5117*/ uint16(xMatch),
+	/*5118*/ uint16(xSetOp), uint16(MOVDQA),
+	/*5120*/ uint16(xReadSlashR),
+	/*5121*/ uint16(xArgXmm2M128),
+	/*5122*/ uint16(xArgXmm1),
+	/*5123*/ uint16(xMatch),
+	/*5124*/ uint16(xSetOp), uint16(MOVDQU),
+	/*5126*/ uint16(xReadSlashR),
+	/*5127*/ uint16(xArgXmm2M128),
+	/*5128*/ uint16(xArgXmm1),
+	/*5129*/ uint16(xMatch),
+	/*5130*/ uint16(xCondIs64), 5133, 5147,
+	/*5133*/ uint16(xCondDataSize), 5137, 5142, 0,
+	/*5137*/ uint16(xSetOp), uint16(JO),
+	/*5139*/ uint16(xReadCw),
+	/*5140*/ uint16(xArgRel16),
+	/*5141*/ uint16(xMatch),
+	/*5142*/ uint16(xSetOp), uint16(JO),
+	/*5144*/ uint16(xReadCd),
+	/*5145*/ uint16(xArgRel32),
+	/*5146*/ uint16(xMatch),
+	/*5147*/ uint16(xCondDataSize), 5151, 5142, 5156,
+	/*5151*/ uint16(xSetOp), uint16(JO),
+	/*5153*/ uint16(xReadCd),
+	/*5154*/ uint16(xArgRel32),
+	/*5155*/ uint16(xMatch),
+	/*5156*/ uint16(xSetOp), uint16(JO),
+	/*5158*/ uint16(xReadCd),
+	/*5159*/ uint16(xArgRel32),
+	/*5160*/ uint16(xMatch),
+	/*5161*/ uint16(xCondIs64), 5164, 5178,
+	/*5164*/ uint16(xCondDataSize), 5168, 5173, 0,
+	/*5168*/ uint16(xSetOp), uint16(JNO),
+	/*5170*/ uint16(xReadCw),
+	/*5171*/ uint16(xArgRel16),
+	/*5172*/ uint16(xMatch),
+	/*5173*/ uint16(xSetOp), uint16(JNO),
+	/*5175*/ uint16(xReadCd),
+	/*5176*/ uint16(xArgRel32),
+	/*5177*/ uint16(xMatch),
+	/*5178*/ uint16(xCondDataSize), 5182, 5173, 5187,
+	/*5182*/ uint16(xSetOp), uint16(JNO),
+	/*5184*/ uint16(xReadCd),
+	/*5185*/ uint16(xArgRel32),
+	/*5186*/ uint16(xMatch),
+	/*5187*/ uint16(xSetOp), uint16(JNO),
+	/*5189*/ uint16(xReadCd),
+	/*5190*/ uint16(xArgRel32),
+	/*5191*/ uint16(xMatch),
+	/*5192*/ uint16(xCondIs64), 5195, 5209,
+	/*5195*/ uint16(xCondDataSize), 5199, 5204, 0,
+	/*5199*/ uint16(xSetOp), uint16(JB),
+	/*5201*/ uint16(xReadCw),
+	/*5202*/ uint16(xArgRel16),
+	/*5203*/ uint16(xMatch),
+	/*5204*/ uint16(xSetOp), uint16(JB),
+	/*5206*/ uint16(xReadCd),
+	/*5207*/ uint16(xArgRel32),
+	/*5208*/ uint16(xMatch),
+	/*5209*/ uint16(xCondDataSize), 5213, 5204, 5218,
+	/*5213*/ uint16(xSetOp), uint16(JB),
+	/*5215*/ uint16(xReadCd),
+	/*5216*/ uint16(xArgRel32),
+	/*5217*/ uint16(xMatch),
+	/*5218*/ uint16(xSetOp), uint16(JB),
+	/*5220*/ uint16(xReadCd),
+	/*5221*/ uint16(xArgRel32),
+	/*5222*/ uint16(xMatch),
+	/*5223*/ uint16(xCondIs64), 5226, 5240,
+	/*5226*/ uint16(xCondDataSize), 5230, 5235, 0,
+	/*5230*/ uint16(xSetOp), uint16(JAE),
+	/*5232*/ uint16(xReadCw),
+	/*5233*/ uint16(xArgRel16),
+	/*5234*/ uint16(xMatch),
+	/*5235*/ uint16(xSetOp), uint16(JAE),
+	/*5237*/ uint16(xReadCd),
+	/*5238*/ uint16(xArgRel32),
+	/*5239*/ uint16(xMatch),
+	/*5240*/ uint16(xCondDataSize), 5244, 5235, 5249,
+	/*5244*/ uint16(xSetOp), uint16(JAE),
+	/*5246*/ uint16(xReadCd),
+	/*5247*/ uint16(xArgRel32),
+	/*5248*/ uint16(xMatch),
+	/*5249*/ uint16(xSetOp), uint16(JAE),
+	/*5251*/ uint16(xReadCd),
+	/*5252*/ uint16(xArgRel32),
+	/*5253*/ uint16(xMatch),
+	/*5254*/ uint16(xCondIs64), 5257, 5271,
+	/*5257*/ uint16(xCondDataSize), 5261, 5266, 0,
+	/*5261*/ uint16(xSetOp), uint16(JE),
+	/*5263*/ uint16(xReadCw),
+	/*5264*/ uint16(xArgRel16),
+	/*5265*/ uint16(xMatch),
+	/*5266*/ uint16(xSetOp), uint16(JE),
+	/*5268*/ uint16(xReadCd),
+	/*5269*/ uint16(xArgRel32),
+	/*5270*/ uint16(xMatch),
+	/*5271*/ uint16(xCondDataSize), 5275, 5266, 5280,
+	/*5275*/ uint16(xSetOp), uint16(JE),
+	/*5277*/ uint16(xReadCd),
+	/*5278*/ uint16(xArgRel32),
+	/*5279*/ uint16(xMatch),
+	/*5280*/ uint16(xSetOp), uint16(JE),
+	/*5282*/ uint16(xReadCd),
+	/*5283*/ uint16(xArgRel32),
+	/*5284*/ uint16(xMatch),
+	/*5285*/ uint16(xCondIs64), 5288, 5302,
+	/*5288*/ uint16(xCondDataSize), 5292, 5297, 0,
+	/*5292*/ uint16(xSetOp), uint16(JNE),
+	/*5294*/ uint16(xReadCw),
+	/*5295*/ uint16(xArgRel16),
+	/*5296*/ uint16(xMatch),
+	/*5297*/ uint16(xSetOp), uint16(JNE),
+	/*5299*/ uint16(xReadCd),
+	/*5300*/ uint16(xArgRel32),
+	/*5301*/ uint16(xMatch),
+	/*5302*/ uint16(xCondDataSize), 5306, 5297, 5311,
+	/*5306*/ uint16(xSetOp), uint16(JNE),
+	/*5308*/ uint16(xReadCd),
+	/*5309*/ uint16(xArgRel32),
+	/*5310*/ uint16(xMatch),
+	/*5311*/ uint16(xSetOp), uint16(JNE),
+	/*5313*/ uint16(xReadCd),
+	/*5314*/ uint16(xArgRel32),
+	/*5315*/ uint16(xMatch),
+	/*5316*/ uint16(xCondIs64), 5319, 5333,
+	/*5319*/ uint16(xCondDataSize), 5323, 5328, 0,
+	/*5323*/ uint16(xSetOp), uint16(JBE),
+	/*5325*/ uint16(xReadCw),
+	/*5326*/ uint16(xArgRel16),
+	/*5327*/ uint16(xMatch),
+	/*5328*/ uint16(xSetOp), uint16(JBE),
+	/*5330*/ uint16(xReadCd),
+	/*5331*/ uint16(xArgRel32),
+	/*5332*/ uint16(xMatch),
+	/*5333*/ uint16(xCondDataSize), 5337, 5328, 5342,
+	/*5337*/ uint16(xSetOp), uint16(JBE),
+	/*5339*/ uint16(xReadCd),
+	/*5340*/ uint16(xArgRel32),
+	/*5341*/ uint16(xMatch),
+	/*5342*/ uint16(xSetOp), uint16(JBE),
+	/*5344*/ uint16(xReadCd),
+	/*5345*/ uint16(xArgRel32),
+	/*5346*/ uint16(xMatch),
+	/*5347*/ uint16(xCondIs64), 5350, 5364,
+	/*5350*/ uint16(xCondDataSize), 5354, 5359, 0,
+	/*5354*/ uint16(xSetOp), uint16(JA),
+	/*5356*/ uint16(xReadCw),
+	/*5357*/ uint16(xArgRel16),
+	/*5358*/ uint16(xMatch),
+	/*5359*/ uint16(xSetOp), uint16(JA),
+	/*5361*/ uint16(xReadCd),
+	/*5362*/ uint16(xArgRel32),
+	/*5363*/ uint16(xMatch),
+	/*5364*/ uint16(xCondDataSize), 5368, 5359, 5373,
+	/*5368*/ uint16(xSetOp), uint16(JA),
+	/*5370*/ uint16(xReadCd),
+	/*5371*/ uint16(xArgRel32),
+	/*5372*/ uint16(xMatch),
+	/*5373*/ uint16(xSetOp), uint16(JA),
+	/*5375*/ uint16(xReadCd),
+	/*5376*/ uint16(xArgRel32),
+	/*5377*/ uint16(xMatch),
+	/*5378*/ uint16(xCondIs64), 5381, 5395,
+	/*5381*/ uint16(xCondDataSize), 5385, 5390, 0,
+	/*5385*/ uint16(xSetOp), uint16(JS),
+	/*5387*/ uint16(xReadCw),
+	/*5388*/ uint16(xArgRel16),
+	/*5389*/ uint16(xMatch),
+	/*5390*/ uint16(xSetOp), uint16(JS),
+	/*5392*/ uint16(xReadCd),
+	/*5393*/ uint16(xArgRel32),
+	/*5394*/ uint16(xMatch),
+	/*5395*/ uint16(xCondDataSize), 5399, 5390, 5404,
+	/*5399*/ uint16(xSetOp), uint16(JS),
+	/*5401*/ uint16(xReadCd),
+	/*5402*/ uint16(xArgRel32),
+	/*5403*/ uint16(xMatch),
+	/*5404*/ uint16(xSetOp), uint16(JS),
+	/*5406*/ uint16(xReadCd),
+	/*5407*/ uint16(xArgRel32),
+	/*5408*/ uint16(xMatch),
+	/*5409*/ uint16(xCondIs64), 5412, 5426,
+	/*5412*/ uint16(xCondDataSize), 5416, 5421, 0,
+	/*5416*/ uint16(xSetOp), uint16(JNS),
+	/*5418*/ uint16(xReadCw),
+	/*5419*/ uint16(xArgRel16),
+	/*5420*/ uint16(xMatch),
+	/*5421*/ uint16(xSetOp), uint16(JNS),
+	/*5423*/ uint16(xReadCd),
+	/*5424*/ uint16(xArgRel32),
+	/*5425*/ uint16(xMatch),
+	/*5426*/ uint16(xCondDataSize), 5430, 5421, 5435,
+	/*5430*/ uint16(xSetOp), uint16(JNS),
+	/*5432*/ uint16(xReadCd),
+	/*5433*/ uint16(xArgRel32),
+	/*5434*/ uint16(xMatch),
+	/*5435*/ uint16(xSetOp), uint16(JNS),
+	/*5437*/ uint16(xReadCd),
+	/*5438*/ uint16(xArgRel32),
+	/*5439*/ uint16(xMatch),
+	/*5440*/ uint16(xCondIs64), 5443, 5457,
+	/*5443*/ uint16(xCondDataSize), 5447, 5452, 0,
+	/*5447*/ uint16(xSetOp), uint16(JP),
+	/*5449*/ uint16(xReadCw),
+	/*5450*/ uint16(xArgRel16),
+	/*5451*/ uint16(xMatch),
+	/*5452*/ uint16(xSetOp), uint16(JP),
+	/*5454*/ uint16(xReadCd),
+	/*5455*/ uint16(xArgRel32),
+	/*5456*/ uint16(xMatch),
+	/*5457*/ uint16(xCondDataSize), 5461, 5452, 5466,
+	/*5461*/ uint16(xSetOp), uint16(JP),
+	/*5463*/ uint16(xReadCd),
+	/*5464*/ uint16(xArgRel32),
+	/*5465*/ uint16(xMatch),
+	/*5466*/ uint16(xSetOp), uint16(JP),
+	/*5468*/ uint16(xReadCd),
+	/*5469*/ uint16(xArgRel32),
+	/*5470*/ uint16(xMatch),
+	/*5471*/ uint16(xCondIs64), 5474, 5488,
+	/*5474*/ uint16(xCondDataSize), 5478, 5483, 0,
+	/*5478*/ uint16(xSetOp), uint16(JNP),
+	/*5480*/ uint16(xReadCw),
+	/*5481*/ uint16(xArgRel16),
+	/*5482*/ uint16(xMatch),
+	/*5483*/ uint16(xSetOp), uint16(JNP),
+	/*5485*/ uint16(xReadCd),
+	/*5486*/ uint16(xArgRel32),
+	/*5487*/ uint16(xMatch),
+	/*5488*/ uint16(xCondDataSize), 5492, 5483, 5497,
+	/*5492*/ uint16(xSetOp), uint16(JNP),
+	/*5494*/ uint16(xReadCd),
+	/*5495*/ uint16(xArgRel32),
+	/*5496*/ uint16(xMatch),
+	/*5497*/ uint16(xSetOp), uint16(JNP),
+	/*5499*/ uint16(xReadCd),
+	/*5500*/ uint16(xArgRel32),
+	/*5501*/ uint16(xMatch),
+	/*5502*/ uint16(xCondIs64), 5505, 5519,
+	/*5505*/ uint16(xCondDataSize), 5509, 5514, 0,
+	/*5509*/ uint16(xSetOp), uint16(JL),
+	/*5511*/ uint16(xReadCw),
+	/*5512*/ uint16(xArgRel16),
+	/*5513*/ uint16(xMatch),
+	/*5514*/ uint16(xSetOp), uint16(JL),
+	/*5516*/ uint16(xReadCd),
+	/*5517*/ uint16(xArgRel32),
+	/*5518*/ uint16(xMatch),
+	/*5519*/ uint16(xCondDataSize), 5523, 5514, 5528,
+	/*5523*/ uint16(xSetOp), uint16(JL),
+	/*5525*/ uint16(xReadCd),
+	/*5526*/ uint16(xArgRel32),
+	/*5527*/ uint16(xMatch),
+	/*5528*/ uint16(xSetOp), uint16(JL),
+	/*5530*/ uint16(xReadCd),
+	/*5531*/ uint16(xArgRel32),
+	/*5532*/ uint16(xMatch),
+	/*5533*/ uint16(xCondIs64), 5536, 5550,
+	/*5536*/ uint16(xCondDataSize), 5540, 5545, 0,
+	/*5540*/ uint16(xSetOp), uint16(JGE),
+	/*5542*/ uint16(xReadCw),
+	/*5543*/ uint16(xArgRel16),
+	/*5544*/ uint16(xMatch),
+	/*5545*/ uint16(xSetOp), uint16(JGE),
+	/*5547*/ uint16(xReadCd),
+	/*5548*/ uint16(xArgRel32),
+	/*5549*/ uint16(xMatch),
+	/*5550*/ uint16(xCondDataSize), 5554, 5545, 5559,
+	/*5554*/ uint16(xSetOp), uint16(JGE),
+	/*5556*/ uint16(xReadCd),
+	/*5557*/ uint16(xArgRel32),
+	/*5558*/ uint16(xMatch),
+	/*5559*/ uint16(xSetOp), uint16(JGE),
+	/*5561*/ uint16(xReadCd),
+	/*5562*/ uint16(xArgRel32),
+	/*5563*/ uint16(xMatch),
+	/*5564*/ uint16(xCondIs64), 5567, 5581,
+	/*5567*/ uint16(xCondDataSize), 5571, 5576, 0,
+	/*5571*/ uint16(xSetOp), uint16(JLE),
+	/*5573*/ uint16(xReadCw),
+	/*5574*/ uint16(xArgRel16),
+	/*5575*/ uint16(xMatch),
+	/*5576*/ uint16(xSetOp), uint16(JLE),
+	/*5578*/ uint16(xReadCd),
+	/*5579*/ uint16(xArgRel32),
+	/*5580*/ uint16(xMatch),
+	/*5581*/ uint16(xCondDataSize), 5585, 5576, 5590,
+	/*5585*/ uint16(xSetOp), uint16(JLE),
+	/*5587*/ uint16(xReadCd),
+	/*5588*/ uint16(xArgRel32),
+	/*5589*/ uint16(xMatch),
+	/*5590*/ uint16(xSetOp), uint16(JLE),
+	/*5592*/ uint16(xReadCd),
+	/*5593*/ uint16(xArgRel32),
+	/*5594*/ uint16(xMatch),
+	/*5595*/ uint16(xCondIs64), 5598, 5612,
+	/*5598*/ uint16(xCondDataSize), 5602, 5607, 0,
+	/*5602*/ uint16(xSetOp), uint16(JG),
+	/*5604*/ uint16(xReadCw),
+	/*5605*/ uint16(xArgRel16),
+	/*5606*/ uint16(xMatch),
+	/*5607*/ uint16(xSetOp), uint16(JG),
+	/*5609*/ uint16(xReadCd),
+	/*5610*/ uint16(xArgRel32),
+	/*5611*/ uint16(xMatch),
+	/*5612*/ uint16(xCondDataSize), 5616, 5607, 5621,
+	/*5616*/ uint16(xSetOp), uint16(JG),
+	/*5618*/ uint16(xReadCd),
+	/*5619*/ uint16(xArgRel32),
+	/*5620*/ uint16(xMatch),
+	/*5621*/ uint16(xSetOp), uint16(JG),
+	/*5623*/ uint16(xReadCd),
+	/*5624*/ uint16(xArgRel32),
+	/*5625*/ uint16(xMatch),
+	/*5626*/ uint16(xSetOp), uint16(SETO),
+	/*5628*/ uint16(xReadSlashR),
+	/*5629*/ uint16(xArgRM8),
+	/*5630*/ uint16(xMatch),
+	/*5631*/ uint16(xSetOp), uint16(SETNO),
+	/*5633*/ uint16(xReadSlashR),
+	/*5634*/ uint16(xArgRM8),
+	/*5635*/ uint16(xMatch),
+	/*5636*/ uint16(xSetOp), uint16(SETB),
+	/*5638*/ uint16(xReadSlashR),
+	/*5639*/ uint16(xArgRM8),
+	/*5640*/ uint16(xMatch),
+	/*5641*/ uint16(xSetOp), uint16(SETAE),
+	/*5643*/ uint16(xReadSlashR),
+	/*5644*/ uint16(xArgRM8),
+	/*5645*/ uint16(xMatch),
+	/*5646*/ uint16(xSetOp), uint16(SETE),
+	/*5648*/ uint16(xReadSlashR),
+	/*5649*/ uint16(xArgRM8),
+	/*5650*/ uint16(xMatch),
+	/*5651*/ uint16(xSetOp), uint16(SETNE),
+	/*5653*/ uint16(xReadSlashR),
+	/*5654*/ uint16(xArgRM8),
+	/*5655*/ uint16(xMatch),
+	/*5656*/ uint16(xSetOp), uint16(SETBE),
+	/*5658*/ uint16(xReadSlashR),
+	/*5659*/ uint16(xArgRM8),
+	/*5660*/ uint16(xMatch),
+	/*5661*/ uint16(xSetOp), uint16(SETA),
+	/*5663*/ uint16(xReadSlashR),
+	/*5664*/ uint16(xArgRM8),
+	/*5665*/ uint16(xMatch),
+	/*5666*/ uint16(xSetOp), uint16(SETS),
+	/*5668*/ uint16(xReadSlashR),
+	/*5669*/ uint16(xArgRM8),
+	/*5670*/ uint16(xMatch),
+	/*5671*/ uint16(xSetOp), uint16(SETNS),
+	/*5673*/ uint16(xReadSlashR),
+	/*5674*/ uint16(xArgRM8),
+	/*5675*/ uint16(xMatch),
+	/*5676*/ uint16(xSetOp), uint16(SETP),
+	/*5678*/ uint16(xReadSlashR),
+	/*5679*/ uint16(xArgRM8),
+	/*5680*/ uint16(xMatch),
+	/*5681*/ uint16(xSetOp), uint16(SETNP),
+	/*5683*/ uint16(xReadSlashR),
+	/*5684*/ uint16(xArgRM8),
+	/*5685*/ uint16(xMatch),
+	/*5686*/ uint16(xSetOp), uint16(SETL),
+	/*5688*/ uint16(xReadSlashR),
+	/*5689*/ uint16(xArgRM8),
+	/*5690*/ uint16(xMatch),
+	/*5691*/ uint16(xSetOp), uint16(SETGE),
+	/*5693*/ uint16(xReadSlashR),
+	/*5694*/ uint16(xArgRM8),
+	/*5695*/ uint16(xMatch),
+	/*5696*/ uint16(xSetOp), uint16(SETLE),
+	/*5698*/ uint16(xReadSlashR),
+	/*5699*/ uint16(xArgRM8),
+	/*5700*/ uint16(xMatch),
+	/*5701*/ uint16(xSetOp), uint16(SETG),
+	/*5703*/ uint16(xReadSlashR),
+	/*5704*/ uint16(xArgRM8),
+	/*5705*/ uint16(xMatch),
+	/*5706*/ uint16(xSetOp), uint16(PUSH),
+	/*5708*/ uint16(xArgFS),
+	/*5709*/ uint16(xMatch),
+	/*5710*/ uint16(xCondIs64), 5713, 5725,
+	/*5713*/ uint16(xCondDataSize), 5717, 5721, 0,
+	/*5717*/ uint16(xSetOp), uint16(POP),
+	/*5719*/ uint16(xArgFS),
+	/*5720*/ uint16(xMatch),
+	/*5721*/ uint16(xSetOp), uint16(POP),
+	/*5723*/ uint16(xArgFS),
+	/*5724*/ uint16(xMatch),
+	/*5725*/ uint16(xCondDataSize), 5717, 5729, 5733,
+	/*5729*/ uint16(xSetOp), uint16(POP),
+	/*5731*/ uint16(xArgFS),
+	/*5732*/ uint16(xMatch),
+	/*5733*/ uint16(xSetOp), uint16(POP),
+	/*5735*/ uint16(xArgFS),
+	/*5736*/ uint16(xMatch),
+	/*5737*/ uint16(xSetOp), uint16(CPUID),
+	/*5739*/ uint16(xMatch),
+	/*5740*/ uint16(xCondIs64), 5743, 5759,
+	/*5743*/ uint16(xCondDataSize), 5747, 5753, 0,
+	/*5747*/ uint16(xSetOp), uint16(BT),
+	/*5749*/ uint16(xReadSlashR),
+	/*5750*/ uint16(xArgRM16),
+	/*5751*/ uint16(xArgR16),
+	/*5752*/ uint16(xMatch),
+	/*5753*/ uint16(xSetOp), uint16(BT),
+	/*5755*/ uint16(xReadSlashR),
+	/*5756*/ uint16(xArgRM32),
+	/*5757*/ uint16(xArgR32),
+	/*5758*/ uint16(xMatch),
+	/*5759*/ uint16(xCondDataSize), 5747, 5753, 5763,
+	/*5763*/ uint16(xSetOp), uint16(BT),
+	/*5765*/ uint16(xReadSlashR),
+	/*5766*/ uint16(xArgRM64),
+	/*5767*/ uint16(xArgR64),
+	/*5768*/ uint16(xMatch),
+	/*5769*/ uint16(xCondIs64), 5772, 5792,
+	/*5772*/ uint16(xCondDataSize), 5776, 5784, 0,
+	/*5776*/ uint16(xSetOp), uint16(SHLD),
+	/*5778*/ uint16(xReadSlashR),
+	/*5779*/ uint16(xReadIb),
+	/*5780*/ uint16(xArgRM16),
+	/*5781*/ uint16(xArgR16),
+	/*5782*/ uint16(xArgImm8u),
+	/*5783*/ uint16(xMatch),
+	/*5784*/ uint16(xSetOp), uint16(SHLD),
+	/*5786*/ uint16(xReadSlashR),
+	/*5787*/ uint16(xReadIb),
+	/*5788*/ uint16(xArgRM32),
+	/*5789*/ uint16(xArgR32),
+	/*5790*/ uint16(xArgImm8u),
+	/*5791*/ uint16(xMatch),
+	/*5792*/ uint16(xCondDataSize), 5776, 5784, 5796,
+	/*5796*/ uint16(xSetOp), uint16(SHLD),
+	/*5798*/ uint16(xReadSlashR),
+	/*5799*/ uint16(xReadIb),
+	/*5800*/ uint16(xArgRM64),
+	/*5801*/ uint16(xArgR64),
+	/*5802*/ uint16(xArgImm8u),
+	/*5803*/ uint16(xMatch),
+	/*5804*/ uint16(xCondIs64), 5807, 5825,
+	/*5807*/ uint16(xCondDataSize), 5811, 5818, 0,
+	/*5811*/ uint16(xSetOp), uint16(SHLD),
+	/*5813*/ uint16(xReadSlashR),
+	/*5814*/ uint16(xArgRM16),
+	/*5815*/ uint16(xArgR16),
+	/*5816*/ uint16(xArgCL),
+	/*5817*/ uint16(xMatch),
+	/*5818*/ uint16(xSetOp), uint16(SHLD),
+	/*5820*/ uint16(xReadSlashR),
+	/*5821*/ uint16(xArgRM32),
+	/*5822*/ uint16(xArgR32),
+	/*5823*/ uint16(xArgCL),
+	/*5824*/ uint16(xMatch),
+	/*5825*/ uint16(xCondDataSize), 5811, 5818, 5829,
+	/*5829*/ uint16(xSetOp), uint16(SHLD),
+	/*5831*/ uint16(xReadSlashR),
+	/*5832*/ uint16(xArgRM64),
+	/*5833*/ uint16(xArgR64),
+	/*5834*/ uint16(xArgCL),
+	/*5835*/ uint16(xMatch),
+	/*5836*/ uint16(xSetOp), uint16(PUSH),
+	/*5838*/ uint16(xArgGS),
+	/*5839*/ uint16(xMatch),
+	/*5840*/ uint16(xCondIs64), 5843, 5855,
+	/*5843*/ uint16(xCondDataSize), 5847, 5851, 0,
+	/*5847*/ uint16(xSetOp), uint16(POP),
+	/*5849*/ uint16(xArgGS),
+	/*5850*/ uint16(xMatch),
+	/*5851*/ uint16(xSetOp), uint16(POP),
+	/*5853*/ uint16(xArgGS),
+	/*5854*/ uint16(xMatch),
+	/*5855*/ uint16(xCondDataSize), 5847, 5859, 5863,
+	/*5859*/ uint16(xSetOp), uint16(POP),
+	/*5861*/ uint16(xArgGS),
+	/*5862*/ uint16(xMatch),
+	/*5863*/ uint16(xSetOp), uint16(POP),
+	/*5865*/ uint16(xArgGS),
+	/*5866*/ uint16(xMatch),
+	/*5867*/ uint16(xSetOp), uint16(RSM),
+	/*5869*/ uint16(xMatch),
+	/*5870*/ uint16(xCondIs64), 5873, 5889,
+	/*5873*/ uint16(xCondDataSize), 5877, 5883, 0,
+	/*5877*/ uint16(xSetOp), uint16(BTS),
+	/*5879*/ uint16(xReadSlashR),
+	/*5880*/ uint16(xArgRM16),
+	/*5881*/ uint16(xArgR16),
+	/*5882*/ uint16(xMatch),
+	/*5883*/ uint16(xSetOp), uint16(BTS),
+	/*5885*/ uint16(xReadSlashR),
+	/*5886*/ uint16(xArgRM32),
+	/*5887*/ uint16(xArgR32),
+	/*5888*/ uint16(xMatch),
+	/*5889*/ uint16(xCondDataSize), 5877, 5883, 5893,
+	/*5893*/ uint16(xSetOp), uint16(BTS),
+	/*5895*/ uint16(xReadSlashR),
+	/*5896*/ uint16(xArgRM64),
+	/*5897*/ uint16(xArgR64),
+	/*5898*/ uint16(xMatch),
+	/*5899*/ uint16(xCondIs64), 5902, 5922,
+	/*5902*/ uint16(xCondDataSize), 5906, 5914, 0,
+	/*5906*/ uint16(xSetOp), uint16(SHRD),
+	/*5908*/ uint16(xReadSlashR),
+	/*5909*/ uint16(xReadIb),
+	/*5910*/ uint16(xArgRM16),
+	/*5911*/ uint16(xArgR16),
+	/*5912*/ uint16(xArgImm8u),
+	/*5913*/ uint16(xMatch),
+	/*5914*/ uint16(xSetOp), uint16(SHRD),
+	/*5916*/ uint16(xReadSlashR),
+	/*5917*/ uint16(xReadIb),
+	/*5918*/ uint16(xArgRM32),
+	/*5919*/ uint16(xArgR32),
+	/*5920*/ uint16(xArgImm8u),
+	/*5921*/ uint16(xMatch),
+	/*5922*/ uint16(xCondDataSize), 5906, 5914, 5926,
+	/*5926*/ uint16(xSetOp), uint16(SHRD),
+	/*5928*/ uint16(xReadSlashR),
+	/*5929*/ uint16(xReadIb),
+	/*5930*/ uint16(xArgRM64),
+	/*5931*/ uint16(xArgR64),
+	/*5932*/ uint16(xArgImm8u),
+	/*5933*/ uint16(xMatch),
+	/*5934*/ uint16(xCondIs64), 5937, 5955,
+	/*5937*/ uint16(xCondDataSize), 5941, 5948, 0,
+	/*5941*/ uint16(xSetOp), uint16(SHRD),
+	/*5943*/ uint16(xReadSlashR),
+	/*5944*/ uint16(xArgRM16),
+	/*5945*/ uint16(xArgR16),
+	/*5946*/ uint16(xArgCL),
+	/*5947*/ uint16(xMatch),
+	/*5948*/ uint16(xSetOp), uint16(SHRD),
+	/*5950*/ uint16(xReadSlashR),
+	/*5951*/ uint16(xArgRM32),
+	/*5952*/ uint16(xArgR32),
+	/*5953*/ uint16(xArgCL),
+	/*5954*/ uint16(xMatch),
+	/*5955*/ uint16(xCondDataSize), 5941, 5948, 5959,
+	/*5959*/ uint16(xSetOp), uint16(SHRD),
+	/*5961*/ uint16(xReadSlashR),
+	/*5962*/ uint16(xArgRM64),
+	/*5963*/ uint16(xArgR64),
+	/*5964*/ uint16(xArgCL),
+	/*5965*/ uint16(xMatch),
+	/*5966*/ uint16(xCondByte), 3,
+	0xE8, 6215,
+	0xF0, 6218,
+	0xF8, 6221,
+	/*5974*/ uint16(xCondSlashR),
+	5983, // 0
+	6037, // 1
+	6091, // 2
+	6120, // 3
+	6149, // 4
+	6172, // 5
+	6195, // 6
+	6211, // 7
+	/*5983*/ uint16(xCondIs64), 5986, 5998,
+	/*5986*/ uint16(xCondDataSize), 5990, 5994, 0,
+	/*5990*/ uint16(xSetOp), uint16(FXSAVE),
+	/*5992*/ uint16(xArgM512byte),
+	/*5993*/ uint16(xMatch),
+	/*5994*/ uint16(xSetOp), uint16(FXSAVE),
+	/*5996*/ uint16(xArgM512byte),
+	/*5997*/ uint16(xMatch),
+	/*5998*/ uint16(xCondPrefix), 2,
+	0xF3, 6012,
+	0x0, 6004,
+	/*6004*/ uint16(xCondDataSize), 5990, 5994, 6008,
+	/*6008*/ uint16(xSetOp), uint16(FXSAVE64),
+	/*6010*/ uint16(xArgM512byte),
+	/*6011*/ uint16(xMatch),
+	/*6012*/ uint16(xCondDataSize), 6016, 6023, 6030,
+	/*6016*/ uint16(xCondIsMem), 6019, 0,
+	/*6019*/ uint16(xSetOp), uint16(RDFSBASE),
+	/*6021*/ uint16(xArgRM32),
+	/*6022*/ uint16(xMatch),
+	/*6023*/ uint16(xCondIsMem), 6026, 0,
+	/*6026*/ uint16(xSetOp), uint16(RDFSBASE),
+	/*6028*/ uint16(xArgRM32),
+	/*6029*/ uint16(xMatch),
+	/*6030*/ uint16(xCondIsMem), 6033, 0,
+	/*6033*/ uint16(xSetOp), uint16(RDFSBASE),
+	/*6035*/ uint16(xArgRM64),
+	/*6036*/ uint16(xMatch),
+	/*6037*/ uint16(xCondIs64), 6040, 6052,
+	/*6040*/ uint16(xCondDataSize), 6044, 6048, 0,
+	/*6044*/ uint16(xSetOp), uint16(FXRSTOR),
+	/*6046*/ uint16(xArgM512byte),
+	/*6047*/ uint16(xMatch),
+	/*6048*/ uint16(xSetOp), uint16(FXRSTOR),
+	/*6050*/ uint16(xArgM512byte),
+	/*6051*/ uint16(xMatch),
+	/*6052*/ uint16(xCondPrefix), 2,
+	0xF3, 6066,
+	0x0, 6058,
+	/*6058*/ uint16(xCondDataSize), 6044, 6048, 6062,
+	/*6062*/ uint16(xSetOp), uint16(FXRSTOR64),
+	/*6064*/ uint16(xArgM512byte),
+	/*6065*/ uint16(xMatch),
+	/*6066*/ uint16(xCondDataSize), 6070, 6077, 6084,
+	/*6070*/ uint16(xCondIsMem), 6073, 0,
+	/*6073*/ uint16(xSetOp), uint16(RDGSBASE),
+	/*6075*/ uint16(xArgRM32),
+	/*6076*/ uint16(xMatch),
+	/*6077*/ uint16(xCondIsMem), 6080, 0,
+	/*6080*/ uint16(xSetOp), uint16(RDGSBASE),
+	/*6082*/ uint16(xArgRM32),
+	/*6083*/ uint16(xMatch),
+	/*6084*/ uint16(xCondIsMem), 6087, 0,
+	/*6087*/ uint16(xSetOp), uint16(RDGSBASE),
+	/*6089*/ uint16(xArgRM64),
+	/*6090*/ uint16(xMatch),
+	/*6091*/ uint16(xCondIs64), 6094, 6098,
+	/*6094*/ uint16(xSetOp), uint16(LDMXCSR),
+	/*6096*/ uint16(xArgM32),
+	/*6097*/ uint16(xMatch),
+	/*6098*/ uint16(xCondPrefix), 2,
+	0xF3, 6104,
+	0x0, 6094,
+	/*6104*/ uint16(xCondDataSize), 6108, 6112, 6116,
+	/*6108*/ uint16(xSetOp), uint16(WRFSBASE),
+	/*6110*/ uint16(xArgRM32),
+	/*6111*/ uint16(xMatch),
+	/*6112*/ uint16(xSetOp), uint16(WRFSBASE),
+	/*6114*/ uint16(xArgRM32),
+	/*6115*/ uint16(xMatch),
+	/*6116*/ uint16(xSetOp), uint16(WRFSBASE),
+	/*6118*/ uint16(xArgRM64),
+	/*6119*/ uint16(xMatch),
+	/*6120*/ uint16(xCondIs64), 6123, 6127,
+	/*6123*/ uint16(xSetOp), uint16(STMXCSR),
+	/*6125*/ uint16(xArgM32),
+	/*6126*/ uint16(xMatch),
+	/*6127*/ uint16(xCondPrefix), 2,
+	0xF3, 6133,
+	0x0, 6123,
+	/*6133*/ uint16(xCondDataSize), 6137, 6141, 6145,
+	/*6137*/ uint16(xSetOp), uint16(WRGSBASE),
+	/*6139*/ uint16(xArgRM32),
+	/*6140*/ uint16(xMatch),
+	/*6141*/ uint16(xSetOp), uint16(WRGSBASE),
+	/*6143*/ uint16(xArgRM32),
+	/*6144*/ uint16(xMatch),
+	/*6145*/ uint16(xSetOp), uint16(WRGSBASE),
+	/*6147*/ uint16(xArgRM64),
+	/*6148*/ uint16(xMatch),
+	/*6149*/ uint16(xCondIs64), 6152, 6164,
+	/*6152*/ uint16(xCondDataSize), 6156, 6160, 0,
+	/*6156*/ uint16(xSetOp), uint16(XSAVE),
+	/*6158*/ uint16(xArgMem),
+	/*6159*/ uint16(xMatch),
+	/*6160*/ uint16(xSetOp), uint16(XSAVE),
+	/*6162*/ uint16(xArgMem),
+	/*6163*/ uint16(xMatch),
+	/*6164*/ uint16(xCondDataSize), 6156, 6160, 6168,
+	/*6168*/ uint16(xSetOp), uint16(XSAVE64),
+	/*6170*/ uint16(xArgMem),
+	/*6171*/ uint16(xMatch),
+	/*6172*/ uint16(xCondIs64), 6175, 6187,
+	/*6175*/ uint16(xCondDataSize), 6179, 6183, 0,
+	/*6179*/ uint16(xSetOp), uint16(XRSTOR),
+	/*6181*/ uint16(xArgMem),
+	/*6182*/ uint16(xMatch),
+	/*6183*/ uint16(xSetOp), uint16(XRSTOR),
+	/*6185*/ uint16(xArgMem),
+	/*6186*/ uint16(xMatch),
+	/*6187*/ uint16(xCondDataSize), 6179, 6183, 6191,
+	/*6191*/ uint16(xSetOp), uint16(XRSTOR64),
+	/*6193*/ uint16(xArgMem),
+	/*6194*/ uint16(xMatch),
+	/*6195*/ uint16(xCondDataSize), 6199, 6203, 6207,
+	/*6199*/ uint16(xSetOp), uint16(XSAVEOPT),
+	/*6201*/ uint16(xArgMem),
+	/*6202*/ uint16(xMatch),
+	/*6203*/ uint16(xSetOp), uint16(XSAVEOPT),
+	/*6205*/ uint16(xArgMem),
+	/*6206*/ uint16(xMatch),
+	/*6207*/ uint16(xSetOp), uint16(XSAVEOPT64),
+	/*6209*/ uint16(xArgMem),
+	/*6210*/ uint16(xMatch),
+	/*6211*/ uint16(xSetOp), uint16(CLFLUSH),
+	/*6213*/ uint16(xArgM8),
+	/*6214*/ uint16(xMatch),
+	/*6215*/ uint16(xSetOp), uint16(LFENCE),
+	/*6217*/ uint16(xMatch),
+	/*6218*/ uint16(xSetOp), uint16(MFENCE),
+	/*6220*/ uint16(xMatch),
+	/*6221*/ uint16(xSetOp), uint16(SFENCE),
+	/*6223*/ uint16(xMatch),
+	/*6224*/ uint16(xCondIs64), 6227, 6243,
+	/*6227*/ uint16(xCondDataSize), 6231, 6237, 0,
+	/*6231*/ uint16(xSetOp), uint16(IMUL),
+	/*6233*/ uint16(xReadSlashR),
+	/*6234*/ uint16(xArgR16),
+	/*6235*/ uint16(xArgRM16),
+	/*6236*/ uint16(xMatch),
+	/*6237*/ uint16(xSetOp), uint16(IMUL),
+	/*6239*/ uint16(xReadSlashR),
+	/*6240*/ uint16(xArgR32),
+	/*6241*/ uint16(xArgRM32),
+	/*6242*/ uint16(xMatch),
+	/*6243*/ uint16(xCondDataSize), 6231, 6237, 6247,
+	/*6247*/ uint16(xSetOp), uint16(IMUL),
+	/*6249*/ uint16(xReadSlashR),
+	/*6250*/ uint16(xArgR64),
+	/*6251*/ uint16(xArgRM64),
+	/*6252*/ uint16(xMatch),
+	/*6253*/ uint16(xSetOp), uint16(CMPXCHG),
+	/*6255*/ uint16(xReadSlashR),
+	/*6256*/ uint16(xArgRM8),
+	/*6257*/ uint16(xArgR8),
+	/*6258*/ uint16(xMatch),
+	/*6259*/ uint16(xCondIs64), 6262, 6278,
+	/*6262*/ uint16(xCondDataSize), 6266, 6272, 0,
+	/*6266*/ uint16(xSetOp), uint16(CMPXCHG),
+	/*6268*/ uint16(xReadSlashR),
+	/*6269*/ uint16(xArgRM16),
+	/*6270*/ uint16(xArgR16),
+	/*6271*/ uint16(xMatch),
+	/*6272*/ uint16(xSetOp), uint16(CMPXCHG),
+	/*6274*/ uint16(xReadSlashR),
+	/*6275*/ uint16(xArgRM32),
+	/*6276*/ uint16(xArgR32),
+	/*6277*/ uint16(xMatch),
+	/*6278*/ uint16(xCondDataSize), 6266, 6272, 6282,
+	/*6282*/ uint16(xSetOp), uint16(CMPXCHG),
+	/*6284*/ uint16(xReadSlashR),
+	/*6285*/ uint16(xArgRM64),
+	/*6286*/ uint16(xArgR64),
+	/*6287*/ uint16(xMatch),
+	/*6288*/ uint16(xCondIs64), 6291, 6307,
+	/*6291*/ uint16(xCondDataSize), 6295, 6301, 0,
+	/*6295*/ uint16(xSetOp), uint16(LSS),
+	/*6297*/ uint16(xReadSlashR),
+	/*6298*/ uint16(xArgR16),
+	/*6299*/ uint16(xArgM16colon16),
+	/*6300*/ uint16(xMatch),
+	/*6301*/ uint16(xSetOp), uint16(LSS),
+	/*6303*/ uint16(xReadSlashR),
+	/*6304*/ uint16(xArgR32),
+	/*6305*/ uint16(xArgM16colon32),
+	/*6306*/ uint16(xMatch),
+	/*6307*/ uint16(xCondDataSize), 6295, 6301, 6311,
+	/*6311*/ uint16(xSetOp), uint16(LSS),
+	/*6313*/ uint16(xReadSlashR),
+	/*6314*/ uint16(xArgR64),
+	/*6315*/ uint16(xArgM16colon64),
+	/*6316*/ uint16(xMatch),
+	/*6317*/ uint16(xCondIs64), 6320, 6336,
+	/*6320*/ uint16(xCondDataSize), 6324, 6330, 0,
+	/*6324*/ uint16(xSetOp), uint16(BTR),
+	/*6326*/ uint16(xReadSlashR),
+	/*6327*/ uint16(xArgRM16),
+	/*6328*/ uint16(xArgR16),
+	/*6329*/ uint16(xMatch),
+	/*6330*/ uint16(xSetOp), uint16(BTR),
+	/*6332*/ uint16(xReadSlashR),
+	/*6333*/ uint16(xArgRM32),
+	/*6334*/ uint16(xArgR32),
+	/*6335*/ uint16(xMatch),
+	/*6336*/ uint16(xCondDataSize), 6324, 6330, 6340,
+	/*6340*/ uint16(xSetOp), uint16(BTR),
+	/*6342*/ uint16(xReadSlashR),
+	/*6343*/ uint16(xArgRM64),
+	/*6344*/ uint16(xArgR64),
+	/*6345*/ uint16(xMatch),
+	/*6346*/ uint16(xCondIs64), 6349, 6365,
+	/*6349*/ uint16(xCondDataSize), 6353, 6359, 0,
+	/*6353*/ uint16(xSetOp), uint16(LFS),
+	/*6355*/ uint16(xReadSlashR),
+	/*6356*/ uint16(xArgR16),
+	/*6357*/ uint16(xArgM16colon16),
+	/*6358*/ uint16(xMatch),
+	/*6359*/ uint16(xSetOp), uint16(LFS),
+	/*6361*/ uint16(xReadSlashR),
+	/*6362*/ uint16(xArgR32),
+	/*6363*/ uint16(xArgM16colon32),
+	/*6364*/ uint16(xMatch),
+	/*6365*/ uint16(xCondDataSize), 6353, 6359, 6369,
+	/*6369*/ uint16(xSetOp), uint16(LFS),
+	/*6371*/ uint16(xReadSlashR),
+	/*6372*/ uint16(xArgR64),
+	/*6373*/ uint16(xArgM16colon64),
+	/*6374*/ uint16(xMatch),
+	/*6375*/ uint16(xCondIs64), 6378, 6394,
+	/*6378*/ uint16(xCondDataSize), 6382, 6388, 0,
+	/*6382*/ uint16(xSetOp), uint16(LGS),
+	/*6384*/ uint16(xReadSlashR),
+	/*6385*/ uint16(xArgR16),
+	/*6386*/ uint16(xArgM16colon16),
+	/*6387*/ uint16(xMatch),
+	/*6388*/ uint16(xSetOp), uint16(LGS),
+	/*6390*/ uint16(xReadSlashR),
+	/*6391*/ uint16(xArgR32),
+	/*6392*/ uint16(xArgM16colon32),
+	/*6393*/ uint16(xMatch),
+	/*6394*/ uint16(xCondDataSize), 6382, 6388, 6398,
+	/*6398*/ uint16(xSetOp), uint16(LGS),
+	/*6400*/ uint16(xReadSlashR),
+	/*6401*/ uint16(xArgR64),
+	/*6402*/ uint16(xArgM16colon64),
+	/*6403*/ uint16(xMatch),
+	/*6404*/ uint16(xCondIs64), 6407, 6423,
+	/*6407*/ uint16(xCondDataSize), 6411, 6417, 0,
+	/*6411*/ uint16(xSetOp), uint16(MOVZX),
+	/*6413*/ uint16(xReadSlashR),
+	/*6414*/ uint16(xArgR16),
+	/*6415*/ uint16(xArgRM8),
+	/*6416*/ uint16(xMatch),
+	/*6417*/ uint16(xSetOp), uint16(MOVZX),
+	/*6419*/ uint16(xReadSlashR),
+	/*6420*/ uint16(xArgR32),
+	/*6421*/ uint16(xArgRM8),
+	/*6422*/ uint16(xMatch),
+	/*6423*/ uint16(xCondDataSize), 6411, 6417, 6427,
+	/*6427*/ uint16(xSetOp), uint16(MOVZX),
+	/*6429*/ uint16(xReadSlashR),
+	/*6430*/ uint16(xArgR64),
+	/*6431*/ uint16(xArgRM8),
+	/*6432*/ uint16(xMatch),
+	/*6433*/ uint16(xCondIs64), 6436, 6452,
+	/*6436*/ uint16(xCondDataSize), 6440, 6446, 0,
+	/*6440*/ uint16(xSetOp), uint16(MOVZX),
+	/*6442*/ uint16(xReadSlashR),
+	/*6443*/ uint16(xArgR16),
+	/*6444*/ uint16(xArgRM16),
+	/*6445*/ uint16(xMatch),
+	/*6446*/ uint16(xSetOp), uint16(MOVZX),
+	/*6448*/ uint16(xReadSlashR),
+	/*6449*/ uint16(xArgR32),
+	/*6450*/ uint16(xArgRM16),
+	/*6451*/ uint16(xMatch),
+	/*6452*/ uint16(xCondDataSize), 6440, 6446, 6456,
+	/*6456*/ uint16(xSetOp), uint16(MOVZX),
+	/*6458*/ uint16(xReadSlashR),
+	/*6459*/ uint16(xArgR64),
+	/*6460*/ uint16(xArgRM16),
+	/*6461*/ uint16(xMatch),
+	/*6462*/ uint16(xCondIs64), 6465, 6485,
+	/*6465*/ uint16(xCondPrefix), 1,
+	0xF3, 6469,
+	/*6469*/ uint16(xCondDataSize), 6473, 6479, 0,
+	/*6473*/ uint16(xSetOp), uint16(POPCNT),
+	/*6475*/ uint16(xReadSlashR),
+	/*6476*/ uint16(xArgR16),
+	/*6477*/ uint16(xArgRM16),
+	/*6478*/ uint16(xMatch),
+	/*6479*/ uint16(xSetOp), uint16(POPCNT),
+	/*6481*/ uint16(xReadSlashR),
+	/*6482*/ uint16(xArgR32),
+	/*6483*/ uint16(xArgRM32),
+	/*6484*/ uint16(xMatch),
+	/*6485*/ uint16(xCondPrefix), 1,
+	0xF3, 6489,
+	/*6489*/ uint16(xCondDataSize), 6473, 6479, 6493,
+	/*6493*/ uint16(xSetOp), uint16(POPCNT),
+	/*6495*/ uint16(xReadSlashR),
+	/*6496*/ uint16(xArgR64),
+	/*6497*/ uint16(xArgRM64),
+	/*6498*/ uint16(xMatch),
+	/*6499*/ uint16(xSetOp), uint16(UD1),
+	/*6501*/ uint16(xMatch),
+	/*6502*/ uint16(xCondSlashR),
+	0,    // 0
+	0,    // 1
+	0,    // 2
+	0,    // 3
+	6511, // 4
+	6540, // 5
+	6569, // 6
+	6598, // 7
+	/*6511*/ uint16(xCondIs64), 6514, 6530,
+	/*6514*/ uint16(xCondDataSize), 6518, 6524, 0,
+	/*6518*/ uint16(xSetOp), uint16(BT),
+	/*6520*/ uint16(xReadIb),
+	/*6521*/ uint16(xArgRM16),
+	/*6522*/ uint16(xArgImm8u),
+	/*6523*/ uint16(xMatch),
+	/*6524*/ uint16(xSetOp), uint16(BT),
+	/*6526*/ uint16(xReadIb),
+	/*6527*/ uint16(xArgRM32),
+	/*6528*/ uint16(xArgImm8u),
+	/*6529*/ uint16(xMatch),
+	/*6530*/ uint16(xCondDataSize), 6518, 6524, 6534,
+	/*6534*/ uint16(xSetOp), uint16(BT),
+	/*6536*/ uint16(xReadIb),
+	/*6537*/ uint16(xArgRM64),
+	/*6538*/ uint16(xArgImm8u),
+	/*6539*/ uint16(xMatch),
+	/*6540*/ uint16(xCondIs64), 6543, 6559,
+	/*6543*/ uint16(xCondDataSize), 6547, 6553, 0,
+	/*6547*/ uint16(xSetOp), uint16(BTS),
+	/*6549*/ uint16(xReadIb),
+	/*6550*/ uint16(xArgRM16),
+	/*6551*/ uint16(xArgImm8u),
+	/*6552*/ uint16(xMatch),
+	/*6553*/ uint16(xSetOp), uint16(BTS),
+	/*6555*/ uint16(xReadIb),
+	/*6556*/ uint16(xArgRM32),
+	/*6557*/ uint16(xArgImm8u),
+	/*6558*/ uint16(xMatch),
+	/*6559*/ uint16(xCondDataSize), 6547, 6553, 6563,
+	/*6563*/ uint16(xSetOp), uint16(BTS),
+	/*6565*/ uint16(xReadIb),
+	/*6566*/ uint16(xArgRM64),
+	/*6567*/ uint16(xArgImm8u),
+	/*6568*/ uint16(xMatch),
+	/*6569*/ uint16(xCondIs64), 6572, 6588,
+	/*6572*/ uint16(xCondDataSize), 6576, 6582, 0,
+	/*6576*/ uint16(xSetOp), uint16(BTR),
+	/*6578*/ uint16(xReadIb),
+	/*6579*/ uint16(xArgRM16),
+	/*6580*/ uint16(xArgImm8u),
+	/*6581*/ uint16(xMatch),
+	/*6582*/ uint16(xSetOp), uint16(BTR),
+	/*6584*/ uint16(xReadIb),
+	/*6585*/ uint16(xArgRM32),
+	/*6586*/ uint16(xArgImm8u),
+	/*6587*/ uint16(xMatch),
+	/*6588*/ uint16(xCondDataSize), 6576, 6582, 6592,
+	/*6592*/ uint16(xSetOp), uint16(BTR),
+	/*6594*/ uint16(xReadIb),
+	/*6595*/ uint16(xArgRM64),
+	/*6596*/ uint16(xArgImm8u),
+	/*6597*/ uint16(xMatch),
+	/*6598*/ uint16(xCondIs64), 6601, 6617,
+	/*6601*/ uint16(xCondDataSize), 6605, 6611, 0,
+	/*6605*/ uint16(xSetOp), uint16(BTC),
+	/*6607*/ uint16(xReadIb),
+	/*6608*/ uint16(xArgRM16),
+	/*6609*/ uint16(xArgImm8u),
+	/*6610*/ uint16(xMatch),
+	/*6611*/ uint16(xSetOp), uint16(BTC),
+	/*6613*/ uint16(xReadIb),
+	/*6614*/ uint16(xArgRM32),
+	/*6615*/ uint16(xArgImm8u),
+	/*6616*/ uint16(xMatch),
+	/*6617*/ uint16(xCondDataSize), 6605, 6611, 6621,
+	/*6621*/ uint16(xSetOp), uint16(BTC),
+	/*6623*/ uint16(xReadIb),
+	/*6624*/ uint16(xArgRM64),
+	/*6625*/ uint16(xArgImm8u),
+	/*6626*/ uint16(xMatch),
+	/*6627*/ uint16(xCondIs64), 6630, 6646,
+	/*6630*/ uint16(xCondDataSize), 6634, 6640, 0,
+	/*6634*/ uint16(xSetOp), uint16(BTC),
+	/*6636*/ uint16(xReadSlashR),
+	/*6637*/ uint16(xArgRM16),
+	/*6638*/ uint16(xArgR16),
+	/*6639*/ uint16(xMatch),
+	/*6640*/ uint16(xSetOp), uint16(BTC),
+	/*6642*/ uint16(xReadSlashR),
+	/*6643*/ uint16(xArgRM32),
+	/*6644*/ uint16(xArgR32),
+	/*6645*/ uint16(xMatch),
+	/*6646*/ uint16(xCondDataSize), 6634, 6640, 6650,
+	/*6650*/ uint16(xSetOp), uint16(BTC),
+	/*6652*/ uint16(xReadSlashR),
+	/*6653*/ uint16(xArgRM64),
+	/*6654*/ uint16(xArgR64),
+	/*6655*/ uint16(xMatch),
+	/*6656*/ uint16(xCondIs64), 6659, 6697,
+	/*6659*/ uint16(xCondPrefix), 2,
+	0xF3, 6681,
+	0x0, 6665,
+	/*6665*/ uint16(xCondDataSize), 6669, 6675, 0,
+	/*6669*/ uint16(xSetOp), uint16(BSF),
+	/*6671*/ uint16(xReadSlashR),
+	/*6672*/ uint16(xArgR16),
+	/*6673*/ uint16(xArgRM16),
+	/*6674*/ uint16(xMatch),
+	/*6675*/ uint16(xSetOp), uint16(BSF),
+	/*6677*/ uint16(xReadSlashR),
+	/*6678*/ uint16(xArgR32),
+	/*6679*/ uint16(xArgRM32),
+	/*6680*/ uint16(xMatch),
+	/*6681*/ uint16(xCondDataSize), 6685, 6691, 0,
+	/*6685*/ uint16(xSetOp), uint16(TZCNT),
+	/*6687*/ uint16(xReadSlashR),
+	/*6688*/ uint16(xArgR16),
+	/*6689*/ uint16(xArgRM16),
+	/*6690*/ uint16(xMatch),
+	/*6691*/ uint16(xSetOp), uint16(TZCNT),
+	/*6693*/ uint16(xReadSlashR),
+	/*6694*/ uint16(xArgR32),
+	/*6695*/ uint16(xArgRM32),
+	/*6696*/ uint16(xMatch),
+	/*6697*/ uint16(xCondPrefix), 2,
+	0xF3, 6713,
+	0x0, 6703,
+	/*6703*/ uint16(xCondDataSize), 6669, 6675, 6707,
+	/*6707*/ uint16(xSetOp), uint16(BSF),
+	/*6709*/ uint16(xReadSlashR),
+	/*6710*/ uint16(xArgR64),
+	/*6711*/ uint16(xArgRM64),
+	/*6712*/ uint16(xMatch),
+	/*6713*/ uint16(xCondDataSize), 6685, 6691, 6717,
+	/*6717*/ uint16(xSetOp), uint16(TZCNT),
+	/*6719*/ uint16(xReadSlashR),
+	/*6720*/ uint16(xArgR64),
+	/*6721*/ uint16(xArgRM64),
+	/*6722*/ uint16(xMatch),
+	/*6723*/ uint16(xCondIs64), 6726, 6764,
+	/*6726*/ uint16(xCondPrefix), 2,
+	0xF3, 6748,
+	0x0, 6732,
+	/*6732*/ uint16(xCondDataSize), 6736, 6742, 0,
+	/*6736*/ uint16(xSetOp), uint16(BSR),
+	/*6738*/ uint16(xReadSlashR),
+	/*6739*/ uint16(xArgR16),
+	/*6740*/ uint16(xArgRM16),
+	/*6741*/ uint16(xMatch),
+	/*6742*/ uint16(xSetOp), uint16(BSR),
+	/*6744*/ uint16(xReadSlashR),
+	/*6745*/ uint16(xArgR32),
+	/*6746*/ uint16(xArgRM32),
+	/*6747*/ uint16(xMatch),
+	/*6748*/ uint16(xCondDataSize), 6752, 6758, 0,
+	/*6752*/ uint16(xSetOp), uint16(LZCNT),
+	/*6754*/ uint16(xReadSlashR),
+	/*6755*/ uint16(xArgR16),
+	/*6756*/ uint16(xArgRM16),
+	/*6757*/ uint16(xMatch),
+	/*6758*/ uint16(xSetOp), uint16(LZCNT),
+	/*6760*/ uint16(xReadSlashR),
+	/*6761*/ uint16(xArgR32),
+	/*6762*/ uint16(xArgRM32),
+	/*6763*/ uint16(xMatch),
+	/*6764*/ uint16(xCondPrefix), 2,
+	0xF3, 6780,
+	0x0, 6770,
+	/*6770*/ uint16(xCondDataSize), 6736, 6742, 6774,
+	/*6774*/ uint16(xSetOp), uint16(BSR),
+	/*6776*/ uint16(xReadSlashR),
+	/*6777*/ uint16(xArgR64),
+	/*6778*/ uint16(xArgRM64),
+	/*6779*/ uint16(xMatch),
+	/*6780*/ uint16(xCondDataSize), 6752, 6758, 6784,
+	/*6784*/ uint16(xSetOp), uint16(LZCNT),
+	/*6786*/ uint16(xReadSlashR),
+	/*6787*/ uint16(xArgR64),
+	/*6788*/ uint16(xArgRM64),
+	/*6789*/ uint16(xMatch),
+	/*6790*/ uint16(xCondIs64), 6793, 6809,
+	/*6793*/ uint16(xCondDataSize), 6797, 6803, 0,
+	/*6797*/ uint16(xSetOp), uint16(MOVSX),
+	/*6799*/ uint16(xReadSlashR),
+	/*6800*/ uint16(xArgR16),
+	/*6801*/ uint16(xArgRM8),
+	/*6802*/ uint16(xMatch),
+	/*6803*/ uint16(xSetOp), uint16(MOVSX),
+	/*6805*/ uint16(xReadSlashR),
+	/*6806*/ uint16(xArgR32),
+	/*6807*/ uint16(xArgRM8),
+	/*6808*/ uint16(xMatch),
+	/*6809*/ uint16(xCondDataSize), 6797, 6803, 6813,
+	/*6813*/ uint16(xSetOp), uint16(MOVSX),
+	/*6815*/ uint16(xReadSlashR),
+	/*6816*/ uint16(xArgR64),
+	/*6817*/ uint16(xArgRM8),
+	/*6818*/ uint16(xMatch),
+	/*6819*/ uint16(xCondIs64), 6822, 6838,
+	/*6822*/ uint16(xCondDataSize), 6826, 6832, 0,
+	/*6826*/ uint16(xSetOp), uint16(MOVSX),
+	/*6828*/ uint16(xReadSlashR),
+	/*6829*/ uint16(xArgR16),
+	/*6830*/ uint16(xArgRM16),
+	/*6831*/ uint16(xMatch),
+	/*6832*/ uint16(xSetOp), uint16(MOVSX),
+	/*6834*/ uint16(xReadSlashR),
+	/*6835*/ uint16(xArgR32),
+	/*6836*/ uint16(xArgRM16),
+	/*6837*/ uint16(xMatch),
+	/*6838*/ uint16(xCondDataSize), 6826, 6832, 6842,
+	/*6842*/ uint16(xSetOp), uint16(MOVSX),
+	/*6844*/ uint16(xReadSlashR),
+	/*6845*/ uint16(xArgR64),
+	/*6846*/ uint16(xArgRM16),
+	/*6847*/ uint16(xMatch),
+	/*6848*/ uint16(xSetOp), uint16(XADD),
+	/*6850*/ uint16(xReadSlashR),
+	/*6851*/ uint16(xArgRM8),
+	/*6852*/ uint16(xArgR8),
+	/*6853*/ uint16(xMatch),
+	/*6854*/ uint16(xCondIs64), 6857, 6873,
+	/*6857*/ uint16(xCondDataSize), 6861, 6867, 0,
+	/*6861*/ uint16(xSetOp), uint16(XADD),
+	/*6863*/ uint16(xReadSlashR),
+	/*6864*/ uint16(xArgRM16),
+	/*6865*/ uint16(xArgR16),
+	/*6866*/ uint16(xMatch),
+	/*6867*/ uint16(xSetOp), uint16(XADD),
+	/*6869*/ uint16(xReadSlashR),
+	/*6870*/ uint16(xArgRM32),
+	/*6871*/ uint16(xArgR32),
+	/*6872*/ uint16(xMatch),
+	/*6873*/ uint16(xCondDataSize), 6861, 6867, 6877,
+	/*6877*/ uint16(xSetOp), uint16(XADD),
+	/*6879*/ uint16(xReadSlashR),
+	/*6880*/ uint16(xArgRM64),
+	/*6881*/ uint16(xArgR64),
+	/*6882*/ uint16(xMatch),
+	/*6883*/ uint16(xCondPrefix), 4,
+	0xF3, 6917,
+	0xF2, 6909,
+	0x66, 6901,
+	0x0, 6893,
+	/*6893*/ uint16(xSetOp), uint16(CMPPS),
+	/*6895*/ uint16(xReadSlashR),
+	/*6896*/ uint16(xReadIb),
+	/*6897*/ uint16(xArgXmm1),
+	/*6898*/ uint16(xArgXmm2M128),
+	/*6899*/ uint16(xArgImm8u),
+	/*6900*/ uint16(xMatch),
+	/*6901*/ uint16(xSetOp), uint16(CMPPD),
+	/*6903*/ uint16(xReadSlashR),
+	/*6904*/ uint16(xReadIb),
+	/*6905*/ uint16(xArgXmm1),
+	/*6906*/ uint16(xArgXmm2M128),
+	/*6907*/ uint16(xArgImm8u),
+	/*6908*/ uint16(xMatch),
+	/*6909*/ uint16(xSetOp), uint16(CMPSD_XMM),
+	/*6911*/ uint16(xReadSlashR),
+	/*6912*/ uint16(xReadIb),
+	/*6913*/ uint16(xArgXmm1),
+	/*6914*/ uint16(xArgXmm2M64),
+	/*6915*/ uint16(xArgImm8u),
+	/*6916*/ uint16(xMatch),
+	/*6917*/ uint16(xSetOp), uint16(CMPSS),
+	/*6919*/ uint16(xReadSlashR),
+	/*6920*/ uint16(xReadIb),
+	/*6921*/ uint16(xArgXmm1),
+	/*6922*/ uint16(xArgXmm2M32),
+	/*6923*/ uint16(xArgImm8u),
+	/*6924*/ uint16(xMatch),
+	/*6925*/ uint16(xCondIs64), 6928, 6944,
+	/*6928*/ uint16(xCondDataSize), 6932, 6938, 0,
+	/*6932*/ uint16(xSetOp), uint16(MOVNTI),
+	/*6934*/ uint16(xReadSlashR),
+	/*6935*/ uint16(xArgM32),
+	/*6936*/ uint16(xArgR32),
+	/*6937*/ uint16(xMatch),
+	/*6938*/ uint16(xSetOp), uint16(MOVNTI),
+	/*6940*/ uint16(xReadSlashR),
+	/*6941*/ uint16(xArgM32),
+	/*6942*/ uint16(xArgR32),
+	/*6943*/ uint16(xMatch),
+	/*6944*/ uint16(xCondDataSize), 6932, 6938, 6948,
+	/*6948*/ uint16(xSetOp), uint16(MOVNTI),
+	/*6950*/ uint16(xReadSlashR),
+	/*6951*/ uint16(xArgM64),
+	/*6952*/ uint16(xArgR64),
+	/*6953*/ uint16(xMatch),
+	/*6954*/ uint16(xCondPrefix), 2,
+	0x66, 6968,
+	0x0, 6960,
+	/*6960*/ uint16(xSetOp), uint16(PINSRW),
+	/*6962*/ uint16(xReadSlashR),
+	/*6963*/ uint16(xReadIb),
+	/*6964*/ uint16(xArgMm),
+	/*6965*/ uint16(xArgR32M16),
+	/*6966*/ uint16(xArgImm8u),
+	/*6967*/ uint16(xMatch),
+	/*6968*/ uint16(xSetOp), uint16(PINSRW),
+	/*6970*/ uint16(xReadSlashR),
+	/*6971*/ uint16(xReadIb),
+	/*6972*/ uint16(xArgXmm),
+	/*6973*/ uint16(xArgR32M16),
+	/*6974*/ uint16(xArgImm8u),
+	/*6975*/ uint16(xMatch),
+	/*6976*/ uint16(xCondPrefix), 2,
+	0x66, 6990,
+	0x0, 6982,
+	/*6982*/ uint16(xSetOp), uint16(PEXTRW),
+	/*6984*/ uint16(xReadSlashR),
+	/*6985*/ uint16(xReadIb),
+	/*6986*/ uint16(xArgR32),
+	/*6987*/ uint16(xArgMm2),
+	/*6988*/ uint16(xArgImm8u),
+	/*6989*/ uint16(xMatch),
+	/*6990*/ uint16(xSetOp), uint16(PEXTRW),
+	/*6992*/ uint16(xReadSlashR),
+	/*6993*/ uint16(xReadIb),
+	/*6994*/ uint16(xArgR32),
+	/*6995*/ uint16(xArgXmm2),
+	/*6996*/ uint16(xArgImm8u),
+	/*6997*/ uint16(xMatch),
+	/*6998*/ uint16(xCondPrefix), 2,
+	0x66, 7012,
+	0x0, 7004,
+	/*7004*/ uint16(xSetOp), uint16(SHUFPS),
+	/*7006*/ uint16(xReadSlashR),
+	/*7007*/ uint16(xReadIb),
+	/*7008*/ uint16(xArgXmm1),
+	/*7009*/ uint16(xArgXmm2M128),
+	/*7010*/ uint16(xArgImm8u),
+	/*7011*/ uint16(xMatch),
+	/*7012*/ uint16(xSetOp), uint16(SHUFPD),
+	/*7014*/ uint16(xReadSlashR),
+	/*7015*/ uint16(xReadIb),
+	/*7016*/ uint16(xArgXmm1),
+	/*7017*/ uint16(xArgXmm2M128),
+	/*7018*/ uint16(xArgImm8u),
+	/*7019*/ uint16(xMatch),
+	/*7020*/ uint16(xCondSlashR),
+	0,    // 0
+	7029, // 1
+	0,    // 2
+	7052, // 3
+	7075, // 4
+	7098, // 5
+	7121, // 6
+	0,    // 7
+	/*7029*/ uint16(xCondIs64), 7032, 7044,
+	/*7032*/ uint16(xCondDataSize), 7036, 7040, 0,
+	/*7036*/ uint16(xSetOp), uint16(CMPXCHG8B),
+	/*7038*/ uint16(xArgM64),
+	/*7039*/ uint16(xMatch),
+	/*7040*/ uint16(xSetOp), uint16(CMPXCHG8B),
+	/*7042*/ uint16(xArgM64),
+	/*7043*/ uint16(xMatch),
+	/*7044*/ uint16(xCondDataSize), 7036, 7040, 7048,
+	/*7048*/ uint16(xSetOp), uint16(CMPXCHG16B),
+	/*7050*/ uint16(xArgM128),
+	/*7051*/ uint16(xMatch),
+	/*7052*/ uint16(xCondIs64), 7055, 7067,
+	/*7055*/ uint16(xCondDataSize), 7059, 7063, 0,
+	/*7059*/ uint16(xSetOp), uint16(XRSTORS),
+	/*7061*/ uint16(xArgMem),
+	/*7062*/ uint16(xMatch),
+	/*7063*/ uint16(xSetOp), uint16(XRSTORS),
+	/*7065*/ uint16(xArgMem),
+	/*7066*/ uint16(xMatch),
+	/*7067*/ uint16(xCondDataSize), 7059, 7063, 7071,
+	/*7071*/ uint16(xSetOp), uint16(XRSTORS64),
+	/*7073*/ uint16(xArgMem),
+	/*7074*/ uint16(xMatch),
+	/*7075*/ uint16(xCondIs64), 7078, 7090,
+	/*7078*/ uint16(xCondDataSize), 7082, 7086, 0,
+	/*7082*/ uint16(xSetOp), uint16(XSAVEC),
+	/*7084*/ uint16(xArgMem),
+	/*7085*/ uint16(xMatch),
+	/*7086*/ uint16(xSetOp), uint16(XSAVEC),
+	/*7088*/ uint16(xArgMem),
+	/*7089*/ uint16(xMatch),
+	/*7090*/ uint16(xCondDataSize), 7082, 7086, 7094,
+	/*7094*/ uint16(xSetOp), uint16(XSAVEC64),
+	/*7096*/ uint16(xArgMem),
+	/*7097*/ uint16(xMatch),
+	/*7098*/ uint16(xCondIs64), 7101, 7113,
+	/*7101*/ uint16(xCondDataSize), 7105, 7109, 0,
+	/*7105*/ uint16(xSetOp), uint16(XSAVES),
+	/*7107*/ uint16(xArgMem),
+	/*7108*/ uint16(xMatch),
+	/*7109*/ uint16(xSetOp), uint16(XSAVES),
+	/*7111*/ uint16(xArgMem),
+	/*7112*/ uint16(xMatch),
+	/*7113*/ uint16(xCondDataSize), 7105, 7109, 7117,
+	/*7117*/ uint16(xSetOp), uint16(XSAVES64),
+	/*7119*/ uint16(xArgMem),
+	/*7120*/ uint16(xMatch),
+	/*7121*/ uint16(xCondIs64), 7124, 7142,
+	/*7124*/ uint16(xCondDataSize), 7128, 7135, 0,
+	/*7128*/ uint16(xCondIsMem), 7131, 0,
+	/*7131*/ uint16(xSetOp), uint16(RDRAND),
+	/*7133*/ uint16(xArgRmf16),
+	/*7134*/ uint16(xMatch),
+	/*7135*/ uint16(xCondIsMem), 7138, 0,
+	/*7138*/ uint16(xSetOp), uint16(RDRAND),
+	/*7140*/ uint16(xArgRmf32),
+	/*7141*/ uint16(xMatch),
+	/*7142*/ uint16(xCondDataSize), 7128, 7135, 7146,
+	/*7146*/ uint16(xSetOp), uint16(RDRAND),
+	/*7148*/ uint16(xMatch),
+	/*7149*/ uint16(xCondIs64), 7152, 7164,
+	/*7152*/ uint16(xCondDataSize), 7156, 7160, 0,
+	/*7156*/ uint16(xSetOp), uint16(BSWAP),
+	/*7158*/ uint16(xArgR16op),
+	/*7159*/ uint16(xMatch),
+	/*7160*/ uint16(xSetOp), uint16(BSWAP),
+	/*7162*/ uint16(xArgR32op),
+	/*7163*/ uint16(xMatch),
+	/*7164*/ uint16(xCondDataSize), 7156, 7160, 7168,
+	/*7168*/ uint16(xSetOp), uint16(BSWAP),
+	/*7170*/ uint16(xArgR64op),
+	/*7171*/ uint16(xMatch),
+	/*7172*/ uint16(xCondPrefix), 2,
+	0xF2, 7184,
+	0x66, 7178,
+	/*7178*/ uint16(xSetOp), uint16(ADDSUBPD),
+	/*7180*/ uint16(xReadSlashR),
+	/*7181*/ uint16(xArgXmm1),
+	/*7182*/ uint16(xArgXmm2M128),
+	/*7183*/ uint16(xMatch),
+	/*7184*/ uint16(xSetOp), uint16(ADDSUBPS),
+	/*7186*/ uint16(xReadSlashR),
+	/*7187*/ uint16(xArgXmm1),
+	/*7188*/ uint16(xArgXmm2M128),
+	/*7189*/ uint16(xMatch),
+	/*7190*/ uint16(xCondPrefix), 2,
+	0x66, 7202,
+	0x0, 7196,
+	/*7196*/ uint16(xSetOp), uint16(PSRLW),
+	/*7198*/ uint16(xReadSlashR),
+	/*7199*/ uint16(xArgMm),
+	/*7200*/ uint16(xArgMmM64),
+	/*7201*/ uint16(xMatch),
+	/*7202*/ uint16(xSetOp), uint16(PSRLW),
+	/*7204*/ uint16(xReadSlashR),
+	/*7205*/ uint16(xArgXmm1),
+	/*7206*/ uint16(xArgXmm2M128),
+	/*7207*/ uint16(xMatch),
+	/*7208*/ uint16(xCondPrefix), 2,
+	0x66, 7220,
+	0x0, 7214,
+	/*7214*/ uint16(xSetOp), uint16(PSRLD),
+	/*7216*/ uint16(xReadSlashR),
+	/*7217*/ uint16(xArgMm),
+	/*7218*/ uint16(xArgMmM64),
+	/*7219*/ uint16(xMatch),
+	/*7220*/ uint16(xSetOp), uint16(PSRLD),
+	/*7222*/ uint16(xReadSlashR),
+	/*7223*/ uint16(xArgXmm1),
+	/*7224*/ uint16(xArgXmm2M128),
+	/*7225*/ uint16(xMatch),
+	/*7226*/ uint16(xCondPrefix), 2,
+	0x66, 7238,
+	0x0, 7232,
+	/*7232*/ uint16(xSetOp), uint16(PSRLQ),
+	/*7234*/ uint16(xReadSlashR),
+	/*7235*/ uint16(xArgMm),
+	/*7236*/ uint16(xArgMmM64),
+	/*7237*/ uint16(xMatch),
+	/*7238*/ uint16(xSetOp), uint16(PSRLQ),
+	/*7240*/ uint16(xReadSlashR),
+	/*7241*/ uint16(xArgXmm1),
+	/*7242*/ uint16(xArgXmm2M128),
+	/*7243*/ uint16(xMatch),
+	/*7244*/ uint16(xCondPrefix), 2,
+	0x66, 7256,
+	0x0, 7250,
+	/*7250*/ uint16(xSetOp), uint16(PADDQ),
+	/*7252*/ uint16(xReadSlashR),
+	/*7253*/ uint16(xArgMm1),
+	/*7254*/ uint16(xArgMm2M64),
+	/*7255*/ uint16(xMatch),
+	/*7256*/ uint16(xSetOp), uint16(PADDQ),
+	/*7258*/ uint16(xReadSlashR),
+	/*7259*/ uint16(xArgXmm1),
+	/*7260*/ uint16(xArgXmm2M128),
+	/*7261*/ uint16(xMatch),
+	/*7262*/ uint16(xCondPrefix), 2,
+	0x66, 7274,
+	0x0, 7268,
+	/*7268*/ uint16(xSetOp), uint16(PMULLW),
+	/*7270*/ uint16(xReadSlashR),
+	/*7271*/ uint16(xArgMm),
+	/*7272*/ uint16(xArgMmM64),
+	/*7273*/ uint16(xMatch),
+	/*7274*/ uint16(xSetOp), uint16(PMULLW),
+	/*7276*/ uint16(xReadSlashR),
+	/*7277*/ uint16(xArgXmm1),
+	/*7278*/ uint16(xArgXmm2M128),
+	/*7279*/ uint16(xMatch),
+	/*7280*/ uint16(xCondPrefix), 3,
+	0xF3, 7300,
+	0xF2, 7294,
+	0x66, 7288,
+	/*7288*/ uint16(xSetOp), uint16(MOVQ),
+	/*7290*/ uint16(xReadSlashR),
+	/*7291*/ uint16(xArgXmm2M64),
+	/*7292*/ uint16(xArgXmm1),
+	/*7293*/ uint16(xMatch),
+	/*7294*/ uint16(xSetOp), uint16(MOVDQ2Q),
+	/*7296*/ uint16(xReadSlashR),
+	/*7297*/ uint16(xArgMm),
+	/*7298*/ uint16(xArgXmm2),
+	/*7299*/ uint16(xMatch),
+	/*7300*/ uint16(xSetOp), uint16(MOVQ2DQ),
+	/*7302*/ uint16(xReadSlashR),
+	/*7303*/ uint16(xArgXmm1),
+	/*7304*/ uint16(xArgMm2),
+	/*7305*/ uint16(xMatch),
+	/*7306*/ uint16(xCondPrefix), 2,
+	0x66, 7318,
+	0x0, 7312,
+	/*7312*/ uint16(xSetOp), uint16(PMOVMSKB),
+	/*7314*/ uint16(xReadSlashR),
+	/*7315*/ uint16(xArgR32),
+	/*7316*/ uint16(xArgMm2),
+	/*7317*/ uint16(xMatch),
+	/*7318*/ uint16(xSetOp), uint16(PMOVMSKB),
+	/*7320*/ uint16(xReadSlashR),
+	/*7321*/ uint16(xArgR32),
+	/*7322*/ uint16(xArgXmm2),
+	/*7323*/ uint16(xMatch),
+	/*7324*/ uint16(xCondPrefix), 2,
+	0x66, 7336,
+	0x0, 7330,
+	/*7330*/ uint16(xSetOp), uint16(PSUBUSB),
+	/*7332*/ uint16(xReadSlashR),
+	/*7333*/ uint16(xArgMm),
+	/*7334*/ uint16(xArgMmM64),
+	/*7335*/ uint16(xMatch),
+	/*7336*/ uint16(xSetOp), uint16(PSUBUSB),
+	/*7338*/ uint16(xReadSlashR),
+	/*7339*/ uint16(xArgXmm1),
+	/*7340*/ uint16(xArgXmm2M128),
+	/*7341*/ uint16(xMatch),
+	/*7342*/ uint16(xCondPrefix), 2,
+	0x66, 7354,
+	0x0, 7348,
+	/*7348*/ uint16(xSetOp), uint16(PSUBUSW),
+	/*7350*/ uint16(xReadSlashR),
+	/*7351*/ uint16(xArgMm),
+	/*7352*/ uint16(xArgMmM64),
+	/*7353*/ uint16(xMatch),
+	/*7354*/ uint16(xSetOp), uint16(PSUBUSW),
+	/*7356*/ uint16(xReadSlashR),
+	/*7357*/ uint16(xArgXmm1),
+	/*7358*/ uint16(xArgXmm2M128),
+	/*7359*/ uint16(xMatch),
+	/*7360*/ uint16(xCondPrefix), 2,
+	0x66, 7372,
+	0x0, 7366,
+	/*7366*/ uint16(xSetOp), uint16(PMINUB),
+	/*7368*/ uint16(xReadSlashR),
+	/*7369*/ uint16(xArgMm1),
+	/*7370*/ uint16(xArgMm2M64),
+	/*7371*/ uint16(xMatch),
+	/*7372*/ uint16(xSetOp), uint16(PMINUB),
+	/*7374*/ uint16(xReadSlashR),
+	/*7375*/ uint16(xArgXmm1),
+	/*7376*/ uint16(xArgXmm2M128),
+	/*7377*/ uint16(xMatch),
+	/*7378*/ uint16(xCondPrefix), 2,
+	0x66, 7390,
+	0x0, 7384,
+	/*7384*/ uint16(xSetOp), uint16(PAND),
+	/*7386*/ uint16(xReadSlashR),
+	/*7387*/ uint16(xArgMm),
+	/*7388*/ uint16(xArgMmM64),
+	/*7389*/ uint16(xMatch),
+	/*7390*/ uint16(xSetOp), uint16(PAND),
+	/*7392*/ uint16(xReadSlashR),
+	/*7393*/ uint16(xArgXmm1),
+	/*7394*/ uint16(xArgXmm2M128),
+	/*7395*/ uint16(xMatch),
+	/*7396*/ uint16(xCondPrefix), 2,
+	0x66, 7408,
+	0x0, 7402,
+	/*7402*/ uint16(xSetOp), uint16(PADDUSB),
+	/*7404*/ uint16(xReadSlashR),
+	/*7405*/ uint16(xArgMm),
+	/*7406*/ uint16(xArgMmM64),
+	/*7407*/ uint16(xMatch),
+	/*7408*/ uint16(xSetOp), uint16(PADDUSB),
+	/*7410*/ uint16(xReadSlashR),
+	/*7411*/ uint16(xArgXmm1),
+	/*7412*/ uint16(xArgXmm2M128),
+	/*7413*/ uint16(xMatch),
+	/*7414*/ uint16(xCondPrefix), 2,
+	0x66, 7426,
+	0x0, 7420,
+	/*7420*/ uint16(xSetOp), uint16(PADDUSW),
+	/*7422*/ uint16(xReadSlashR),
+	/*7423*/ uint16(xArgMm),
+	/*7424*/ uint16(xArgMmM64),
+	/*7425*/ uint16(xMatch),
+	/*7426*/ uint16(xSetOp), uint16(PADDUSW),
+	/*7428*/ uint16(xReadSlashR),
+	/*7429*/ uint16(xArgXmm1),
+	/*7430*/ uint16(xArgXmm2M128),
+	/*7431*/ uint16(xMatch),
+	/*7432*/ uint16(xCondPrefix), 2,
+	0x66, 7444,
+	0x0, 7438,
+	/*7438*/ uint16(xSetOp), uint16(PMAXUB),
+	/*7440*/ uint16(xReadSlashR),
+	/*7441*/ uint16(xArgMm1),
+	/*7442*/ uint16(xArgMm2M64),
+	/*7443*/ uint16(xMatch),
+	/*7444*/ uint16(xSetOp), uint16(PMAXUB),
+	/*7446*/ uint16(xReadSlashR),
+	/*7447*/ uint16(xArgXmm1),
+	/*7448*/ uint16(xArgXmm2M128),
+	/*7449*/ uint16(xMatch),
+	/*7450*/ uint16(xCondPrefix), 2,
+	0x66, 7462,
+	0x0, 7456,
+	/*7456*/ uint16(xSetOp), uint16(PANDN),
+	/*7458*/ uint16(xReadSlashR),
+	/*7459*/ uint16(xArgMm),
+	/*7460*/ uint16(xArgMmM64),
+	/*7461*/ uint16(xMatch),
+	/*7462*/ uint16(xSetOp), uint16(PANDN),
+	/*7464*/ uint16(xReadSlashR),
+	/*7465*/ uint16(xArgXmm1),
+	/*7466*/ uint16(xArgXmm2M128),
+	/*7467*/ uint16(xMatch),
+	/*7468*/ uint16(xCondPrefix), 2,
+	0x66, 7480,
+	0x0, 7474,
+	/*7474*/ uint16(xSetOp), uint16(PAVGB),
+	/*7476*/ uint16(xReadSlashR),
+	/*7477*/ uint16(xArgMm1),
+	/*7478*/ uint16(xArgMm2M64),
+	/*7479*/ uint16(xMatch),
+	/*7480*/ uint16(xSetOp), uint16(PAVGB),
+	/*7482*/ uint16(xReadSlashR),
+	/*7483*/ uint16(xArgXmm1),
+	/*7484*/ uint16(xArgXmm2M128),
+	/*7485*/ uint16(xMatch),
+	/*7486*/ uint16(xCondPrefix), 2,
+	0x66, 7498,
+	0x0, 7492,
+	/*7492*/ uint16(xSetOp), uint16(PSRAW),
+	/*7494*/ uint16(xReadSlashR),
+	/*7495*/ uint16(xArgMm),
+	/*7496*/ uint16(xArgMmM64),
+	/*7497*/ uint16(xMatch),
+	/*7498*/ uint16(xSetOp), uint16(PSRAW),
+	/*7500*/ uint16(xReadSlashR),
+	/*7501*/ uint16(xArgXmm1),
+	/*7502*/ uint16(xArgXmm2M128),
+	/*7503*/ uint16(xMatch),
+	/*7504*/ uint16(xCondPrefix), 2,
+	0x66, 7516,
+	0x0, 7510,
+	/*7510*/ uint16(xSetOp), uint16(PSRAD),
+	/*7512*/ uint16(xReadSlashR),
+	/*7513*/ uint16(xArgMm),
+	/*7514*/ uint16(xArgMmM64),
+	/*7515*/ uint16(xMatch),
+	/*7516*/ uint16(xSetOp), uint16(PSRAD),
+	/*7518*/ uint16(xReadSlashR),
+	/*7519*/ uint16(xArgXmm1),
+	/*7520*/ uint16(xArgXmm2M128),
+	/*7521*/ uint16(xMatch),
+	/*7522*/ uint16(xCondPrefix), 2,
+	0x66, 7534,
+	0x0, 7528,
+	/*7528*/ uint16(xSetOp), uint16(PAVGW),
+	/*7530*/ uint16(xReadSlashR),
+	/*7531*/ uint16(xArgMm1),
+	/*7532*/ uint16(xArgMm2M64),
+	/*7533*/ uint16(xMatch),
+	/*7534*/ uint16(xSetOp), uint16(PAVGW),
+	/*7536*/ uint16(xReadSlashR),
+	/*7537*/ uint16(xArgXmm1),
+	/*7538*/ uint16(xArgXmm2M128),
+	/*7539*/ uint16(xMatch),
+	/*7540*/ uint16(xCondPrefix), 2,
+	0x66, 7552,
+	0x0, 7546,
+	/*7546*/ uint16(xSetOp), uint16(PMULHUW),
+	/*7548*/ uint16(xReadSlashR),
+	/*7549*/ uint16(xArgMm1),
+	/*7550*/ uint16(xArgMm2M64),
+	/*7551*/ uint16(xMatch),
+	/*7552*/ uint16(xSetOp), uint16(PMULHUW),
+	/*7554*/ uint16(xReadSlashR),
+	/*7555*/ uint16(xArgXmm1),
+	/*7556*/ uint16(xArgXmm2M128),
+	/*7557*/ uint16(xMatch),
+	/*7558*/ uint16(xCondPrefix), 2,
+	0x66, 7570,
+	0x0, 7564,
+	/*7564*/ uint16(xSetOp), uint16(PMULHW),
+	/*7566*/ uint16(xReadSlashR),
+	/*7567*/ uint16(xArgMm),
+	/*7568*/ uint16(xArgMmM64),
+	/*7569*/ uint16(xMatch),
+	/*7570*/ uint16(xSetOp), uint16(PMULHW),
+	/*7572*/ uint16(xReadSlashR),
+	/*7573*/ uint16(xArgXmm1),
+	/*7574*/ uint16(xArgXmm2M128),
+	/*7575*/ uint16(xMatch),
+	/*7576*/ uint16(xCondPrefix), 3,
+	0xF3, 7596,
+	0xF2, 7590,
+	0x66, 7584,
+	/*7584*/ uint16(xSetOp), uint16(CVTTPD2DQ),
+	/*7586*/ uint16(xReadSlashR),
+	/*7587*/ uint16(xArgXmm1),
+	/*7588*/ uint16(xArgXmm2M128),
+	/*7589*/ uint16(xMatch),
+	/*7590*/ uint16(xSetOp), uint16(CVTPD2DQ),
+	/*7592*/ uint16(xReadSlashR),
+	/*7593*/ uint16(xArgXmm1),
+	/*7594*/ uint16(xArgXmm2M128),
+	/*7595*/ uint16(xMatch),
+	/*7596*/ uint16(xSetOp), uint16(CVTDQ2PD),
+	/*7598*/ uint16(xReadSlashR),
+	/*7599*/ uint16(xArgXmm1),
+	/*7600*/ uint16(xArgXmm2M64),
+	/*7601*/ uint16(xMatch),
+	/*7602*/ uint16(xCondPrefix), 2,
+	0x66, 7614,
+	0x0, 7608,
+	/*7608*/ uint16(xSetOp), uint16(MOVNTQ),
+	/*7610*/ uint16(xReadSlashR),
+	/*7611*/ uint16(xArgM64),
+	/*7612*/ uint16(xArgMm),
+	/*7613*/ uint16(xMatch),
+	/*7614*/ uint16(xSetOp), uint16(MOVNTDQ),
+	/*7616*/ uint16(xReadSlashR),
+	/*7617*/ uint16(xArgM128),
+	/*7618*/ uint16(xArgXmm),
+	/*7619*/ uint16(xMatch),
+	/*7620*/ uint16(xCondPrefix), 2,
+	0x66, 7632,
+	0x0, 7626,
+	/*7626*/ uint16(xSetOp), uint16(PSUBSB),
+	/*7628*/ uint16(xReadSlashR),
+	/*7629*/ uint16(xArgMm),
+	/*7630*/ uint16(xArgMmM64),
+	/*7631*/ uint16(xMatch),
+	/*7632*/ uint16(xSetOp), uint16(PSUBSB),
+	/*7634*/ uint16(xReadSlashR),
+	/*7635*/ uint16(xArgXmm1),
+	/*7636*/ uint16(xArgXmm2M128),
+	/*7637*/ uint16(xMatch),
+	/*7638*/ uint16(xCondPrefix), 2,
+	0x66, 7650,
+	0x0, 7644,
+	/*7644*/ uint16(xSetOp), uint16(PSUBSW),
+	/*7646*/ uint16(xReadSlashR),
+	/*7647*/ uint16(xArgMm),
+	/*7648*/ uint16(xArgMmM64),
+	/*7649*/ uint16(xMatch),
+	/*7650*/ uint16(xSetOp), uint16(PSUBSW),
+	/*7652*/ uint16(xReadSlashR),
+	/*7653*/ uint16(xArgXmm1),
+	/*7654*/ uint16(xArgXmm2M128),
+	/*7655*/ uint16(xMatch),
+	/*7656*/ uint16(xCondPrefix), 2,
+	0x66, 7668,
+	0x0, 7662,
+	/*7662*/ uint16(xSetOp), uint16(PMINSW),
+	/*7664*/ uint16(xReadSlashR),
+	/*7665*/ uint16(xArgMm1),
+	/*7666*/ uint16(xArgMm2M64),
+	/*7667*/ uint16(xMatch),
+	/*7668*/ uint16(xSetOp), uint16(PMINSW),
+	/*7670*/ uint16(xReadSlashR),
+	/*7671*/ uint16(xArgXmm1),
+	/*7672*/ uint16(xArgXmm2M128),
+	/*7673*/ uint16(xMatch),
+	/*7674*/ uint16(xCondPrefix), 2,
+	0x66, 7686,
+	0x0, 7680,
+	/*7680*/ uint16(xSetOp), uint16(POR),
+	/*7682*/ uint16(xReadSlashR),
+	/*7683*/ uint16(xArgMm),
+	/*7684*/ uint16(xArgMmM64),
+	/*7685*/ uint16(xMatch),
+	/*7686*/ uint16(xSetOp), uint16(POR),
+	/*7688*/ uint16(xReadSlashR),
+	/*7689*/ uint16(xArgXmm1),
+	/*7690*/ uint16(xArgXmm2M128),
+	/*7691*/ uint16(xMatch),
+	/*7692*/ uint16(xCondPrefix), 2,
+	0x66, 7704,
+	0x0, 7698,
+	/*7698*/ uint16(xSetOp), uint16(PADDSB),
+	/*7700*/ uint16(xReadSlashR),
+	/*7701*/ uint16(xArgMm),
+	/*7702*/ uint16(xArgMmM64),
+	/*7703*/ uint16(xMatch),
+	/*7704*/ uint16(xSetOp), uint16(PADDSB),
+	/*7706*/ uint16(xReadSlashR),
+	/*7707*/ uint16(xArgXmm1),
+	/*7708*/ uint16(xArgXmm2M128),
+	/*7709*/ uint16(xMatch),
+	/*7710*/ uint16(xCondPrefix), 2,
+	0x66, 7722,
+	0x0, 7716,
+	/*7716*/ uint16(xSetOp), uint16(PADDSW),
+	/*7718*/ uint16(xReadSlashR),
+	/*7719*/ uint16(xArgMm),
+	/*7720*/ uint16(xArgMmM64),
+	/*7721*/ uint16(xMatch),
+	/*7722*/ uint16(xSetOp), uint16(PADDSW),
+	/*7724*/ uint16(xReadSlashR),
+	/*7725*/ uint16(xArgXmm1),
+	/*7726*/ uint16(xArgXmm2M128),
+	/*7727*/ uint16(xMatch),
+	/*7728*/ uint16(xCondPrefix), 2,
+	0x66, 7740,
+	0x0, 7734,
+	/*7734*/ uint16(xSetOp), uint16(PMAXSW),
+	/*7736*/ uint16(xReadSlashR),
+	/*7737*/ uint16(xArgMm1),
+	/*7738*/ uint16(xArgMm2M64),
+	/*7739*/ uint16(xMatch),
+	/*7740*/ uint16(xSetOp), uint16(PMAXSW),
+	/*7742*/ uint16(xReadSlashR),
+	/*7743*/ uint16(xArgXmm1),
+	/*7744*/ uint16(xArgXmm2M128),
+	/*7745*/ uint16(xMatch),
+	/*7746*/ uint16(xCondPrefix), 2,
+	0x66, 7758,
+	0x0, 7752,
+	/*7752*/ uint16(xSetOp), uint16(PXOR),
+	/*7754*/ uint16(xReadSlashR),
+	/*7755*/ uint16(xArgMm),
+	/*7756*/ uint16(xArgMmM64),
+	/*7757*/ uint16(xMatch),
+	/*7758*/ uint16(xSetOp), uint16(PXOR),
+	/*7760*/ uint16(xReadSlashR),
+	/*7761*/ uint16(xArgXmm1),
+	/*7762*/ uint16(xArgXmm2M128),
+	/*7763*/ uint16(xMatch),
+	/*7764*/ uint16(xCondPrefix), 1,
+	0xF2, 7768,
+	/*7768*/ uint16(xSetOp), uint16(LDDQU),
+	/*7770*/ uint16(xReadSlashR),
+	/*7771*/ uint16(xArgXmm1),
+	/*7772*/ uint16(xArgM128),
+	/*7773*/ uint16(xMatch),
+	/*7774*/ uint16(xCondPrefix), 2,
+	0x66, 7786,
+	0x0, 7780,
+	/*7780*/ uint16(xSetOp), uint16(PSLLW),
+	/*7782*/ uint16(xReadSlashR),
+	/*7783*/ uint16(xArgMm),
+	/*7784*/ uint16(xArgMmM64),
+	/*7785*/ uint16(xMatch),
+	/*7786*/ uint16(xSetOp), uint16(PSLLW),
+	/*7788*/ uint16(xReadSlashR),
+	/*7789*/ uint16(xArgXmm1),
+	/*7790*/ uint16(xArgXmm2M128),
+	/*7791*/ uint16(xMatch),
+	/*7792*/ uint16(xCondPrefix), 2,
+	0x66, 7804,
+	0x0, 7798,
+	/*7798*/ uint16(xSetOp), uint16(PSLLD),
+	/*7800*/ uint16(xReadSlashR),
+	/*7801*/ uint16(xArgMm),
+	/*7802*/ uint16(xArgMmM64),
+	/*7803*/ uint16(xMatch),
+	/*7804*/ uint16(xSetOp), uint16(PSLLD),
+	/*7806*/ uint16(xReadSlashR),
+	/*7807*/ uint16(xArgXmm1),
+	/*7808*/ uint16(xArgXmm2M128),
+	/*7809*/ uint16(xMatch),
+	/*7810*/ uint16(xCondPrefix), 2,
+	0x66, 7822,
+	0x0, 7816,
+	/*7816*/ uint16(xSetOp), uint16(PSLLQ),
+	/*7818*/ uint16(xReadSlashR),
+	/*7819*/ uint16(xArgMm),
+	/*7820*/ uint16(xArgMmM64),
+	/*7821*/ uint16(xMatch),
+	/*7822*/ uint16(xSetOp), uint16(PSLLQ),
+	/*7824*/ uint16(xReadSlashR),
+	/*7825*/ uint16(xArgXmm1),
+	/*7826*/ uint16(xArgXmm2M128),
+	/*7827*/ uint16(xMatch),
+	/*7828*/ uint16(xCondPrefix), 2,
+	0x66, 7840,
+	0x0, 7834,
+	/*7834*/ uint16(xSetOp), uint16(PMULUDQ),
+	/*7836*/ uint16(xReadSlashR),
+	/*7837*/ uint16(xArgMm1),
+	/*7838*/ uint16(xArgMm2M64),
+	/*7839*/ uint16(xMatch),
+	/*7840*/ uint16(xSetOp), uint16(PMULUDQ),
+	/*7842*/ uint16(xReadSlashR),
+	/*7843*/ uint16(xArgXmm1),
+	/*7844*/ uint16(xArgXmm2M128),
+	/*7845*/ uint16(xMatch),
+	/*7846*/ uint16(xCondPrefix), 2,
+	0x66, 7858,
+	0x0, 7852,
+	/*7852*/ uint16(xSetOp), uint16(PMADDWD),
+	/*7854*/ uint16(xReadSlashR),
+	/*7855*/ uint16(xArgMm),
+	/*7856*/ uint16(xArgMmM64),
+	/*7857*/ uint16(xMatch),
+	/*7858*/ uint16(xSetOp), uint16(PMADDWD),
+	/*7860*/ uint16(xReadSlashR),
+	/*7861*/ uint16(xArgXmm1),
+	/*7862*/ uint16(xArgXmm2M128),
+	/*7863*/ uint16(xMatch),
+	/*7864*/ uint16(xCondPrefix), 2,
+	0x66, 7876,
+	0x0, 7870,
+	/*7870*/ uint16(xSetOp), uint16(PSADBW),
+	/*7872*/ uint16(xReadSlashR),
+	/*7873*/ uint16(xArgMm1),
+	/*7874*/ uint16(xArgMm2M64),
+	/*7875*/ uint16(xMatch),
+	/*7876*/ uint16(xSetOp), uint16(PSADBW),
+	/*7878*/ uint16(xReadSlashR),
+	/*7879*/ uint16(xArgXmm1),
+	/*7880*/ uint16(xArgXmm2M128),
+	/*7881*/ uint16(xMatch),
+	/*7882*/ uint16(xCondPrefix), 2,
+	0x66, 7894,
+	0x0, 7888,
+	/*7888*/ uint16(xSetOp), uint16(MASKMOVQ),
+	/*7890*/ uint16(xReadSlashR),
+	/*7891*/ uint16(xArgMm1),
+	/*7892*/ uint16(xArgMm2),
+	/*7893*/ uint16(xMatch),
+	/*7894*/ uint16(xSetOp), uint16(MASKMOVDQU),
+	/*7896*/ uint16(xReadSlashR),
+	/*7897*/ uint16(xArgXmm1),
+	/*7898*/ uint16(xArgXmm2),
+	/*7899*/ uint16(xMatch),
+	/*7900*/ uint16(xCondPrefix), 2,
+	0x66, 7912,
+	0x0, 7906,
+	/*7906*/ uint16(xSetOp), uint16(PSUBB),
+	/*7908*/ uint16(xReadSlashR),
+	/*7909*/ uint16(xArgMm),
+	/*7910*/ uint16(xArgMmM64),
+	/*7911*/ uint16(xMatch),
+	/*7912*/ uint16(xSetOp), uint16(PSUBB),
+	/*7914*/ uint16(xReadSlashR),
+	/*7915*/ uint16(xArgXmm1),
+	/*7916*/ uint16(xArgXmm2M128),
+	/*7917*/ uint16(xMatch),
+	/*7918*/ uint16(xCondPrefix), 2,
+	0x66, 7930,
+	0x0, 7924,
+	/*7924*/ uint16(xSetOp), uint16(PSUBW),
+	/*7926*/ uint16(xReadSlashR),
+	/*7927*/ uint16(xArgMm),
+	/*7928*/ uint16(xArgMmM64),
+	/*7929*/ uint16(xMatch),
+	/*7930*/ uint16(xSetOp), uint16(PSUBW),
+	/*7932*/ uint16(xReadSlashR),
+	/*7933*/ uint16(xArgXmm1),
+	/*7934*/ uint16(xArgXmm2M128),
+	/*7935*/ uint16(xMatch),
+	/*7936*/ uint16(xCondPrefix), 2,
+	0x66, 7948,
+	0x0, 7942,
+	/*7942*/ uint16(xSetOp), uint16(PSUBD),
+	/*7944*/ uint16(xReadSlashR),
+	/*7945*/ uint16(xArgMm),
+	/*7946*/ uint16(xArgMmM64),
+	/*7947*/ uint16(xMatch),
+	/*7948*/ uint16(xSetOp), uint16(PSUBD),
+	/*7950*/ uint16(xReadSlashR),
+	/*7951*/ uint16(xArgXmm1),
+	/*7952*/ uint16(xArgXmm2M128),
+	/*7953*/ uint16(xMatch),
+	/*7954*/ uint16(xCondPrefix), 2,
+	0x66, 7966,
+	0x0, 7960,
+	/*7960*/ uint16(xSetOp), uint16(PSUBQ),
+	/*7962*/ uint16(xReadSlashR),
+	/*7963*/ uint16(xArgMm1),
+	/*7964*/ uint16(xArgMm2M64),
+	/*7965*/ uint16(xMatch),
+	/*7966*/ uint16(xSetOp), uint16(PSUBQ),
+	/*7968*/ uint16(xReadSlashR),
+	/*7969*/ uint16(xArgXmm1),
+	/*7970*/ uint16(xArgXmm2M128),
+	/*7971*/ uint16(xMatch),
+	/*7972*/ uint16(xCondPrefix), 2,
+	0x66, 7984,
+	0x0, 7978,
+	/*7978*/ uint16(xSetOp), uint16(PADDB),
+	/*7980*/ uint16(xReadSlashR),
+	/*7981*/ uint16(xArgMm),
+	/*7982*/ uint16(xArgMmM64),
+	/*7983*/ uint16(xMatch),
+	/*7984*/ uint16(xSetOp), uint16(PADDB),
+	/*7986*/ uint16(xReadSlashR),
+	/*7987*/ uint16(xArgXmm1),
+	/*7988*/ uint16(xArgXmm2M128),
+	/*7989*/ uint16(xMatch),
+	/*7990*/ uint16(xCondPrefix), 2,
+	0x66, 8002,
+	0x0, 7996,
+	/*7996*/ uint16(xSetOp), uint16(PADDW),
+	/*7998*/ uint16(xReadSlashR),
+	/*7999*/ uint16(xArgMm),
+	/*8000*/ uint16(xArgMmM64),
+	/*8001*/ uint16(xMatch),
+	/*8002*/ uint16(xSetOp), uint16(PADDW),
+	/*8004*/ uint16(xReadSlashR),
+	/*8005*/ uint16(xArgXmm1),
+	/*8006*/ uint16(xArgXmm2M128),
+	/*8007*/ uint16(xMatch),
+	/*8008*/ uint16(xCondPrefix), 2,
+	0x66, 8020,
+	0x0, 8014,
+	/*8014*/ uint16(xSetOp), uint16(PADDD),
+	/*8016*/ uint16(xReadSlashR),
+	/*8017*/ uint16(xArgMm),
+	/*8018*/ uint16(xArgMmM64),
+	/*8019*/ uint16(xMatch),
+	/*8020*/ uint16(xSetOp), uint16(PADDD),
+	/*8022*/ uint16(xReadSlashR),
+	/*8023*/ uint16(xArgXmm1),
+	/*8024*/ uint16(xArgXmm2M128),
+	/*8025*/ uint16(xMatch),
+	/*8026*/ uint16(xSetOp), uint16(ADC),
+	/*8028*/ uint16(xReadSlashR),
+	/*8029*/ uint16(xArgRM8),
+	/*8030*/ uint16(xArgR8),
+	/*8031*/ uint16(xMatch),
+	/*8032*/ uint16(xCondIs64), 8035, 8051,
+	/*8035*/ uint16(xCondDataSize), 8039, 8045, 0,
+	/*8039*/ uint16(xSetOp), uint16(ADC),
+	/*8041*/ uint16(xReadSlashR),
+	/*8042*/ uint16(xArgRM16),
+	/*8043*/ uint16(xArgR16),
+	/*8044*/ uint16(xMatch),
+	/*8045*/ uint16(xSetOp), uint16(ADC),
+	/*8047*/ uint16(xReadSlashR),
+	/*8048*/ uint16(xArgRM32),
+	/*8049*/ uint16(xArgR32),
+	/*8050*/ uint16(xMatch),
+	/*8051*/ uint16(xCondDataSize), 8039, 8045, 8055,
+	/*8055*/ uint16(xSetOp), uint16(ADC),
+	/*8057*/ uint16(xReadSlashR),
+	/*8058*/ uint16(xArgRM64),
+	/*8059*/ uint16(xArgR64),
+	/*8060*/ uint16(xMatch),
+	/*8061*/ uint16(xSetOp), uint16(ADC),
+	/*8063*/ uint16(xReadSlashR),
+	/*8064*/ uint16(xArgR8),
+	/*8065*/ uint16(xArgRM8),
+	/*8066*/ uint16(xMatch),
+	/*8067*/ uint16(xCondIs64), 8070, 8086,
+	/*8070*/ uint16(xCondDataSize), 8074, 8080, 0,
+	/*8074*/ uint16(xSetOp), uint16(ADC),
+	/*8076*/ uint16(xReadSlashR),
+	/*8077*/ uint16(xArgR16),
+	/*8078*/ uint16(xArgRM16),
+	/*8079*/ uint16(xMatch),
+	/*8080*/ uint16(xSetOp), uint16(ADC),
+	/*8082*/ uint16(xReadSlashR),
+	/*8083*/ uint16(xArgR32),
+	/*8084*/ uint16(xArgRM32),
+	/*8085*/ uint16(xMatch),
+	/*8086*/ uint16(xCondDataSize), 8074, 8080, 8090,
+	/*8090*/ uint16(xSetOp), uint16(ADC),
+	/*8092*/ uint16(xReadSlashR),
+	/*8093*/ uint16(xArgR64),
+	/*8094*/ uint16(xArgRM64),
+	/*8095*/ uint16(xMatch),
+	/*8096*/ uint16(xSetOp), uint16(ADC),
+	/*8098*/ uint16(xReadIb),
+	/*8099*/ uint16(xArgAL),
+	/*8100*/ uint16(xArgImm8u),
+	/*8101*/ uint16(xMatch),
+	/*8102*/ uint16(xCondIs64), 8105, 8121,
+	/*8105*/ uint16(xCondDataSize), 8109, 8115, 0,
+	/*8109*/ uint16(xSetOp), uint16(ADC),
+	/*8111*/ uint16(xReadIw),
+	/*8112*/ uint16(xArgAX),
+	/*8113*/ uint16(xArgImm16),
+	/*8114*/ uint16(xMatch),
+	/*8115*/ uint16(xSetOp), uint16(ADC),
+	/*8117*/ uint16(xReadId),
+	/*8118*/ uint16(xArgEAX),
+	/*8119*/ uint16(xArgImm32),
+	/*8120*/ uint16(xMatch),
+	/*8121*/ uint16(xCondDataSize), 8109, 8115, 8125,
+	/*8125*/ uint16(xSetOp), uint16(ADC),
+	/*8127*/ uint16(xReadId),
+	/*8128*/ uint16(xArgRAX),
+	/*8129*/ uint16(xArgImm32),
+	/*8130*/ uint16(xMatch),
+	/*8131*/ uint16(xCondIs64), 8134, 0,
+	/*8134*/ uint16(xSetOp), uint16(PUSH),
+	/*8136*/ uint16(xArgSS),
+	/*8137*/ uint16(xMatch),
+	/*8138*/ uint16(xCondIs64), 8141, 0,
+	/*8141*/ uint16(xSetOp), uint16(POP),
+	/*8143*/ uint16(xArgSS),
+	/*8144*/ uint16(xMatch),
+	/*8145*/ uint16(xSetOp), uint16(SBB),
+	/*8147*/ uint16(xReadSlashR),
+	/*8148*/ uint16(xArgRM8),
+	/*8149*/ uint16(xArgR8),
+	/*8150*/ uint16(xMatch),
+	/*8151*/ uint16(xCondIs64), 8154, 8170,
+	/*8154*/ uint16(xCondDataSize), 8158, 8164, 0,
+	/*8158*/ uint16(xSetOp), uint16(SBB),
+	/*8160*/ uint16(xReadSlashR),
+	/*8161*/ uint16(xArgRM16),
+	/*8162*/ uint16(xArgR16),
+	/*8163*/ uint16(xMatch),
+	/*8164*/ uint16(xSetOp), uint16(SBB),
+	/*8166*/ uint16(xReadSlashR),
+	/*8167*/ uint16(xArgRM32),
+	/*8168*/ uint16(xArgR32),
+	/*8169*/ uint16(xMatch),
+	/*8170*/ uint16(xCondDataSize), 8158, 8164, 8174,
+	/*8174*/ uint16(xSetOp), uint16(SBB),
+	/*8176*/ uint16(xReadSlashR),
+	/*8177*/ uint16(xArgRM64),
+	/*8178*/ uint16(xArgR64),
+	/*8179*/ uint16(xMatch),
+	/*8180*/ uint16(xSetOp), uint16(SBB),
+	/*8182*/ uint16(xReadSlashR),
+	/*8183*/ uint16(xArgR8),
+	/*8184*/ uint16(xArgRM8),
+	/*8185*/ uint16(xMatch),
+	/*8186*/ uint16(xCondIs64), 8189, 8205,
+	/*8189*/ uint16(xCondDataSize), 8193, 8199, 0,
+	/*8193*/ uint16(xSetOp), uint16(SBB),
+	/*8195*/ uint16(xReadSla