| // Copyright 2016 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| package main |
| |
| import ( |
| "fmt" |
| "os" |
| "sort" |
| "strings" |
| ) |
| |
| // Clean up the data from the Intel manual for correctness |
| // and to annotate details relevant to decoding or encoding, |
| // such as whether an instruction is valid only in certain |
| // operand size modes. |
| |
| // encodeReplace maps (argument, encoding) pairs to the corrected argument. |
| // We use a suffix 1 for the register and 2 for the r/m in the modrm byte. |
| // We use a suffix V for a register number specified in the VEX.vvvv bits. |
| var encodeReplace = map[[2]string]string{ |
| {"mm", "ModRM:reg"}: "mm1", |
| {"mm", "ModRM:r/m"}: "mm2", |
| {"mm1", "ModRM:r/m"}: "mm2", |
| {"mm2", "ModRM:reg"}: "mm1", |
| {"mm/m32", "ModRM:r/m"}: "mm2/m32", |
| {"mm/m64", "ModRM:r/m"}: "mm2/m64", |
| {"xmm", "ModRM:reg"}: "xmm1", |
| {"xmm", "ModRM:r/m"}: "xmm2", |
| {"xmm/m64", "ModRM:r/m"}: "xmm2/m64", |
| {"xmm0", "ModRM:reg"}: "xmm1", |
| {"xmm1", "ModRM:r/m"}: "xmm2", |
| {"xmm1/m16", "ModRM:r/m"}: "xmm2/m16", |
| {"xmm1/m32", "ModRM:r/m"}: "xmm2/m32", |
| {"xmm1/m64", "ModRM:r/m"}: "xmm2/m64", |
| {"xmm1/m128", "ModRM:r/m"}: "xmm2/m128", |
| {"xmm1/m256", "ModRM:r/m"}: "xmm2/m256", |
| {"xmm/m16", "ModRM:r/m"}: "xmm2/m16", |
| {"xmm/m32", "ModRM:r/m"}: "xmm2/m32", |
| {"xmm/m64", "ModRM:r/m"}: "xmm2/m64", |
| {"xmm/m128", "ModRM:r/m"}: "xmm2/m128", |
| {"xmm/m256", "ModRM:r/m"}: "xmm2/m256", |
| {"xmm3", "ModRM:reg"}: "xmm1", |
| {"xmm3", "ModRM:r/m"}: "xmm2", |
| {"xmm3/m16", "ModRM:r/m"}: "xmm2/m16", |
| {"xmm3/m32", "ModRM:r/m"}: "xmm2/m32", |
| {"xmm3/m64", "ModRM:r/m"}: "xmm2/m64", |
| {"xmm3/m128", "ModRM:r/m"}: "xmm2/m128", |
| {"xmm3/m256", "ModRM:r/m"}: "xmm2/m256", |
| {"xmm2", "ModRM:reg"}: "xmm1", |
| {"xmm2/m16", "ModRM:reg"}: "xmm1/m16", |
| {"xmm2/m32", "ModRM:reg"}: "xmm1/m32", |
| {"xmm2/m64", "ModRM:reg"}: "xmm1/m64", |
| {"xmm2/m128", "ModRM:reg"}: "xmm1/m128", |
| {"xmm2/m256", "ModRM:reg"}: "xmm1/m256", |
| {"ymm", "ModRM:reg"}: "ymm1", |
| {"ymm", "ModRM:r/m"}: "ymm2", |
| {"ymm0", "ModRM:reg"}: "ymm1", |
| {"ymm1", "ModRM:r/m"}: "ymm2", |
| {"ymm1/m16", "ModRM:r/m"}: "ymm2/m16", |
| {"ymm1/m32", "ModRM:r/m"}: "ymm2/m32", |
| {"ymm1/m64", "ModRM:r/m"}: "ymm2/m64", |
| {"ymm1/m128", "ModRM:r/m"}: "ymm2/m128", |
| {"ymm1/m256", "ModRM:r/m"}: "ymm2/m256", |
| {"ymm3", "ModRM:reg"}: "ymm1", |
| {"ymm3", "ModRM:r/m"}: "ymm2", |
| {"ymm3/m16", "ModRM:r/m"}: "ymm2/m16", |
| {"ymm3/m32", "ModRM:r/m"}: "ymm2/m32", |
| {"ymm3/m64", "ModRM:r/m"}: "ymm2/m64", |
| {"ymm3/m128", "ModRM:r/m"}: "ymm2/m128", |
| {"ymm3/m256", "ModRM:r/m"}: "ymm2/m256", |
| {"ymm2", "ModRM:reg"}: "ymm1", |
| {"ymm2/m16", "ModRM:reg"}: "ymm1/m16", |
| {"ymm2/m32", "ModRM:reg"}: "ymm1/m32", |
| {"ymm2/m64", "ModRM:reg"}: "ymm1/m64", |
| {"ymm2/m128", "ModRM:reg"}: "ymm1/m128", |
| {"ymm2/m256", "ModRM:reg"}: "ymm1/m256", |
| {"xmm1", "VEX.vvvv"}: "xmmV", |
| {"xmm2", "VEX.vvvv"}: "xmmV", |
| {"ymm1", "VEX.vvvv"}: "ymmV", |
| {"ymm2", "VEX.vvvv"}: "ymmV", |
| {"xmm4", "imm8[7:4]"}: "xmmIH", |
| {"ymm4", "imm8[7:4]"}: "ymmIH", |
| {"r8", "opcode + rd"}: "r8op", |
| {"r16", "opcode + rd"}: "r16op", |
| {"r32", "opcode + rd"}: "r32op", |
| {"r64", "opcode + rd"}: "r64op", |
| {"reg/m32", "ModRM:r/m"}: "r/m32", |
| {"reg/m16", "ModRM:r/m"}: "r32/m16", |
| {"bnd", "ModRM:reg"}: "bnd1", |
| {"bnd2", "ModRM:reg"}: "bnd1", |
| {"bnd1/m64", "ModRM:r/m"}: "bnd2/m64", |
| {"bnd1/m128", "ModRM:r/m"}: "bnd2/m128", |
| {"r32a", "ModRM:reg"}: "r32", |
| {"r64a", "ModRM:reg"}: "r64", |
| {"r32", "VEX.vvvv"}: "r32V", |
| {"r64", "VEX.vvvv"}: "r64V", |
| {"r32b", "VEX.vvvv"}: "r32V", |
| {"r64b", "VEX.vvvv"}: "r64V", |
| {"r64", "VEX.vvvv"}: "r64V", |
| {"ST", "ST(0)"}: "ST(0)", |
| } |
| |
| // A few instructions do not have the usual encoding descriptions. |
| // Supply them. |
| var encodings = map[string][]string{ |
| "FADD m32fp": {"ModRM:r/m (r)"}, |
| "FADD m64fp": {"ModRM:r/m (r)"}, |
| "FADD ST(0), ST(i)": {"ST(0) (r, w)", "ST(i) (r)"}, |
| "FADD ST(i), ST(0)": {"ST(i) (r, w)", "ST(0) (r)"}, |
| "FADDP ST(i), ST(0)": {"ST(i) (r, w)", "ST(0) (r)"}, |
| "FIADD m32int": {"ModRM:r/m (r)"}, |
| "FIADD m16int": {"ModRM:r/m (r)"}, |
| "FBLD m80dec": {"ModRM:r/m (r)"}, |
| "FBSTP m80bcd": {"ModRM:r/m (w)"}, |
| "FCMOVB ST(0), ST(i)": {"ST(0) (r, w)", "ST(i) (r)"}, |
| "FCMOVE ST(0), ST(i)": {"ST(0) (r, w)", "ST(i) (r)"}, |
| "FCMOVBE ST(0), ST(i)": {"ST(0) (r, w)", "ST(i) (r)"}, |
| "FCMOVU ST(0), ST(i)": {"ST(0) (r, w)", "ST(i) (r)"}, |
| "FCMOVNB ST(0), ST(i)": {"ST(0) (r, w)", "ST(i) (r)"}, |
| "FCMOVNE ST(0), ST(i)": {"ST(0) (r, w)", "ST(i) (r)"}, |
| "FCMOVNBE ST(0), ST(i)": {"ST(0) (r, w)", "ST(i) (r)"}, |
| "FCMOVNU ST(0), ST(i)": {"ST(0) (r, w)", "ST(i) (r)"}, |
| "FCOM m32fp": {"ModRM:r/m (r)"}, |
| "FCOM m64fp": {"ModRM:r/m (r)"}, |
| "FCOM ST(i)": {"ST(i) (r)"}, |
| "FCOMP m32fp": {"ModRM:r/m (r)"}, |
| "FCOMP m64fp": {"ModRM:r/m (r)"}, |
| "FCOMP ST(i)": {"ST(i) (r)"}, |
| "FCOMI ST, ST(i)": {"ST(0) (r)", "ST(i) (r)"}, |
| "FCOMIP ST, ST(i)": {"ST(0) (r)", "ST(i) (r)"}, |
| "FUCOMI ST, ST(i)": {"ST(0) (r)", "ST(i) (r)"}, |
| "FUCOMIP ST, ST(i)": {"ST(0) (r)", "ST(i) (r)"}, |
| "FDIV m32fp": {"ModRM:r/m (r)"}, |
| "FDIV m64fp": {"ModRM:r/m (r)"}, |
| "FDIV ST(0), ST(i)": {"ST(0) (r, w)", "ST(i) (r)"}, |
| "FDIV ST(i), ST(0)": {"ST(i) (r, w)", "ST(0) (r)"}, |
| "FDIVP ST(i), ST(0)": {"ST(i) (r, w)", "ST(0) (r)"}, |
| "FIDIV m16int": {"ModRM:r/m (r)"}, |
| "FIDIV m32int": {"ModRM:r/m (r)"}, |
| "FIDIV m64int": {"ModRM:r/m (r)"}, |
| "FDIVR m32fp": {"ModRM:r/m (r)"}, |
| "FDIVR m64fp": {"ModRM:r/m (r)"}, |
| "FDIVR ST(0), ST(i)": {"ST(0) (r, w)", "ST(i) (r)"}, |
| "FDIVR ST(i), ST(0)": {"ST(i) (r, w)", "ST(0) (r)"}, |
| "FDIVRP ST(i), ST(0)": {"ST(i) (r, w)", "ST(0) (r)"}, |
| "FIDIVR m16int": {"ModRM:r/m (r)"}, |
| "FIDIVR m32int": {"ModRM:r/m (r)"}, |
| "FIDIVR m64int": {"ModRM:r/m (r)"}, |
| "FFREE ST(i)": {"ST(i) (w)"}, |
| "FICOM m16int": {"ModRM:r/m (r)"}, |
| "FICOM m32int": {"ModRM:r/m (r)"}, |
| "FICOMP m16int": {"ModRM:r/m (r)"}, |
| "FICOMP m32int": {"ModRM:r/m (r)"}, |
| "FILD m16int": {"ModRM:r/m (r)"}, |
| "FILD m32int": {"ModRM:r/m (r)"}, |
| "FILD m64int": {"ModRM:r/m (r)"}, |
| "FIST m16int": {"ModRM:r/m (w)"}, |
| "FIST m32int": {"ModRM:r/m (w)"}, |
| "FISTP m16int": {"ModRM:r/m (w)"}, |
| "FISTP m32int": {"ModRM:r/m (w)"}, |
| "FISTP m64int": {"ModRM:r/m (w)"}, |
| "FISTTP m16int": {"ModRM:r/m (w)"}, |
| "FISTTP m32int": {"ModRM:r/m (w)"}, |
| "FISTTP m64int": {"ModRM:r/m (w)"}, |
| "FLD m32fp": {"ModRM:r/m (r)"}, |
| "FLD m64fp": {"ModRM:r/m (r)"}, |
| "FLD m80fp": {"ModRM:r/m (r)"}, |
| "FLD ST(i)": {"ST(i) (r)"}, |
| "FLDCW m2byte": {"ModRM:r/m (r)"}, |
| "FLDENV m14/28byte": {"ModRM:r/m (r)"}, |
| "FMUL m32fp": {"ModRM:r/m (r)"}, |
| "FMUL m64fp": {"ModRM:r/m (r)"}, |
| "FMUL ST(0), ST(i)": {"ST(0) (r, w)", "ST(i) (r)"}, |
| "FMUL ST(i), ST(0)": {"ST(i) (r, w)", "ST(0) (r)"}, |
| "FMULP ST(i), ST(0)": {"ST(i) (r, w)", "ST(0) (r)"}, |
| "FIMUL m16int": {"ModRM:r/m (r)"}, |
| "FIMUL m32int": {"ModRM:r/m (r)"}, |
| "FRSTOR m94/108byte": {"ModRM:r/m (r)"}, |
| "FSAVE m94/108byte": {"ModRM:r/m (w)"}, |
| "FNSAVE m94/108byte": {"ModRM:r/m (w)"}, |
| "FST m32fp": {"ModRM:r/m (w)"}, |
| "FST m64fp": {"ModRM:r/m (w)"}, |
| "FST m80fp": {"ModRM:r/m (w)"}, |
| "FST ST(i)": {"ST(i) (w)"}, |
| "FSTP m32fp": {"ModRM:r/m (w)"}, |
| "FSTP m64fp": {"ModRM:r/m (w)"}, |
| "FSTP m80fp": {"ModRM:r/m (w)"}, |
| "FSTP ST(i)": {"ST(i) (w)"}, |
| "FSTCW m2byte": {"ModRM:r/m (w)"}, |
| "FNSTCW m2byte": {"ModRM:r/m (w)"}, |
| "FSTENV m14/28byte": {"ModRM:r/m (w)"}, |
| "FNSTENV m14/28byte": {"ModRM:r/m (w)"}, |
| "FSTSW m2byte": {"ModRM:r/m (w)"}, |
| "FSTSW AX": {"AX (w)"}, |
| "FNSTSW m2byte": {"ModRM:r/m (w)"}, |
| "FNSTSW AX": {"AX (w)"}, |
| "FSUB m32fp": {"ModRM:r/m (r)"}, |
| "FSUB m64fp": {"ModRM:r/m (r)"}, |
| "FSUB ST(0), ST(i)": {"ST(0) (r, w)", "ST(i) (r)"}, |
| "FSUB ST(i), ST(0)": {"ST(i) (r, w)", "ST(0) (r)"}, |
| "FSUBP ST(i), ST(0)": {"ST(i) (r, w)", "ST(0) (r)"}, |
| "FISUB m16int": {"ModRM:r/m (r)"}, |
| "FISUB m32int": {"ModRM:r/m (r)"}, |
| "FSUBR m32fp": {"ModRM:r/m (r)"}, |
| "FSUBR m64fp": {"ModRM:r/m (r)"}, |
| "FSUBR ST(0), ST(i)": {"ST(0) (r, w)", "ST(i) (r)"}, |
| "FSUBR ST(i), ST(0)": {"ST(i) (r, w)", "ST(0) (r)"}, |
| "FSUBRP ST(i), ST(0)": {"ST(i) (r, w)", "ST(0) (r)"}, |
| "FISUBR m16int": {"ModRM:r/m (r)"}, |
| "FISUBR m32int": {"ModRM:r/m (r)"}, |
| "FISUBR m64int": {"ModRM:r/m (r)"}, |
| "FUCOM ST(i)": {"ST(i) (r)"}, |
| "FUCOMP ST(i)": {"ST(i) (r)"}, |
| "FXCH ST(i)": {"ST(i) (r, w)"}, |
| "POP DS": {"DS (w)"}, |
| "POP ES": {"ES (w)"}, |
| "POP FS": {"FS (w)"}, |
| "POP GS": {"GS (w)"}, |
| "POP SS": {"SS (w)"}, |
| "POP CS": {"CS (w)"}, |
| "PUSH CS": {"CS (r)"}, |
| "PUSH DS": {"DS (r)"}, |
| "PUSH ES": {"ES (r)"}, |
| "PUSH FS": {"FS (r)"}, |
| "PUSH GS": {"GS (r)"}, |
| "PUSH SS": {"SS (r)"}, |
| "INT 3": {"3 (r)"}, |
| |
| // In manual but hard to parse |
| "BNDLDX bnd, mib": {"ModRM:reg (w)", "ModRM:r/m (r)"}, |
| "BNDSTX mib, bnd": {"ModRM:r/m (r)", "ModRM:reg (r)"}, |
| |
| // In manual but wrong |
| "CALL rel16": {"Offset"}, |
| "CALL rel32": {"Offset"}, |
| "IN AL, imm8": {"AL (w)", "imm8 (r)"}, |
| "IN AX, imm8": {"AX (w)", "imm8 (r)"}, |
| "IN EAX, imm8": {"EAX (w)", "imm8 (r)"}, |
| "IN AL, DX": {"AL (w)", "DX (r)"}, |
| "IN AX, DX": {"AX (w)", "DX (r)"}, |
| "IN EAX, DX": {"EAX (w)", "DX (r)"}, |
| "OUT DX, AL": {"DX (r)", "AL (r)"}, |
| "OUT DX, AX": {"DX (r)", "AX (r)"}, |
| "OUT DX, EAX": {"DX (r)", "EAX (r)"}, |
| "OUT imm8, AL": {"imm8 (r)", "AL (r)"}, |
| "OUT imm8, AX": {"imm8 (r)", "AX (r)"}, |
| "OUT imm8, EAX": {"imm8 (r)", "EAX (r)"}, |
| "XCHG AX, r16": {"AX (r, w)", "opcode + rd (r, w)"}, |
| "XCHG EAX, r32": {"EAX (r, w)", "opcode + rd (r, w)"}, |
| "XCHG RAX, r64": {"RAX (r, w)", "opcode + rd (r, w)"}, |
| |
| // Encoding not listed. |
| "INVEPT r32, m128": {"ModRM:reg (r)", "ModRM:r/m (r)"}, |
| "INVEPT r64, m128": {"ModRM:reg (r)", "ModRM:r/m (r)"}, |
| "INVVPID r32, m128": {"ModRM:reg (r)", "ModRM:r/m (r)"}, |
| "INVVPID r64, m128": {"ModRM:reg (r)", "ModRM:r/m (r)"}, |
| "VMREAD r/m32, r32": {"ModRM:r/m (w)", "ModRM:reg (r)"}, |
| "VMREAD r/m64, r64": {"ModRM:r/m (w)", "ModRM:reg (r)"}, |
| "VMWRITE r32, r/m32": {"ModRM:reg (r)", "ModRM:r/m (r)"}, |
| "VMWRITE r64, r/m64": {"ModRM:reg (r)", "ModRM:r/m (r)"}, |
| "VMCLEAR m64": {"ModRM:r/m (w)"}, |
| "VMPTRLD m64": {"ModRM:r/m (r)"}, |
| "VMPTRST m64": {"ModRM:r/m (w)"}, |
| "VMXON m64": {"ModRM:r/m (r)"}, |
| } |
| |
| // opAction lists the read/write actions for individual opcodes, |
| // where the manual does not. |
| var opAction = map[string][]string{ |
| "ADC": {"rw", "r"}, |
| "ADD": {"rw", "r"}, |
| "AND": {"rw", "r"}, |
| "BLENDVPD": {"rw", "r", "r"}, |
| "BLENDVPS": {"rw", "r", "r"}, |
| "IN": {"w", "r"}, |
| "MOV": {"w", "r"}, |
| "OR": {"rw", "r"}, |
| "OUT": {"r", "r"}, |
| "PBLENDVB": {"rw", "r", "r"}, |
| "RCL": {"rw", "r"}, |
| "RCR": {"rw", "r"}, |
| "ROL": {"rw", "r"}, |
| "ROR": {"rw", "r"}, |
| "SAL": {"rw", "r"}, |
| "SAR": {"rw", "r"}, |
| "SBB": {"rw", "r"}, |
| "SHL": {"rw", "r"}, |
| "SHLD": {"rw", "r", "r"}, |
| "SHR": {"rw", "r"}, |
| "SHRD": {"rw", "r", "r"}, |
| "SUB": {"rw", "r", "r"}, |
| "TEST": {"r", "r"}, |
| "VBLENDVPD": {"rw", "r", "r"}, |
| "VBLENDVPS": {"rw", "r", "r"}, |
| "VPBLENDVB": {"rw", "r", "r"}, |
| "VPMASKMOVD": {"w", "r", "r"}, |
| "VPMASKMOVQ": {"w", "r", "r"}, |
| "VPSLLVD": {"w", "r", "r"}, |
| "VPSRAVD": {"w", "r", "r"}, |
| "VPSRLVD": {"w", "r", "r"}, |
| "VPSRLVQ": {"w", "r", "r"}, |
| "VINSERTI128": {"w", "r", "r"}, |
| "VPBLENDD": {"w", "r", "r"}, |
| "VPERMD": {"w", "r", "r"}, |
| "VPERMPS": {"w", "r", "r"}, |
| "VPERM2I128": {"w", "r", "r"}, |
| "VPSLLVQ": {"w", "r", "r"}, |
| "XCHG": {"rw", "rw"}, |
| "XOR": {"rw", "r"}, |
| } |
| |
| // encodeOK lists valid arg, encoding pairs. |
| // Any pair not listed gets a warning. |
| var encodeOK = map[[2]string]bool{ |
| {"0", "imm8"}: true, |
| {"1", "1"}: true, |
| {"1", "imm8"}: true, |
| {"<XMM0>", "<XMM0>"}: true, |
| {"<XMM0>", "implicit XMM0"}: true, |
| {"AL", "AL"}: true, |
| {"AL", "AL/AX/EAX/RAX"}: true, |
| {"AX", "AL/AX/EAX/RAX"}: true, |
| {"AX", "AX"}: true, |
| {"AX", "AX/EAX/RAX"}: true, |
| {"CL", "CL"}: true, |
| {"CR0-CR7", "ModRM:reg"}: true, |
| {"CR8", ""}: true, |
| {"CS", "CS"}: true, |
| {"DR0-DR7", "ModRM:reg"}: true, |
| {"DS", "DS"}: true, |
| {"DX", "DX"}: true, |
| {"EAX", "AL/AX/EAX/RAX"}: true, |
| {"EAX", "AX/EAX/RAX"}: true, |
| {"EAX", "EAX"}: true, |
| {"ES", "ES"}: true, |
| {"FS", "FS"}: true, |
| {"GS", "GS"}: true, |
| {"RAX", "AL/AX/EAX/RAX"}: true, |
| {"RAX", "AX/EAX/RAX"}: true, |
| {"RAX", "RAX"}: true, |
| {"ST", "ST(0)"}: true, |
| {"ST(0)", "ST(0)"}: true, |
| {"ST(i)", "ST(i)"}: true, |
| {"Sreg", "ModRM:reg"}: true, |
| {"bnd1", "ModRM:reg"}: true, |
| {"bnd2/m128", "ModRM:r/m"}: true, |
| {"bnd2/m64", "ModRM:r/m"}: true, |
| {"imm16", "imm16"}: true, |
| {"imm16", "imm8"}: true, |
| {"imm16", "imm8/16/32"}: true, |
| {"imm16", "imm8/16/32"}: true, |
| {"imm16", "imm8/16/32/64"}: true, |
| {"imm16", "iw"}: true, |
| {"imm32", "imm8"}: true, |
| {"imm32", "imm8/16/32"}: true, |
| {"imm32", "imm8/16/32"}: true, |
| {"imm32", "imm8/16/32/64"}: true, |
| {"imm64", "imm8/16/32/64"}: true, |
| {"imm8", "imm8"}: true, |
| {"imm8", "imm8/16/32"}: true, |
| {"imm8", "imm8/16/32"}: true, |
| {"imm8", "imm8/16/32/64"}: true, |
| {"imm8", "imm8[3:0]"}: true, |
| {"m", "ModRM:r/m"}: true, |
| {"m128", "ModRM:r/m"}: true, |
| {"m14/28byte", "ModRM:r/m"}: true, |
| {"m16", "ModRM:r/m"}: true, |
| {"m16&16", "ModRM:r/m"}: true, |
| {"m16&32", "ModRM:r/m"}: true, |
| {"m16&64", "ModRM:r/m"}: true, |
| {"m16:16", "ModRM:r/m"}: true, |
| {"m16:16", "Offset"}: true, |
| {"m16:32", "ModRM:r/m"}: true, |
| {"m16:32", "Offset"}: true, |
| {"m16:64", "ModRM:r/m"}: true, |
| {"m16:64", "Offset"}: true, |
| {"m16int", "ModRM:r/m"}: true, |
| {"m256", "ModRM:r/m"}: true, |
| {"m2byte", "ModRM:r/m"}: true, |
| {"m32", "ModRM:r/m"}: true, |
| {"m32&32", "ModRM:r/m"}: true, |
| {"m32fp", "ModRM:r/m"}: true, |
| {"m32int", "ModRM:r/m"}: true, |
| {"m512byte", "ModRM:r/m"}: true, |
| {"m64", "ModRM:r/m"}: true, |
| {"m64fp", "ModRM:r/m"}: true, |
| {"m64int", "ModRM:r/m"}: true, |
| {"m8", "ModRM:r/m"}: true, |
| {"m80bcd", "ModRM:r/m"}: true, |
| {"m80dec", "ModRM:r/m"}: true, |
| {"m80fp", "ModRM:r/m"}: true, |
| {"m94/108byte", "ModRM:r/m"}: true, |
| {"mem", "ModRM:r/m"}: true, |
| {"mib", "ModRM:r/m"}: true, |
| {"mm/m32", "ModRM:r/m"}: true, |
| {"mm1", "ModRM:reg"}: true, |
| {"mm2", "ModRM:r/m"}: true, |
| {"mm2/m32", "ModRM:r/m"}: true, |
| {"mm2/m64", "ModRM:r/m"}: true, |
| {"moffs16", "Moffs"}: true, |
| {"moffs32", "Moffs"}: true, |
| {"moffs64", "Moffs"}: true, |
| {"moffs8", "Moffs"}: true, |
| {"ptr16:16", "Offset"}: true, |
| {"ptr16:32", "Offset"}: true, |
| {"r/m16", "ModRM:r/m"}: true, |
| {"r/m32", "ModRM:r/m"}: true, |
| {"r/m64", "ModRM:r/m"}: true, |
| {"r/m8", "ModRM:r/m"}: true, |
| {"r16", "ModRM:reg"}: true, |
| {"r16op", "opcode + rd"}: true, |
| {"r32", "ModRM:reg"}: true, |
| {"r32", "VEX.vvvv"}: true, |
| {"r32/m16", "ModRM:r/m"}: true, |
| {"r32/m8", "ModRM:r/m"}: true, |
| {"r32V", "VEX.vvvv"}: true, |
| {"r32op", "opcode + rd"}: true, |
| {"r64", "ModRM:reg"}: true, |
| {"r64/m16", "ModRM:r/m"}: true, |
| {"r64V", "VEX.vvvv"}: true, |
| {"r64op", "opcode + rd"}: true, |
| {"r8", "ModRM:reg"}: true, |
| {"r8op", "opcode + rd"}: true, |
| {"rel16", "Offset"}: true, |
| {"rel32", "Offset"}: true, |
| {"rel8", "Offset"}: true, |
| {"rmr16", "ModRM:r/m"}: true, |
| {"rmr32", "ModRM:r/m"}: true, |
| {"rmr64", "ModRM:r/m"}: true, |
| {"xmm/m128", "ModRM:r/m"}: true, |
| {"xmm/m32", "ModRM:r/m"}: true, |
| {"xmm1", "ModRM:reg"}: true, |
| {"xmm2", "ModRM:r/m"}: true, |
| {"xmm2/m128", "ModRM:r/m"}: true, |
| {"xmm2/m16", "ModRM:r/m"}: true, |
| {"xmm2/m32", "ModRM:r/m"}: true, |
| {"xmm2/m64", "ModRM:r/m"}: true, |
| {"xmm2/m8", "ModRM:r/m"}: true, |
| {"xmmIH", "imm8[7:4]"}: true, |
| {"xmmV", "VEX.vvvv"}: true, |
| {"ymm1", "ModRM:reg"}: true, |
| {"ymm2", "ModRM:r/m"}: true, |
| {"ymm2/m256", "ModRM:r/m"}: true, |
| {"ymmIH", "imm8[7:4]"}: true, |
| {"ymmV", "VEX.vvvv"}: true, |
| {"vm32x", "vsib"}: true, |
| {"vm64x", "vsib"}: true, |
| {"vm32y", "vsib"}: true, |
| {"vm64y", "vsib"}: true, |
| {"SS", "SS"}: true, |
| {"3", "3"}: true, |
| } |
| |
| // instBlacklist lists the instruction syntaxes to ignore when parsing. |
| // We exclude Intel's general forms for these not-actually-general instructions. |
| // The syntax makes it look like arbitrary memory operands can be used when in fact |
| // the exact address is fixed in all cases - [DI] or [SI], for example |
| var instBlacklist = map[string]bool{ |
| "CMPS m16, m16": true, |
| "CMPS m32, m32": true, |
| "CMPS m64, m64": true, |
| "CMPS m8, m8": true, |
| "INS m16, DX": true, |
| "INS m32, DX": true, |
| "INS m8, DX": true, |
| "LODS m16": true, |
| "LODS m32": true, |
| "LODS m64": true, |
| "LODS m8": true, |
| "MOVS m16, m16": true, |
| "MOVS m32, m32": true, |
| "MOVS m64, m64": true, |
| "MOVS m8, m8": true, |
| "OUTS DX, m16": true, |
| "OUTS DX, m32": true, |
| "OUTS DX, m8": true, |
| "REP INS m16, DX": true, |
| "REP INS m32, DX": true, |
| "REP INS m8, DX": true, |
| "REP INS r/m32, DX": true, |
| "REP LODS AL": true, |
| "REP LODS AX": true, |
| "REP LODS EAX": true, |
| "REP LODS RAX": true, |
| "REP MOVS m16, m16": true, |
| "REP MOVS m32, m32": true, |
| "REP MOVS m64, m64": true, |
| "REP MOVS m8, m8": true, |
| "REP OUTS DX, m16": true, |
| "REP OUTS DX, m32": true, |
| "REP OUTS DX, m8": true, |
| "REP OUTS DX, r/m16": true, |
| "REP OUTS DX, r/m32": true, |
| "REP OUTS DX, r/m8": true, |
| "REP STOS m16": true, |
| "REP STOS m32": true, |
| "REP STOS m64": true, |
| "REP STOS m8": true, |
| "REPE CMPS m16, m16": true, |
| "REPE CMPS m32, m32": true, |
| "REPE CMPS m64, m64": true, |
| "REPE CMPS m8, m8": true, |
| "REPE SCAS m16": true, |
| "REPE SCAS m32": true, |
| "REPE SCAS m64": true, |
| "REPE SCAS m8": true, |
| "REPNE CMPS m16, m16": true, |
| "REPNE CMPS m32, m32": true, |
| "REPNE CMPS m64, m64": true, |
| "REPNE CMPS m8, m8": true, |
| "REPNE SCAS m16": true, |
| "REPNE SCAS m32": true, |
| "REPNE SCAS m64": true, |
| "REPNE SCAS m8": true, |
| "SCAS m16": true, |
| "SCAS m32": true, |
| "SCAS m64": true, |
| "SCAS m8": true, |
| "STOS m16": true, |
| "STOS m32": true, |
| "STOS m64": true, |
| "STOS m8": true, |
| "XLAT m8": true, |
| } |
| |
| // condPrefs lists preferences for condition code suffixes. |
| // The first suffix in each pair takes priority over the second. |
| var condPrefs = [][2]string{ |
| {"B", "C"}, |
| {"B", "NAE"}, |
| {"AE", "NB"}, |
| {"AE", "NC"}, |
| {"E", "Z"}, |
| {"NE", "NZ"}, |
| {"BE", "NA"}, |
| {"A", "NBE"}, |
| {"P", "PE"}, |
| {"NP", "PO"}, |
| {"L", "NGE"}, |
| {"GE", "NL"}, |
| {"LE", "NG"}, |
| {"G", "NLE"}, |
| } |
| |
| // conv16 specifies replacements to turn a 16-bit syntax into a 32-bit syntax. |
| // If the conv16 can be applied to one form to create a new form with the same |
| // fixed instruction prefix, the pair is tagged as operand16 and operand32 |
| // respectively. |
| var conv16 = strings.NewReplacer( |
| "16:16", "16:32", |
| "16", "32", |
| "AX", "EAX", |
| "CBW", "CWDE", |
| "CMPSW", "CMPSD", |
| "CWD", "CDQ", |
| "INSW", "INSD", |
| "IRET", "IRETD", |
| "LODSW", "LODSD", |
| "MOVSW", "MOVSD", |
| "OUTSW", "OUTSD", |
| "POPA", "POPAD", |
| "POPF", "POPFD", |
| "PUSHA", "PUSHAD", |
| "PUSHF", "PUSHFD", |
| "SCASW", "SCASD", |
| "STOSW", "STOSD", |
| ) |
| |
| // fixup records additional modifications needed that are not derived |
| // from the instructions in the manual. It is keyed by the syntax and opcode. |
| var fixup = map[[2]string][]fixer{ |
| // NOP is a very special case overloading XCHG AX, AX. |
| // The decoder handles it in custom code; exclude from the usual tables. |
| {"NOP", "90"}: {fixAddTag("pseudo")}, |
| |
| // PAUSE is a special case of NOP. |
| {"PAUSE", "F3 90"}: {fixAddTag("pseudo")}, // used to add 'keepop' tag but not sure what that means |
| |
| // Far CALL, JMP, RET are given L prefix (long) for disambiguation. |
| {"CALL m16:16", "FF /3"}: {fixRename("CALL_FAR")}, |
| {"CALL m16:32", "FF /3"}: {fixRename("CALL_FAR")}, |
| {"CALL m16:64", "REX.W FF /3"}: {fixRename("CALL_FAR")}, |
| {"CALL ptr16:16", "9A cd"}: {fixRename("CALL_FAR")}, |
| {"CALL ptr16:32", "9A cp"}: {fixRename("CALL_FAR")}, |
| {"JMP m16:16", "FF /5"}: {fixRename("JMP_FAR")}, |
| {"JMP m16:32", "FF /5"}: {fixRename("JMP_FAR")}, |
| {"JMP m16:64", "REX.W FF /5"}: {fixRename("JMP_FAR")}, |
| {"JMP ptr16:16", "EA cd"}: {fixRename("JMP_FAR")}, |
| {"JMP ptr16:32", "EA cp"}: {fixRename("JMP_FAR")}, |
| {"RET imm16", "CA iw"}: {fixRename("RET_FAR"), fixArg(0, "imm16u")}, |
| {"RET", "CB"}: {fixRename("RET_FAR")}, |
| |
| // Unsigned immediates. (RET far imm16 handled above.) |
| // Some of these are just preferences for disassembling. |
| {"ENTER imm16, imm8", "C8 iw ib"}: {fixArg(1, "imm8b")}, |
| {"RET imm16", "C2 iw"}: {fixArg(0, "imm16u")}, |
| {"IN AL, imm8", "E4 ib"}: {fixArg(1, "imm8u")}, |
| {"IN AX, imm8", "E5 ib"}: {fixArg(1, "imm8u")}, |
| {"IN EAX, imm8", "E5 ib"}: {fixArg(1, "imm8u"), fixAddTag("operand64")}, |
| {"OUT imm8, AL", "E6 ib"}: {fixArg(0, "imm8u")}, |
| {"OUT imm8, AX", "E7 ib"}: {fixArg(0, "imm8u")}, |
| {"OUT imm8, EAX", "E7 ib"}: {fixArg(0, "imm8u"), fixAddTag("operand64")}, |
| {"MOV r8op, imm8", "B0+rb ib"}: {fixArg(1, "imm8u")}, |
| {"MOV r8op, imm8", "REX B0+rb ib"}: {fixArg(1, "imm8u"), fixAddTag("pseudo64")}, |
| {"MOV r/m8, imm8", "C6 /0 ib"}: {fixArg(1, "imm8u")}, |
| {"MOV r/m8, imm8", "REX C6 /0 ib"}: {fixArg(1, "imm8u"), fixAddTag("pseudo64")}, |
| |
| // The listings for MOVSX and MOVSXD do not list some variants that |
| // assemblers seem to allow. |
| // As a result, this instruction got the wrong tag. |
| // The other instructions are listed in extraInsts. |
| {"MOVSX r32, r/m16", "0F BF /r"}: {fixRemoveTag("operand16"), fixAddTag("operand32")}, |
| {"MOVZX r32, r/m16", "0F B7 /r"}: {fixRemoveTag("operand16")}, |
| |
| // Listings are incomplete or incorrect. Fix tags to adjust for new instructions below. |
| {"SLDT r/m16", "0F 00 /0"}: {fixRemoveTag("operand32")}, |
| {"STR r/m16", "0F 00 /1"}: {fixAddTag("operand16")}, |
| {"BSWAP r32op", "0F C8+rd"}: {fixRemoveTag("operand16")}, |
| {"MOV Sreg, r/m16", "8E /r"}: {fixRemoveTag("operand32")}, |
| {"MOV Sreg, r/m64", "REX.W 8E /r"}: {fixArg(1, "r/m16")}, |
| {"MOV r/m64, Sreg", "REX.W 8C /r"}: {fixArg(0, "r/m16")}, |
| {"MOV r/m16, Sreg", "8C /r"}: {fixRemoveTag("operand32")}, |
| {"MOV r/m64, imm32", "REX.W C7 /0 io"}: {fixOpcode("REX.W C7 /0 id")}, |
| |
| // On 64-bit, these ignore 64-bit mode change. |
| {"POP FS", "0F A1"}: {fixIfValid("N.E.", "V", fixAddTag("operand64"))}, |
| {"POP GS", "0F A9"}: {fixIfValid("N.E.", "V", fixAddTag("operand64"))}, |
| {"LEAVE", "C9"}: {fixIfValid("N.E.", "V", fixAddTag("operand64"))}, |
| |
| {"IN EAX, DX", "ED"}: {fixAddTag("operand64")}, |
| {"INSD", "6D"}: {fixAddTag("operand64")}, |
| {"OUT DX, EAX", "EF"}: {fixAddTag("operand64")}, |
| {"OUTSD", "6F"}: {fixAddTag("operand64")}, |
| {"XBEGIN rel32", "C7 F8 cd"}: {fixAddTag("operand64")}, |
| |
| // Treat FWAIT, not WAIT, as canonical. |
| {"FWAIT", "9B"}: {fixRemoveTag("pseudo")}, |
| {"WAIT", "9B"}: {fixAddTag("pseudo")}, |
| |
| // LAHF and SAHF are listed as "Invalid*" for 64-bit mode. |
| // They are actually defined, so Valid from our point of view. |
| // It's just that only a very few 64-bit processors allowed them. |
| {"LAHF", "9F"}: {fixValid("V", "V")}, |
| {"SAHF", "9E"}: {fixValid("V", "V")}, |
| |
| // The JZ forms are listed twice in the table, which confuses things. |
| {"JZ rel16", "0F 84 cw"}: {fixAddTag("operand16"), fixRemoveTag("operand32")}, |
| {"JZ rel32", "0F 84 cd"}: {fixAddTag("operand32"), fixRemoveTag("operand16")}, |
| |
| // XCHG has two of every instruction, which makes things bad. |
| // The XX hack below takes care of most problems but this one remains. |
| {"XCHG r/m16, r16", "87 /r"}: {fixRemoveTag("pseudo")}, |
| |
| // MOV CR8 is just the obvious extension of the MOV CR0-CR7 form. |
| {"MOV rmr64, CR8", "REX.R + 0F 20 /0"}: {fixAddTag("pseudo")}, |
| {"MOV CR8, rmr64", "REX.R + 0F 22 /0"}: {fixAddTag("pseudo")}, |
| |
| // TODO: EXPLAIN ALL THESE |
| {"ADCX r32, r/m32", "66 0F 38 F6 /r"}: {fixAddTag("operand16"), fixAddTag("operand32")}, |
| {"ADOX r32, r/m32", "F3 0F 38 F6 /r"}: {fixAddTag("operand16"), fixAddTag("operand32")}, |
| {"POPFQ", "9D"}: {fixAddTag("operand32"), fixAddTag("operand64")}, |
| {"PUSHFQ", "9C"}: {fixAddTag("operand32"), fixAddTag("operand64")}, |
| {"JCXZ rel8", "E3 cb"}: {fixAddTag("address16")}, |
| {"JECXZ rel8", "E3 cb"}: {fixAddTag("address32")}, |
| {"JRCXZ rel8", "E3 cb"}: {fixAddTag("address64")}, |
| {"PUSH r64op", "50+rd"}: {fixAddTag("operand32"), fixAddTag("operand64")}, |
| {"PUSH r/m64", "FF /6"}: {fixAddTag("operand32"), fixAddTag("operand64")}, |
| {"POP r64op", "58+rd"}: {fixAddTag("operand32"), fixAddTag("operand64")}, |
| {"POP r/m64", "8F /0"}: {fixAddTag("operand32"), fixAddTag("operand64")}, |
| {"SMSW r/m16", "0F 01 /4"}: {fixAddTag("operand16")}, |
| {"SMSW r32/m16", "0F 01 /4"}: {fixRemoveTag("operand16"), fixAddTag("operand32")}, |
| |
| // Express to the decoder that the rel16 only applies in 16-bit operand mode. |
| {"JA rel16", "0F 87 cw"}: {fixAddTag("operand16")}, |
| {"JAE rel16", "0F 83 cw"}: {fixAddTag("operand16")}, |
| {"JB rel16", "0F 82 cw"}: {fixAddTag("operand16")}, |
| {"JBE rel16", "0F 86 cw"}: {fixAddTag("operand16")}, |
| {"JE rel16", "0F 84 cw"}: {fixAddTag("operand16")}, |
| {"JG rel16", "0F 8F cw"}: {fixAddTag("operand16")}, |
| {"JGE rel16", "0F 8D cw"}: {fixAddTag("operand16")}, |
| {"JL rel16", "0F 8C cw"}: {fixAddTag("operand16")}, |
| {"JLE rel16", "0F 8E cw"}: {fixAddTag("operand16")}, |
| {"JNE rel16", "0F 85 cw"}: {fixAddTag("operand16")}, |
| {"JNO rel16", "0F 81 cw"}: {fixAddTag("operand16")}, |
| {"JNP rel16", "0F 8B cw"}: {fixAddTag("operand16")}, |
| {"JNS rel16", "0F 89 cw"}: {fixAddTag("operand16")}, |
| {"JO rel16", "0F 80 cw"}: {fixAddTag("operand16")}, |
| {"JP rel16", "0F 8A cw"}: {fixAddTag("operand16")}, |
| {"JS rel16", "0F 88 cw"}: {fixAddTag("operand16")}, |
| |
| {"JA rel32", "0F 87 cd"}: {fixAddTag("operand32")}, |
| {"JAE rel32", "0F 83 cd"}: {fixAddTag("operand32")}, |
| {"JB rel32", "0F 82 cd"}: {fixAddTag("operand32")}, |
| {"JBE rel32", "0F 86 cd"}: {fixAddTag("operand32")}, |
| {"JE rel32", "0F 84 cd"}: {fixAddTag("operand32")}, |
| {"JG rel32", "0F 8F cd"}: {fixAddTag("operand32")}, |
| {"JGE rel32", "0F 8D cd"}: {fixAddTag("operand32")}, |
| {"JL rel32", "0F 8C cd"}: {fixAddTag("operand32")}, |
| {"JLE rel32", "0F 8E cd"}: {fixAddTag("operand32")}, |
| {"JNE rel32", "0F 85 cd"}: {fixAddTag("operand32")}, |
| {"JNO rel32", "0F 81 cd"}: {fixAddTag("operand32")}, |
| {"JNP rel32", "0F 8B cd"}: {fixAddTag("operand32")}, |
| {"JNS rel32", "0F 89 cd"}: {fixAddTag("operand32")}, |
| {"JO rel32", "0F 80 cd"}: {fixAddTag("operand32")}, |
| {"JP rel32", "0F 8A cd"}: {fixAddTag("operand32")}, |
| {"JS rel32", "0F 88 cd"}: {fixAddTag("operand32")}, |
| |
| {"LSL r16, r/m16", "0F 03 /r"}: {fixAddTag("operand16")}, |
| } |
| |
| var extraInsts = []*instruction{ |
| // Undocumented. |
| {syntax: "ICEBP", opcode: "F1", valid32: "V", valid64: "V"}, |
| {syntax: "UD1", opcode: "0F B9", valid32: "V", valid64: "V"}, |
| {syntax: "FFREEP ST(i)", opcode: "DF C0+i", valid32: "V", valid64: "V", action: "w"}, |
| |
| // Where did these come from? They were in version 0.01 of the csv table. |
| {syntax: "MOVNTSD m64, xmm1", opcode: "F2 0F 2B /r", valid32: "V", valid64: "V", cpuid: "SSE", action: "w,r"}, |
| {syntax: "MOVNTSS m32, xmm1", opcode: "F3 0F 2B /r", valid32: "V", valid64: "V", cpuid: "SSE", action: "w,r"}, |
| |
| // These express to the decoder that in 64-bit mode |
| // an operand prefix does not affect the size of the relative offset. |
| {syntax: "CALL rel32", opcode: "E8 cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"}, |
| {syntax: "JMP rel32", opcode: "E9 cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"}, |
| {syntax: "JA rel32", opcode: "0F 87 cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"}, |
| {syntax: "JAE rel32", opcode: "0F 83 cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"}, |
| {syntax: "JB rel32", opcode: "0F 82 cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"}, |
| {syntax: "JBE rel32", opcode: "0F 86 cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"}, |
| {syntax: "JE rel32", opcode: "0F 84 cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"}, |
| {syntax: "JG rel32", opcode: "0F 8F cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"}, |
| {syntax: "JGE rel32", opcode: "0F 8D cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"}, |
| {syntax: "JL rel32", opcode: "0F 8C cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"}, |
| {syntax: "JLE rel32", opcode: "0F 8E cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"}, |
| {syntax: "JNE rel32", opcode: "0F 85 cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"}, |
| {syntax: "JNO rel32", opcode: "0F 81 cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"}, |
| {syntax: "JNP rel32", opcode: "0F 8B cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"}, |
| {syntax: "JNS rel32", opcode: "0F 89 cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"}, |
| {syntax: "JO rel32", opcode: "0F 80 cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"}, |
| {syntax: "JP rel32", opcode: "0F 8A cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"}, |
| {syntax: "JS rel32", opcode: "0F 88 cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"}, |
| |
| // Disassemblers recognize these, but they're not in the manual. |
| // Not sure if they really exist. |
| |
| // The 16-16 and 32-32 forms don't really make sense since there's nothing to extend. |
| {syntax: "MOVSX r16, r/m16", opcode: "0F BF /r", valid32: "V", valid64: "V", tags: []string{"operand16"}, action: "w,r"}, |
| {syntax: "MOVSXD r16, r/m32", opcode: "63 /r", valid32: "N.E.", valid64: "V", tags: []string{"operand16"}, action: "w,r"}, |
| {syntax: "MOVSXD r32, r/m32", opcode: "63 /r", valid32: "N.E.", valid64: "V", tags: []string{"operand32"}, action: "w,r"}, |
| {syntax: "MOVZX r16, r/m16", opcode: "0F B7 /r", valid32: "V", valid64: "V", tags: []string{"operand16"}, action: "w,r"}, |
| |
| {syntax: "LAR r64, r/m16", opcode: "REX.W 0F 02 /r", valid32: "N.E.", valid64: "V", action: "w,r"}, |
| {syntax: "SLDT r32/m16", opcode: "0F 00 /0", valid32: "V", valid64: "V", tags: []string{"operand32"}, action: "w"}, |
| {syntax: "STR r32/m16", opcode: "0F 00 /1", valid32: "V", valid64: "V", tags: []string{"operand32"}, action: "w"}, |
| {syntax: "STR r64/m16", opcode: "REX.W 0F 00 /1", valid32: "N.E.", valid64: "V", action: "w"}, |
| |
| {syntax: "BSWAP r16op", opcode: "0F C8+rd", valid32: "V", valid64: "V", tags: []string{"operand16"}, action: "rw"}, |
| |
| // Do these exist? |
| // I am not sure where they came from, and xed doesn't recognize them. |
| //{syntax: "MOV TR0-TR7, rmr32", opcode: "0F 26 /r", valid32: "V", valid64: "N.E.", tags: []string{"modrm_regonly"}, action: "w,r"}, |
| //{syntax: "MOV TR0-TR7, rmr64", opcode: "0F 26 /r", valid32: "N.E.", valid64: "V", tags: []string{"modrm_regonly"}, action: "w,r"}, |
| //{syntax: "MOV rmr32, TR0-TR7", opcode: "0F 24 /r", valid32: "V", valid64: "N.E.", tags: []string{"modrm_regonly"}, action: "w,r"}, |
| //{syntax: "MOV rmr64, TR0-TR7", opcode: "0F 24 /r", valid32: "N.E.", valid64: "V", tags: []string{"modrm_regonly"}, action: "w,r"}, |
| {syntax: "MOV Sreg, r32/m16", opcode: "8E /r", valid32: "V", valid64: "V", tags: []string{"operand32"}, action: "w,r"}, |
| {syntax: "MOV r/m32, Sreg", opcode: "8C /r", valid32: "V", valid64: "V", tags: []string{"operand32"}, action: "w,r"}, |
| } |
| |
| type fixer func(*instruction) |
| |
| func fixAddTag(tag string) fixer { |
| return func(inst *instruction) { |
| addTag(inst, tag) |
| } |
| } |
| |
| func fixRemoveTag(tag string) fixer { |
| return func(inst *instruction) { |
| removeTag(inst, tag) |
| } |
| } |
| |
| func fixRename(op string) fixer { |
| return func(inst *instruction) { |
| _, args := splitSyntax(inst.syntax) |
| inst.syntax = joinSyntax(op, args) |
| } |
| } |
| |
| func fixArg(i int, arg string) fixer { |
| return func(inst *instruction) { |
| op, args := splitSyntax(inst.syntax) |
| args[i] = arg |
| inst.syntax = joinSyntax(op, args) |
| } |
| } |
| |
| func fixIfValid(valid32, valid64 string, fix fixer) fixer { |
| return func(inst *instruction) { |
| if inst.valid32 == valid32 && inst.valid64 == valid64 { |
| fix(inst) |
| } |
| } |
| } |
| |
| func fixValid(valid32, valid64 string) fixer { |
| return func(inst *instruction) { |
| inst.valid32 = valid32 |
| inst.valid64 = valid64 |
| } |
| } |
| |
| func fixOpcode(opcode string) fixer { |
| return func(inst *instruction) { |
| inst.opcode = opcode |
| } |
| } |
| |
| func cleanup(insts []*instruction) []*instruction { |
| var haveOp map[string]bool |
| if onlySomePages { |
| haveOp = map[string]bool{} |
| } |
| |
| // Clean individual instruction encodings and opcode sequences. |
| sawJZ := map[string]bool{} |
| out := insts[:0] |
| for seq, inst := range insts { |
| inst.seq = seq |
| |
| // There are two copies each of JZ rel16 and JZ rel32. Delete the second. |
| if strings.HasPrefix(inst.syntax, "JZ rel") { |
| if sawJZ[inst.syntax] { |
| continue |
| } |
| sawJZ[inst.syntax] = true |
| } |
| out = append(out, inst) |
| |
| // Intel CMPXCHG16B and CMPXCHG8B have surprise "m64" or " m128" at end of encoding. |
| surprises := []string{ |
| " m64", |
| " m128", |
| } |
| for _, s := range surprises { |
| if strings.HasSuffix(inst.syntax, s) && strings.HasSuffix(inst.opcode, s) { |
| inst.opcode = strings.TrimSuffix(inst.opcode, s) |
| } |
| } |
| |
| op, args := splitSyntax(inst.syntax) |
| op = strings.TrimRight(op, "*") |
| inst.syntax = joinSyntax(op, args) |
| |
| // Check argument names in syntax against encoding details. |
| if enc, ok := encodings[inst.syntax]; ok { |
| inst.args = enc |
| } |
| if len(args) == len(inst.args)+1 && args[len(args)-1] == "imm8" { |
| fixed := make([]string, len(args)) |
| copy(fixed, inst.args) |
| fixed[len(args)-1] = "imm8" |
| inst.args = fixed |
| } else if len(args) == 0 && len(inst.args) == 1 && inst.args[0] == "NA" { |
| inst.args = []string{} |
| } else if len(args) != len(inst.args) { |
| fmt.Fprintf(os.Stderr, "p.%d: %s has %d args but %d encoding details:\n\t%s\n", inst.page, inst.syntax, len(args), len(inst.args), strings.Join(inst.args, "; ")) |
| inst.syntax = joinSyntax(op, args) |
| continue |
| } |
| |
| var action []string |
| for i, arg := range args { |
| arg = strings.TrimSpace(arg) |
| arg = strings.TrimRight(arg, "*") |
| if (arg == "reg" || strings.HasPrefix(arg, "reg/")) && containsAll(inst.desc, "upper bits", "r64", "zero") { |
| arg = "r32" + strings.TrimPrefix(arg, "reg") |
| } |
| |
| enc := inst.args[i] |
| enc = strings.TrimSpace(enc) |
| switch { |
| case strings.HasSuffix(enc, " (r))"): |
| enc = strings.TrimSuffix(enc, ")") |
| case strings.HasSuffix(enc, " (R)"): |
| enc = strings.TrimSuffix(enc, " (R)") + " (r)" |
| case strings.HasSuffix(enc, " (W)"): |
| enc = strings.TrimSuffix(enc, " (W)") + " (w)" |
| case strings.HasSuffix(enc, " (r,w)"): |
| enc = strings.TrimSuffix(enc, " (r,w)") + " (r, w)" |
| case enc == "Imm8": |
| enc = "imm8" |
| case enc == "imm8/26/32": |
| enc = "imm8/16/32" |
| case enc == "BaseReg (R): VSIB:base, VectorReg(R): VSIB:index": |
| enc = "vsib (r)" |
| } |
| inst.args[i] = enc |
| |
| switch { |
| case strings.HasSuffix(enc, " (r)"): |
| action = append(action, "r") |
| enc = strings.TrimSuffix(enc, " (r)") |
| case strings.HasSuffix(enc, " (w)"): |
| action = append(action, "w") |
| enc = strings.TrimSuffix(enc, " (w)") |
| case strings.HasSuffix(enc, " (r, w)"): |
| action = append(action, "rw") |
| enc = strings.TrimSuffix(enc, " (r, w)") |
| case strings.HasPrefix(enc, "imm"), enc == "Offset", enc == "iw", arg == "1", arg == "0", arg == "3": |
| action = append(action, "r") |
| case i < len(opAction[op]): |
| action = append(action, opAction[op][i]) |
| default: |
| fmt.Fprintf(os.Stderr, "p.%d: %s has encoding %s for %s but no r/w annotations\n", inst.page, inst.syntax, enc, arg) |
| action = append(action, "?") |
| } |
| |
| if arg == "mem" && op == "LDDQU" { |
| arg = "m128" |
| } |
| if arg == "reg" && op == "LAR" { |
| arg = "r32" |
| } |
| if actual := encodeReplace[[2]string{arg, enc}]; actual != "" { |
| arg = actual |
| } |
| |
| if (arg == "r8" || arg == "r16" || arg == "r32" || arg == "r64") && enc == "ModRM:r/m" { |
| addTag(inst, "modrm_regonly") |
| arg = "rmr" + arg[1:] |
| } |
| if (arg == "xmm2" || arg == "ymm2") && enc == "ModRM:r/m" { |
| addTag(inst, "modrm_regonly") |
| } |
| |
| if (arg == "m8" || arg == "m16" || arg == "m32" || arg == "m64" || arg == "m128" || arg == "m256") && enc == "ModRM:r/m" { |
| addTag(inst, "modrm_memonly") |
| } |
| |
| if arg == "r64" && (inst.syntax == "MOV r64, CR8" || inst.syntax == "MOV CR8, r64") { |
| arg = "rmr64" |
| addTag(inst, "modrm_regonly") |
| } |
| if arg == "CR8" { |
| enc = "" |
| } |
| |
| if !encodeOK[[2]string{arg, enc}] { |
| fmt.Fprintf(os.Stderr, "p.%d: %s has invalid encoding %s for %s\n\t{%q, %q}: true,\n", inst.page, inst.syntax, enc, arg, arg, enc) |
| } |
| |
| args[i] = arg |
| |
| // Intel SETcc and others are missing the /r. |
| // But CALL rel16 and CALL rel32 have a bad encoding table so ignore the ModRM there. |
| if strings.HasPrefix(enc, "ModRM") && !strings.Contains(inst.opcode, " /") && op != "CALL" { |
| inst.opcode += " /r" |
| } |
| if strings.HasPrefix(enc, "ModRM:reg") && !strings.Contains(inst.opcode, "/r") { |
| // The opcode is taken up with something else. Bug in table. |
| fmt.Fprintf(os.Stderr, "p.%d: %s has invalid encoding %s: no reg field in %s\n", inst.page, inst.syntax, arg, inst.opcode) |
| } |
| // XBEGIN is missing cw cd. |
| if enc == "Offset" && arg == "rel16" && !strings.Contains(inst.opcode, " cw") { |
| inst.opcode += " cw" |
| } |
| if enc == "Offset" && arg == "rel32" && !strings.Contains(inst.opcode, " cd") { |
| inst.opcode += " cd" |
| } |
| if enc == "Moffs" && !strings.Contains(inst.opcode, "cm") { |
| inst.opcode += " cm" |
| } |
| |
| inst.action = strings.Join(action, ",") |
| } |
| |
| inst.syntax = joinSyntax(op, args) |
| |
| // The Intel manual lists each XCHG form with arguments in both orders. |
| // While this is technically correct, it confuses lots of the analysis. |
| // Change half of them to start with a fake "XX" byte. |
| if op == "XCHG" && !strings.HasPrefix(args[0], "r/") && !strings.HasSuffix(args[0], "op") { |
| inst.opcode = "XX " + inst.opcode |
| } |
| |
| // Intel manual is not great about disabling REX instructions on 32-bit systems. |
| if strings.Contains(inst.opcode, "REX") && inst.valid32 == "V" { |
| inst.valid32 = "N.E." |
| } |
| |
| if inst.valid32 == "V" { |
| switch { |
| case containsAll(inst.compat, "not supported", "earlier than the Intel486"): |
| inst.cpuid = "486" |
| case containsAll(inst.compat, "not supported", "earlier than the Pentium"), |
| containsAll(inst.compat, "were introduced", "with the Pentium"): |
| inst.cpuid = "Pentium" |
| case containsAll(inst.compat, "were introduced", "in the Pentium II"): |
| inst.cpuid = "PentiumII" |
| case containsAll(inst.compat, "were introduced", "in the P6 family"), |
| containsAll(inst.compat, "were introduced in P6 family"): |
| addTag(inst, "P6") |
| } |
| } |
| |
| if onlySomePages { |
| op, _ := splitSyntax(inst.syntax) |
| haveOp[op] = true |
| } |
| } |
| |
| insts = out |
| sort.Sort(byOpcode(insts)) |
| |
| // Detect operand size dependencies. |
| var last *instruction |
| for _, inst := range insts { |
| if last != nil { |
| f1, _ := splitOpcode(last.opcode) |
| f2, _ := splitOpcode(inst.opcode) |
| if f1 == f2 { |
| // Conflict: cannot distinguish instructions based on fixed prefix. |
| if is16vs32pair(last, inst) { |
| addTag(last, "operand16") |
| addTag(inst, "operand32") |
| continue |
| } |
| if is16vs32pair(inst, last) { |
| addTag(last, "operand32") |
| addTag(inst, "operand16") |
| last = inst |
| continue |
| } |
| } |
| } |
| last = inst |
| } |
| |
| // Detect pseudo-ops, defined as opcode entries subsumed by more general ones. |
| seen := map[string]*instruction{} |
| for _, inst := range insts { |
| if strings.HasPrefix(inst.opcode, "9B ") { // FWAIT prefix |
| addTag(inst, "pseudo") |
| continue |
| } |
| if inst.opcode == "F0" || inst.opcode == "F2" || inst.opcode == "F3" { |
| addTag(inst, "pseudo") |
| continue |
| } |
| if strings.HasPrefix(inst.syntax, "REP ") || strings.HasPrefix(inst.syntax, "REPE ") || strings.HasPrefix(inst.syntax, "REPNE ") { |
| addTag(inst, "pseudo") |
| continue |
| } |
| if strings.HasPrefix(inst.syntax, "SAL ") { // SHL is canonical |
| addTag(inst, "pseudo") |
| continue |
| } |
| if old := seen[inst.opcode]; old != nil { |
| if condLess(old.syntax, inst.syntax) { |
| addTag(inst, "pseudo") |
| continue |
| } |
| if xchgLess(inst.syntax, old.syntax) { |
| old.tags = append(old.tags, "pseudo") |
| seen[inst.opcode] = inst |
| continue |
| } |
| } |
| |
| seen[inst.opcode] = inst |
| |
| if last != nil && canGenerate(last.opcode, inst.opcode) { |
| addTag(inst, "pseudo") |
| continue |
| } |
| last = inst |
| } |
| for _, inst := range insts { |
| if strings.Contains(inst.opcode, "REX ") { |
| if old := seen[strings.Replace(inst.opcode, "REX ", "", 1)]; old != nil && old.syntax == inst.syntax { |
| addTag(inst, "pseudo64") |
| continue |
| } else if old != nil && hasTag(old, "pseudo") { |
| addTag(inst, "pseudo") |
| continue |
| } |
| } |
| if strings.Contains(inst.opcode, "REX.W ") { |
| if old := seen[strings.Replace(inst.opcode, "REX.W ", "", -1)]; old != nil && old.syntax == inst.syntax { |
| addTag(old, "ignoreREXW") |
| addTag(inst, "pseudo") |
| continue |
| } else if old != nil && hasTag(old, "pseudo") { |
| addTag(inst, "pseudo") |
| continue |
| } else if old != nil && !hasTag(old, "operand16") && !hasTag(old, "operand32") { |
| // There is a 64-bit form of this instruction. |
| // Mark this one as only valid in the non-64-bit operand modes. |
| addTag(old, "operand16") |
| addTag(old, "operand32") |
| continue |
| } |
| } |
| } |
| |
| // Undo XCHG hack above. |
| for _, inst := range insts { |
| if strings.HasPrefix(inst.opcode, "XX ") { |
| inst.opcode = strings.TrimPrefix(inst.opcode, "XX ") |
| addTag(inst, "pseudo") |
| removeTag(inst, "pseudo64") |
| } |
| } |
| |
| // Last ditch effort. Manual fixes. |
| // Some things are too hard to infer. |
| for _, inst := range insts { |
| for _, fix := range fixup[[2]string{inst.syntax, inst.opcode}] { |
| fix(inst) |
| } |
| sort.Strings(inst.tags) |
| } |
| |
| sort.Sort(bySeq(insts)) |
| |
| if onlySomePages { |
| for _, inst := range extraInsts { |
| op, _ := splitSyntax(inst.syntax) |
| if haveOp[op] { |
| insts = append(insts, inst) |
| } |
| } |
| } else { |
| insts = append(insts, extraInsts...) |
| } |
| return insts |
| } |
| |
| func hasTag(inst *instruction, tag string) bool { |
| for _, t := range inst.tags { |
| if t == tag { |
| return true |
| } |
| } |
| return false |
| } |
| |
| func removeTag(inst *instruction, tag string) { |
| if !hasTag(inst, tag) { |
| return |
| } |
| out := inst.tags[:0] |
| for _, t := range inst.tags { |
| if t != tag { |
| out = append(out, t) |
| } |
| } |
| inst.tags = out |
| } |
| |
| func addTag(inst *instruction, tag string) { |
| if !hasTag(inst, tag) { |
| inst.tags = append(inst.tags, tag) |
| } |
| } |
| |
| type byOpcode []*instruction |
| |
| func (x byOpcode) Len() int { return len(x) } |
| func (x byOpcode) Swap(i, j int) { x[i], x[j] = x[j], x[i] } |
| func (x byOpcode) Less(i, j int) bool { |
| if x[i].opcode != x[j].opcode { |
| return opcodeLess(x[i].opcode, x[j].opcode) |
| } |
| if condLess(x[i].syntax, x[j].syntax) { |
| return true |
| } |
| if condLess(x[j].syntax, x[i].syntax) { |
| return false |
| } |
| if x[i].syntax != x[j].syntax { |
| return x[i].syntax < x[j].syntax |
| } |
| return x[i].seq < x[j].seq |
| } |
| |
| type bySeq []*instruction |
| |
| func (x bySeq) Len() int { return len(x) } |
| func (x bySeq) Swap(i, j int) { x[i], x[j] = x[j], x[i] } |
| func (x bySeq) Less(i, j int) bool { |
| return x[i].seq < x[j].seq |
| } |
| |
| type bySyntax []*instruction |
| |
| func (x bySyntax) Len() int { return len(x) } |
| func (x bySyntax) Swap(i, j int) { x[i], x[j] = x[j], x[i] } |
| func (x bySyntax) Less(i, j int) bool { |
| if x[i].syntax != x[j].syntax { |
| return x[i].syntax < x[j].syntax |
| } |
| return x[i].opcode < x[j].opcode |
| } |
| |
| // condLess reports whether the conditional instruction syntax |
| // x should be considered less than y. |
| // We sort condition codes we prefer ahead of condition codes we don't, |
| // so that the latter are recorded as the pseudo-operations. |
| func condLess(x, y string) bool { |
| x, _ = splitSyntax(x) |
| y, _ = splitSyntax(y) |
| for _, pref := range condPrefs { |
| if strings.HasSuffix(x, pref[0]) && strings.HasSuffix(y, pref[1]) && strings.TrimSuffix(x, pref[0]) == strings.TrimSuffix(y, pref[1]) { |
| return true |
| } |
| } |
| return false |
| } |
| |
| // xchgLess reports whether the xchg instruction x should be considered less than y. |
| func xchgLess(x, y string) bool { |
| return strings.HasPrefix(x, "XCHG ") && x > y |
| } |
| |
| // opcodeLess reports whether opcode string x should be considered less than y. |
| // We sort wildcard fields like "ib" before literal bytes like "0A". |
| func opcodeLess(x, y string) bool { |
| for i := 0; i < len(x) || i < len(y); i++ { |
| if i >= len(x) { |
| return true |
| } |
| if i >= len(y) { |
| return false |
| } |
| if x[i] != y[i] { |
| // sort word before doubleword |
| if x[i] == 'w' && y[i] == 'd' { |
| return true |
| } |
| if x[i] == 'd' && y[i] == 'w' { |
| return false |
| } |
| // Sort lower-case before non-lower-case. |
| // This sorts "ib" before literal bytes like "0A", for example. |
| return x[i]-'a' < y[i]-'a' |
| } |
| } |
| return false |
| } |
| |
| // splitOpcode splits an opcode into its fixed and variable portions. |
| // For example "05 iw" splits into "05" and "iw". |
| func splitOpcode(x string) (fixed, variable string) { |
| i := 0 |
| for i < len(x) { |
| c := x[i] |
| if '0' <= c && c <= '9' || 'A' <= c && c <= 'Z' || c == ' ' || c == '.' || c == '+' { |
| i++ |
| continue |
| } |
| if i+2 <= len(x) && c == '/' { |
| i += 2 |
| continue |
| } |
| break |
| } |
| return strings.TrimSpace(x[:i]), x[i:] |
| } |
| |
| // canGenerate reports whether opcode string x can generate opcode string y. |
| // For example "D5 ib" can generate "D5 0A". |
| // Any string x is not considered to generate itself. |
| func canGenerate(x, y string) bool { |
| i := 0 |
| for i < len(x) && i < len(y) && x[i] == y[i] { |
| i++ |
| } |
| if i == len(x) || i == len(y) { |
| return false |
| } |
| switch x[i:] { |
| case "ib": |
| return len(y[i:]) == 2 && allHex(y[i:]) |
| case "0+i": |
| return len(y[i:]) == 1 && '0' <= y[i] && y[i] <= '7' |
| case "8+i": |
| return len(y[i:]) == 1 && (y[i] == '8' || y[i] == '9' || 'A' <= y[i] && y[i] <= 'F') |
| } |
| return false |
| } |
| |
| // allHex reports whether s is entirely hex digits. |
| func allHex(s string) bool { |
| for _, c := range s { |
| if '0' <= c && c <= '9' || 'A' <= c && c <= 'F' { |
| continue |
| } |
| return false |
| } |
| return true |
| } |
| |
| // is16vs32pair reports whether x and y are the 16- and 32-bit variants of the same instruction, |
| // based on analysis of the mnemonic syntax. |
| func is16vs32pair(x, y *instruction) bool { |
| return conv16.Replace(x.syntax) == y.syntax || |
| strings.Replace(x.syntax, "r16, r/", "r32, r32/", -1) == y.syntax || // LSL etc |
| strings.Replace(x.syntax, "r16", "r32", 1) == y.syntax // MOVSXD, MOVSX, etc |
| } |
| |
| func containsAll(x string, targ ...string) bool { |
| for _, y := range targ { |
| i := strings.Index(x, y) |
| if i < 0 { |
| return false |
| } |
| x = x[i+len(y):] |
| } |
| return true |
| } |