blob: 76f36380783df5290f635370100288d172a10eb9 [file] [log] [blame]
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package main
import (
"fmt"
"os"
"sort"
"strings"
)
// Clean up the data from the Intel manual for correctness
// and to annotate details relevant to decoding or encoding,
// such as whether an instruction is valid only in certain
// operand size modes.
// encodeReplace maps (argument, encoding) pairs to the corrected argument.
// We use a suffix 1 for the register and 2 for the r/m in the modrm byte.
// We use a suffix V for a register number specified in the VEX.vvvv bits,
// a suffix IH for a register number held in imm8[7:4], and a suffix op for
// a register number added to the opcode byte.
// A pair with no entry leaves the argument unchanged.
//
// Keep every key unique: the compiler does not diagnose repeated
// array-valued keys in a map literal, so a repeated key would silently
// replace the earlier entry.
var encodeReplace = map[[2]string]string{
	{"mm", "ModRM:reg"}: "mm1",
	{"mm", "ModRM:r/m"}: "mm2",
	{"mm1", "ModRM:r/m"}: "mm2",
	{"mm2", "ModRM:reg"}: "mm1",
	{"mm/m32", "ModRM:r/m"}: "mm2/m32",
	{"mm/m64", "ModRM:r/m"}: "mm2/m64",
	{"xmm", "ModRM:reg"}: "xmm1",
	{"xmm", "ModRM:r/m"}: "xmm2",
	{"xmm0", "ModRM:reg"}: "xmm1",
	{"xmm1", "ModRM:r/m"}: "xmm2",
	{"xmm1/m16", "ModRM:r/m"}: "xmm2/m16",
	{"xmm1/m32", "ModRM:r/m"}: "xmm2/m32",
	{"xmm1/m64", "ModRM:r/m"}: "xmm2/m64",
	{"xmm1/m128", "ModRM:r/m"}: "xmm2/m128",
	{"xmm1/m256", "ModRM:r/m"}: "xmm2/m256",
	{"xmm/m16", "ModRM:r/m"}: "xmm2/m16",
	{"xmm/m32", "ModRM:r/m"}: "xmm2/m32",
	{"xmm/m64", "ModRM:r/m"}: "xmm2/m64",
	{"xmm/m128", "ModRM:r/m"}: "xmm2/m128",
	{"xmm/m256", "ModRM:r/m"}: "xmm2/m256",
	{"xmm3", "ModRM:reg"}: "xmm1",
	{"xmm3", "ModRM:r/m"}: "xmm2",
	{"xmm3/m16", "ModRM:r/m"}: "xmm2/m16",
	{"xmm3/m32", "ModRM:r/m"}: "xmm2/m32",
	{"xmm3/m64", "ModRM:r/m"}: "xmm2/m64",
	{"xmm3/m128", "ModRM:r/m"}: "xmm2/m128",
	{"xmm3/m256", "ModRM:r/m"}: "xmm2/m256",
	{"xmm2", "ModRM:reg"}: "xmm1",
	{"xmm2/m16", "ModRM:reg"}: "xmm1/m16",
	{"xmm2/m32", "ModRM:reg"}: "xmm1/m32",
	{"xmm2/m64", "ModRM:reg"}: "xmm1/m64",
	{"xmm2/m128", "ModRM:reg"}: "xmm1/m128",
	{"xmm2/m256", "ModRM:reg"}: "xmm1/m256",
	{"ymm", "ModRM:reg"}: "ymm1",
	{"ymm", "ModRM:r/m"}: "ymm2",
	{"ymm0", "ModRM:reg"}: "ymm1",
	{"ymm1", "ModRM:r/m"}: "ymm2",
	{"ymm1/m16", "ModRM:r/m"}: "ymm2/m16",
	{"ymm1/m32", "ModRM:r/m"}: "ymm2/m32",
	{"ymm1/m64", "ModRM:r/m"}: "ymm2/m64",
	{"ymm1/m128", "ModRM:r/m"}: "ymm2/m128",
	{"ymm1/m256", "ModRM:r/m"}: "ymm2/m256",
	{"ymm3", "ModRM:reg"}: "ymm1",
	{"ymm3", "ModRM:r/m"}: "ymm2",
	{"ymm3/m16", "ModRM:r/m"}: "ymm2/m16",
	{"ymm3/m32", "ModRM:r/m"}: "ymm2/m32",
	{"ymm3/m64", "ModRM:r/m"}: "ymm2/m64",
	{"ymm3/m128", "ModRM:r/m"}: "ymm2/m128",
	{"ymm3/m256", "ModRM:r/m"}: "ymm2/m256",
	{"ymm2", "ModRM:reg"}: "ymm1",
	{"ymm2/m16", "ModRM:reg"}: "ymm1/m16",
	{"ymm2/m32", "ModRM:reg"}: "ymm1/m32",
	{"ymm2/m64", "ModRM:reg"}: "ymm1/m64",
	{"ymm2/m128", "ModRM:reg"}: "ymm1/m128",
	{"ymm2/m256", "ModRM:reg"}: "ymm1/m256",
	{"xmm1", "VEX.vvvv"}: "xmmV",
	{"xmm2", "VEX.vvvv"}: "xmmV",
	{"ymm1", "VEX.vvvv"}: "ymmV",
	{"ymm2", "VEX.vvvv"}: "ymmV",
	{"xmm4", "imm8[7:4]"}: "xmmIH",
	{"ymm4", "imm8[7:4]"}: "ymmIH",
	{"r8", "opcode + rd"}: "r8op",
	{"r16", "opcode + rd"}: "r16op",
	{"r32", "opcode + rd"}: "r32op",
	{"r64", "opcode + rd"}: "r64op",
	{"reg/m32", "ModRM:r/m"}: "r/m32",
	{"reg/m16", "ModRM:r/m"}: "r32/m16",
	{"bnd", "ModRM:reg"}: "bnd1",
	{"bnd2", "ModRM:reg"}: "bnd1",
	{"bnd1/m64", "ModRM:r/m"}: "bnd2/m64",
	{"bnd1/m128", "ModRM:r/m"}: "bnd2/m128",
	{"r32a", "ModRM:reg"}: "r32",
	{"r64a", "ModRM:reg"}: "r64",
	{"r32", "VEX.vvvv"}: "r32V",
	{"r64", "VEX.vvvv"}: "r64V",
	{"r32b", "VEX.vvvv"}: "r32V",
	{"r64b", "VEX.vvvv"}: "r64V",
	{"ST", "ST(0)"}: "ST(0)",
}
// encodings supplies operand encoding descriptions for the few instructions
// that do not have the usual ones in the manual.
// It is keyed by the complete instruction syntax. Each value holds one
// description per operand, ending in the operand's access annotation:
// (r), (w), or (r, w).
// When cleanup finds an instruction's syntax here, the entry replaces the
// instruction's own encoding details.
var encodings = map[string][]string{
"FADD m32fp": {"ModRM:r/m (r)"},
"FADD m64fp": {"ModRM:r/m (r)"},
"FADD ST(0), ST(i)": {"ST(0) (r, w)", "ST(i) (r)"},
"FADD ST(i), ST(0)": {"ST(i) (r, w)", "ST(0) (r)"},
"FADDP ST(i), ST(0)": {"ST(i) (r, w)", "ST(0) (r)"},
"FIADD m32int": {"ModRM:r/m (r)"},
"FIADD m16int": {"ModRM:r/m (r)"},
"FBLD m80dec": {"ModRM:r/m (r)"},
"FBSTP m80bcd": {"ModRM:r/m (w)"},
"FCMOVB ST(0), ST(i)": {"ST(0) (r, w)", "ST(i) (r)"},
"FCMOVE ST(0), ST(i)": {"ST(0) (r, w)", "ST(i) (r)"},
"FCMOVBE ST(0), ST(i)": {"ST(0) (r, w)", "ST(i) (r)"},
"FCMOVU ST(0), ST(i)": {"ST(0) (r, w)", "ST(i) (r)"},
"FCMOVNB ST(0), ST(i)": {"ST(0) (r, w)", "ST(i) (r)"},
"FCMOVNE ST(0), ST(i)": {"ST(0) (r, w)", "ST(i) (r)"},
"FCMOVNBE ST(0), ST(i)": {"ST(0) (r, w)", "ST(i) (r)"},
"FCMOVNU ST(0), ST(i)": {"ST(0) (r, w)", "ST(i) (r)"},
"FCOM m32fp": {"ModRM:r/m (r)"},
"FCOM m64fp": {"ModRM:r/m (r)"},
"FCOM ST(i)": {"ST(i) (r)"},
"FCOMP m32fp": {"ModRM:r/m (r)"},
"FCOMP m64fp": {"ModRM:r/m (r)"},
"FCOMP ST(i)": {"ST(i) (r)"},
"FCOMI ST, ST(i)": {"ST(0) (r)", "ST(i) (r)"},
"FCOMIP ST, ST(i)": {"ST(0) (r)", "ST(i) (r)"},
"FUCOMI ST, ST(i)": {"ST(0) (r)", "ST(i) (r)"},
"FUCOMIP ST, ST(i)": {"ST(0) (r)", "ST(i) (r)"},
"FDIV m32fp": {"ModRM:r/m (r)"},
"FDIV m64fp": {"ModRM:r/m (r)"},
"FDIV ST(0), ST(i)": {"ST(0) (r, w)", "ST(i) (r)"},
"FDIV ST(i), ST(0)": {"ST(i) (r, w)", "ST(0) (r)"},
"FDIVP ST(i), ST(0)": {"ST(i) (r, w)", "ST(0) (r)"},
"FIDIV m16int": {"ModRM:r/m (r)"},
"FIDIV m32int": {"ModRM:r/m (r)"},
"FIDIV m64int": {"ModRM:r/m (r)"},
"FDIVR m32fp": {"ModRM:r/m (r)"},
"FDIVR m64fp": {"ModRM:r/m (r)"},
"FDIVR ST(0), ST(i)": {"ST(0) (r, w)", "ST(i) (r)"},
"FDIVR ST(i), ST(0)": {"ST(i) (r, w)", "ST(0) (r)"},
"FDIVRP ST(i), ST(0)": {"ST(i) (r, w)", "ST(0) (r)"},
"FIDIVR m16int": {"ModRM:r/m (r)"},
"FIDIVR m32int": {"ModRM:r/m (r)"},
"FIDIVR m64int": {"ModRM:r/m (r)"},
"FFREE ST(i)": {"ST(i) (w)"},
"FICOM m16int": {"ModRM:r/m (r)"},
"FICOM m32int": {"ModRM:r/m (r)"},
"FICOMP m16int": {"ModRM:r/m (r)"},
"FICOMP m32int": {"ModRM:r/m (r)"},
"FILD m16int": {"ModRM:r/m (r)"},
"FILD m32int": {"ModRM:r/m (r)"},
"FILD m64int": {"ModRM:r/m (r)"},
"FIST m16int": {"ModRM:r/m (w)"},
"FIST m32int": {"ModRM:r/m (w)"},
"FISTP m16int": {"ModRM:r/m (w)"},
"FISTP m32int": {"ModRM:r/m (w)"},
"FISTP m64int": {"ModRM:r/m (w)"},
"FISTTP m16int": {"ModRM:r/m (w)"},
"FISTTP m32int": {"ModRM:r/m (w)"},
"FISTTP m64int": {"ModRM:r/m (w)"},
"FLD m32fp": {"ModRM:r/m (r)"},
"FLD m64fp": {"ModRM:r/m (r)"},
"FLD m80fp": {"ModRM:r/m (r)"},
"FLD ST(i)": {"ST(i) (r)"},
"FLDCW m2byte": {"ModRM:r/m (r)"},
"FLDENV m14/28byte": {"ModRM:r/m (r)"},
"FMUL m32fp": {"ModRM:r/m (r)"},
"FMUL m64fp": {"ModRM:r/m (r)"},
"FMUL ST(0), ST(i)": {"ST(0) (r, w)", "ST(i) (r)"},
"FMUL ST(i), ST(0)": {"ST(i) (r, w)", "ST(0) (r)"},
"FMULP ST(i), ST(0)": {"ST(i) (r, w)", "ST(0) (r)"},
"FIMUL m16int": {"ModRM:r/m (r)"},
"FIMUL m32int": {"ModRM:r/m (r)"},
"FRSTOR m94/108byte": {"ModRM:r/m (r)"},
"FSAVE m94/108byte": {"ModRM:r/m (w)"},
"FNSAVE m94/108byte": {"ModRM:r/m (w)"},
"FST m32fp": {"ModRM:r/m (w)"},
"FST m64fp": {"ModRM:r/m (w)"},
"FST m80fp": {"ModRM:r/m (w)"},
"FST ST(i)": {"ST(i) (w)"},
"FSTP m32fp": {"ModRM:r/m (w)"},
"FSTP m64fp": {"ModRM:r/m (w)"},
"FSTP m80fp": {"ModRM:r/m (w)"},
"FSTP ST(i)": {"ST(i) (w)"},
"FSTCW m2byte": {"ModRM:r/m (w)"},
"FNSTCW m2byte": {"ModRM:r/m (w)"},
"FSTENV m14/28byte": {"ModRM:r/m (w)"},
"FNSTENV m14/28byte": {"ModRM:r/m (w)"},
"FSTSW m2byte": {"ModRM:r/m (w)"},
"FSTSW AX": {"AX (w)"},
"FNSTSW m2byte": {"ModRM:r/m (w)"},
"FNSTSW AX": {"AX (w)"},
"FSUB m32fp": {"ModRM:r/m (r)"},
"FSUB m64fp": {"ModRM:r/m (r)"},
"FSUB ST(0), ST(i)": {"ST(0) (r, w)", "ST(i) (r)"},
"FSUB ST(i), ST(0)": {"ST(i) (r, w)", "ST(0) (r)"},
"FSUBP ST(i), ST(0)": {"ST(i) (r, w)", "ST(0) (r)"},
"FISUB m16int": {"ModRM:r/m (r)"},
"FISUB m32int": {"ModRM:r/m (r)"},
"FSUBR m32fp": {"ModRM:r/m (r)"},
"FSUBR m64fp": {"ModRM:r/m (r)"},
"FSUBR ST(0), ST(i)": {"ST(0) (r, w)", "ST(i) (r)"},
"FSUBR ST(i), ST(0)": {"ST(i) (r, w)", "ST(0) (r)"},
"FSUBRP ST(i), ST(0)": {"ST(i) (r, w)", "ST(0) (r)"},
"FISUBR m16int": {"ModRM:r/m (r)"},
"FISUBR m32int": {"ModRM:r/m (r)"},
"FISUBR m64int": {"ModRM:r/m (r)"},
"FUCOM ST(i)": {"ST(i) (r)"},
"FUCOMP ST(i)": {"ST(i) (r)"},
"FXCH ST(i)": {"ST(i) (r, w)"},
"POP DS": {"DS (w)"},
"POP ES": {"ES (w)"},
"POP FS": {"FS (w)"},
"POP GS": {"GS (w)"},
"POP SS": {"SS (w)"},
"POP CS": {"CS (w)"},
"PUSH CS": {"CS (r)"},
"PUSH DS": {"DS (r)"},
"PUSH ES": {"ES (r)"},
"PUSH FS": {"FS (r)"},
"PUSH GS": {"GS (r)"},
"PUSH SS": {"SS (r)"},
"INT 3": {"3 (r)"},
// Present in the manual, but in a form that is hard to parse.
"BNDLDX bnd, mib": {"ModRM:reg (w)", "ModRM:r/m (r)"},
"BNDSTX mib, bnd": {"ModRM:r/m (r)", "ModRM:reg (r)"},
// Present in the manual, but wrong there.
"CALL rel16": {"Offset"},
"CALL rel32": {"Offset"},
"IN AL, imm8": {"AL (w)", "imm8 (r)"},
"IN AX, imm8": {"AX (w)", "imm8 (r)"},
"IN EAX, imm8": {"EAX (w)", "imm8 (r)"},
"IN AL, DX": {"AL (w)", "DX (r)"},
"IN AX, DX": {"AX (w)", "DX (r)"},
"IN EAX, DX": {"EAX (w)", "DX (r)"},
"OUT DX, AL": {"DX (r)", "AL (r)"},
"OUT DX, AX": {"DX (r)", "AX (r)"},
"OUT DX, EAX": {"DX (r)", "EAX (r)"},
"OUT imm8, AL": {"imm8 (r)", "AL (r)"},
"OUT imm8, AX": {"imm8 (r)", "AX (r)"},
"OUT imm8, EAX": {"imm8 (r)", "EAX (r)"},
"XCHG AX, r16": {"AX (r, w)", "opcode + rd (r, w)"},
"XCHG EAX, r32": {"EAX (r, w)", "opcode + rd (r, w)"},
"XCHG RAX, r64": {"RAX (r, w)", "opcode + rd (r, w)"},
// The manual does not list an encoding for these.
"INVEPT r32, m128": {"ModRM:reg (r)", "ModRM:r/m (r)"},
"INVEPT r64, m128": {"ModRM:reg (r)", "ModRM:r/m (r)"},
"INVVPID r32, m128": {"ModRM:reg (r)", "ModRM:r/m (r)"},
"INVVPID r64, m128": {"ModRM:reg (r)", "ModRM:r/m (r)"},
"VMREAD r/m32, r32": {"ModRM:r/m (w)", "ModRM:reg (r)"},
"VMREAD r/m64, r64": {"ModRM:r/m (w)", "ModRM:reg (r)"},
"VMWRITE r32, r/m32": {"ModRM:reg (r)", "ModRM:r/m (r)"},
"VMWRITE r64, r/m64": {"ModRM:reg (r)", "ModRM:r/m (r)"},
"VMCLEAR m64": {"ModRM:r/m (w)"},
"VMPTRLD m64": {"ModRM:r/m (r)"},
"VMPTRST m64": {"ModRM:r/m (w)"},
"VMXON m64": {"ModRM:r/m (r)"},
}
// opAction lists the read/write actions for individual opcodes,
// where the manual does not.
// It is keyed by the opcode name alone. Element i of the value is the action
// for operand i: "r" (read), "w" (written), or "rw" (both).
// cleanup consults it only for an operand whose encoding detail carries no
// (r)/(w) annotation of its own, and only when the list has an element for
// that operand's position.
var opAction = map[string][]string{
	"ADC": {"rw", "r"},
	"ADD": {"rw", "r"},
	"AND": {"rw", "r"},
	"BLENDVPD": {"rw", "r", "r"},
	"BLENDVPS": {"rw", "r", "r"},
	"IN": {"w", "r"},
	"MOV": {"w", "r"},
	"OR": {"rw", "r"},
	"OUT": {"r", "r"},
	"PBLENDVB": {"rw", "r", "r"},
	"RCL": {"rw", "r"},
	"RCR": {"rw", "r"},
	"ROL": {"rw", "r"},
	"ROR": {"rw", "r"},
	"SAL": {"rw", "r"},
	"SAR": {"rw", "r"},
	"SBB": {"rw", "r"},
	"SHL": {"rw", "r"},
	"SHLD": {"rw", "r", "r"},
	"SHR": {"rw", "r"},
	"SHRD": {"rw", "r", "r"},
	// SUB takes two operands, like the other two-operand ALU opcodes above.
	"SUB": {"rw", "r"},
	"TEST": {"r", "r"},
	"VBLENDVPD": {"rw", "r", "r"},
	"VBLENDVPS": {"rw", "r", "r"},
	"VPBLENDVB": {"rw", "r", "r"},
	"VPMASKMOVD": {"w", "r", "r"},
	"VPMASKMOVQ": {"w", "r", "r"},
	"VPSLLVD": {"w", "r", "r"},
	"VPSRAVD": {"w", "r", "r"},
	"VPSRLVD": {"w", "r", "r"},
	"VPSRLVQ": {"w", "r", "r"},
	"VINSERTI128": {"w", "r", "r"},
	"VPBLENDD": {"w", "r", "r"},
	"VPERMD": {"w", "r", "r"},
	"VPERMPS": {"w", "r", "r"},
	"VPERM2I128": {"w", "r", "r"},
	"VPSLLVQ": {"w", "r", "r"},
	"XCHG": {"rw", "rw"},
	"XOR": {"rw", "r"},
}
// encodeOK lists valid arg, encoding pairs.
// Any pair not listed gets a warning.
// Each key is an (argument, encoding) pair in the same form used by
// encodeReplace, with the access annotation already removed from the encoding.
//
// Keep every key unique: the compiler does not diagnose repeated
// array-valued keys in a map literal.
var encodeOK = map[[2]string]bool{
	{"0", "imm8"}: true,
	{"1", "1"}: true,
	{"1", "imm8"}: true,
	{"<XMM0>", "<XMM0>"}: true,
	{"<XMM0>", "implicit XMM0"}: true,
	{"AL", "AL"}: true,
	{"AL", "AL/AX/EAX/RAX"}: true,
	{"AX", "AL/AX/EAX/RAX"}: true,
	{"AX", "AX"}: true,
	{"AX", "AX/EAX/RAX"}: true,
	{"CL", "CL"}: true,
	{"CR0-CR7", "ModRM:reg"}: true,
	{"CR8", ""}: true,
	{"CS", "CS"}: true,
	{"DR0-DR7", "ModRM:reg"}: true,
	{"DS", "DS"}: true,
	{"DX", "DX"}: true,
	{"EAX", "AL/AX/EAX/RAX"}: true,
	{"EAX", "AX/EAX/RAX"}: true,
	{"EAX", "EAX"}: true,
	{"ES", "ES"}: true,
	{"FS", "FS"}: true,
	{"GS", "GS"}: true,
	{"RAX", "AL/AX/EAX/RAX"}: true,
	{"RAX", "AX/EAX/RAX"}: true,
	{"RAX", "RAX"}: true,
	{"ST", "ST(0)"}: true,
	{"ST(0)", "ST(0)"}: true,
	{"ST(i)", "ST(i)"}: true,
	{"Sreg", "ModRM:reg"}: true,
	{"bnd1", "ModRM:reg"}: true,
	{"bnd2/m128", "ModRM:r/m"}: true,
	{"bnd2/m64", "ModRM:r/m"}: true,
	{"imm16", "imm16"}: true,
	{"imm16", "imm8"}: true,
	{"imm16", "imm8/16/32"}: true,
	{"imm16", "imm8/16/32/64"}: true,
	{"imm16", "iw"}: true,
	{"imm32", "imm8"}: true,
	{"imm32", "imm8/16/32"}: true,
	{"imm32", "imm8/16/32/64"}: true,
	{"imm64", "imm8/16/32/64"}: true,
	{"imm8", "imm8"}: true,
	{"imm8", "imm8/16/32"}: true,
	{"imm8", "imm8/16/32/64"}: true,
	{"imm8", "imm8[3:0]"}: true,
	{"m", "ModRM:r/m"}: true,
	{"m128", "ModRM:r/m"}: true,
	{"m14/28byte", "ModRM:r/m"}: true,
	{"m16", "ModRM:r/m"}: true,
	{"m16&16", "ModRM:r/m"}: true,
	{"m16&32", "ModRM:r/m"}: true,
	{"m16&64", "ModRM:r/m"}: true,
	{"m16:16", "ModRM:r/m"}: true,
	{"m16:16", "Offset"}: true,
	{"m16:32", "ModRM:r/m"}: true,
	{"m16:32", "Offset"}: true,
	{"m16:64", "ModRM:r/m"}: true,
	{"m16:64", "Offset"}: true,
	{"m16int", "ModRM:r/m"}: true,
	{"m256", "ModRM:r/m"}: true,
	{"m2byte", "ModRM:r/m"}: true,
	{"m32", "ModRM:r/m"}: true,
	{"m32&32", "ModRM:r/m"}: true,
	{"m32fp", "ModRM:r/m"}: true,
	{"m32int", "ModRM:r/m"}: true,
	{"m512byte", "ModRM:r/m"}: true,
	{"m64", "ModRM:r/m"}: true,
	{"m64fp", "ModRM:r/m"}: true,
	{"m64int", "ModRM:r/m"}: true,
	{"m8", "ModRM:r/m"}: true,
	{"m80bcd", "ModRM:r/m"}: true,
	{"m80dec", "ModRM:r/m"}: true,
	{"m80fp", "ModRM:r/m"}: true,
	{"m94/108byte", "ModRM:r/m"}: true,
	{"mem", "ModRM:r/m"}: true,
	{"mib", "ModRM:r/m"}: true,
	{"mm/m32", "ModRM:r/m"}: true,
	{"mm1", "ModRM:reg"}: true,
	{"mm2", "ModRM:r/m"}: true,
	{"mm2/m32", "ModRM:r/m"}: true,
	{"mm2/m64", "ModRM:r/m"}: true,
	{"moffs16", "Moffs"}: true,
	{"moffs32", "Moffs"}: true,
	{"moffs64", "Moffs"}: true,
	{"moffs8", "Moffs"}: true,
	{"ptr16:16", "Offset"}: true,
	{"ptr16:32", "Offset"}: true,
	{"r/m16", "ModRM:r/m"}: true,
	{"r/m32", "ModRM:r/m"}: true,
	{"r/m64", "ModRM:r/m"}: true,
	{"r/m8", "ModRM:r/m"}: true,
	{"r16", "ModRM:reg"}: true,
	{"r16op", "opcode + rd"}: true,
	{"r32", "ModRM:reg"}: true,
	{"r32", "VEX.vvvv"}: true,
	{"r32/m16", "ModRM:r/m"}: true,
	{"r32/m8", "ModRM:r/m"}: true,
	{"r32V", "VEX.vvvv"}: true,
	{"r32op", "opcode + rd"}: true,
	{"r64", "ModRM:reg"}: true,
	{"r64/m16", "ModRM:r/m"}: true,
	{"r64V", "VEX.vvvv"}: true,
	{"r64op", "opcode + rd"}: true,
	{"r8", "ModRM:reg"}: true,
	{"r8op", "opcode + rd"}: true,
	{"rel16", "Offset"}: true,
	{"rel32", "Offset"}: true,
	{"rel8", "Offset"}: true,
	{"rmr16", "ModRM:r/m"}: true,
	{"rmr32", "ModRM:r/m"}: true,
	{"rmr64", "ModRM:r/m"}: true,
	{"xmm/m128", "ModRM:r/m"}: true,
	{"xmm/m32", "ModRM:r/m"}: true,
	{"xmm1", "ModRM:reg"}: true,
	{"xmm2", "ModRM:r/m"}: true,
	{"xmm2/m128", "ModRM:r/m"}: true,
	{"xmm2/m16", "ModRM:r/m"}: true,
	{"xmm2/m32", "ModRM:r/m"}: true,
	{"xmm2/m64", "ModRM:r/m"}: true,
	{"xmm2/m8", "ModRM:r/m"}: true,
	{"xmmIH", "imm8[7:4]"}: true,
	{"xmmV", "VEX.vvvv"}: true,
	{"ymm1", "ModRM:reg"}: true,
	{"ymm2", "ModRM:r/m"}: true,
	{"ymm2/m256", "ModRM:r/m"}: true,
	{"ymmIH", "imm8[7:4]"}: true,
	{"ymmV", "VEX.vvvv"}: true,
	{"vm32x", "vsib"}: true,
	{"vm64x", "vsib"}: true,
	{"vm32y", "vsib"}: true,
	{"vm64y", "vsib"}: true,
	{"SS", "SS"}: true,
	{"3", "3"}: true,
}
// instBlacklist lists the instruction syntaxes to ignore when parsing.
// It is keyed by the complete instruction syntax; every listed entry is true.
// We exclude Intel's general forms for these not-actually-general instructions.
// The syntax makes it look as if arbitrary memory operands can be used, when in
// fact the exact address is fixed in all cases: [DI] or [SI], for example.
var instBlacklist = map[string]bool{
"CMPS m16, m16": true,
"CMPS m32, m32": true,
"CMPS m64, m64": true,
"CMPS m8, m8": true,
"INS m16, DX": true,
"INS m32, DX": true,
"INS m8, DX": true,
"LODS m16": true,
"LODS m32": true,
"LODS m64": true,
"LODS m8": true,
"MOVS m16, m16": true,
"MOVS m32, m32": true,
"MOVS m64, m64": true,
"MOVS m8, m8": true,
"OUTS DX, m16": true,
"OUTS DX, m32": true,
"OUTS DX, m8": true,
"REP INS m16, DX": true,
"REP INS m32, DX": true,
"REP INS m8, DX": true,
"REP INS r/m32, DX": true,
"REP LODS AL": true,
"REP LODS AX": true,
"REP LODS EAX": true,
"REP LODS RAX": true,
"REP MOVS m16, m16": true,
"REP MOVS m32, m32": true,
"REP MOVS m64, m64": true,
"REP MOVS m8, m8": true,
"REP OUTS DX, m16": true,
"REP OUTS DX, m32": true,
"REP OUTS DX, m8": true,
"REP OUTS DX, r/m16": true,
"REP OUTS DX, r/m32": true,
"REP OUTS DX, r/m8": true,
"REP STOS m16": true,
"REP STOS m32": true,
"REP STOS m64": true,
"REP STOS m8": true,
"REPE CMPS m16, m16": true,
"REPE CMPS m32, m32": true,
"REPE CMPS m64, m64": true,
"REPE CMPS m8, m8": true,
"REPE SCAS m16": true,
"REPE SCAS m32": true,
"REPE SCAS m64": true,
"REPE SCAS m8": true,
"REPNE CMPS m16, m16": true,
"REPNE CMPS m32, m32": true,
"REPNE CMPS m64, m64": true,
"REPNE CMPS m8, m8": true,
"REPNE SCAS m16": true,
"REPNE SCAS m32": true,
"REPNE SCAS m64": true,
"REPNE SCAS m8": true,
"SCAS m16": true,
"SCAS m32": true,
"SCAS m64": true,
"SCAS m8": true,
"STOS m16": true,
"STOS m32": true,
"STOS m64": true,
"STOS m8": true,
"XLAT m8": true,
}
// condPrefs lists preferences for condition code suffixes.
// The first suffix in each pair takes priority over the second.
// A preferred suffix may appear in more than one pair (B is preferred over
// both C and NAE, and AE over both NB and NC).
// NOTE(review): the code that applies these preferences is outside this
// section; presumably each pair names two spellings of the same condition.
var condPrefs = [][2]string{
{"B", "C"},
{"B", "NAE"},
{"AE", "NB"},
{"AE", "NC"},
{"E", "Z"},
{"NE", "NZ"},
{"BE", "NA"},
{"A", "NBE"},
{"P", "PE"},
{"NP", "PO"},
{"L", "NGE"},
{"GE", "NL"},
{"LE", "NG"},
{"G", "NLE"},
}
// conv16 specifies replacements to turn a 16-bit syntax into a 32-bit syntax.
// If the conv16 can be applied to one form to create a new form with the same
// fixed instruction prefix, the pair is tagged as operand16 and operand32
// respectively.
//
// The arguments are (old, new) pairs. strings.Replacer compares the old
// strings in argument order at each position, so the order below matters:
// "16:16" must come before "16" for "16:16" to become "16:32" rather than
// "32:32".
var conv16 = strings.NewReplacer(
"16:16", "16:32",
"16", "32",
"AX", "EAX",
"CBW", "CWDE",
"CMPSW", "CMPSD",
"CWD", "CDQ",
"INSW", "INSD",
"IRET", "IRETD",
"LODSW", "LODSD",
"MOVSW", "MOVSD",
"OUTSW", "OUTSD",
"POPA", "POPAD",
"POPF", "POPFD",
"PUSHA", "PUSHAD",
"PUSHF", "PUSHFD",
"SCASW", "SCASD",
"STOSW", "STOSD",
)
// fixup records additional modifications needed that are not derived
// from the instructions in the manual. It is keyed by the syntax and opcode.
// Each value is the list of fixers for the matching instruction.
// NOTE(review): the code that looks up and runs these fixers is outside this
// section; presumably they run in the order listed.
var fixup = map[[2]string][]fixer{
// NOP is a very special case overloading XCHG AX, AX.
// The decoder handles it in custom code; exclude from the usual tables.
{"NOP", "90"}: {fixAddTag("pseudo")},
// PAUSE is a special case of NOP.
{"PAUSE", "F3 90"}: {fixAddTag("pseudo")}, // used to add 'keepop' tag but not sure what that means
// Far CALL, JMP, RET are renamed with a _FAR suffix for disambiguation.
{"CALL m16:16", "FF /3"}: {fixRename("CALL_FAR")},
{"CALL m16:32", "FF /3"}: {fixRename("CALL_FAR")},
{"CALL m16:64", "REX.W FF /3"}: {fixRename("CALL_FAR")},
{"CALL ptr16:16", "9A cd"}: {fixRename("CALL_FAR")},
{"CALL ptr16:32", "9A cp"}: {fixRename("CALL_FAR")},
{"JMP m16:16", "FF /5"}: {fixRename("JMP_FAR")},
{"JMP m16:32", "FF /5"}: {fixRename("JMP_FAR")},
{"JMP m16:64", "REX.W FF /5"}: {fixRename("JMP_FAR")},
{"JMP ptr16:16", "EA cd"}: {fixRename("JMP_FAR")},
{"JMP ptr16:32", "EA cp"}: {fixRename("JMP_FAR")},
{"RET imm16", "CA iw"}: {fixRename("RET_FAR"), fixArg(0, "imm16u")},
{"RET", "CB"}: {fixRename("RET_FAR")},
// Unsigned immediates. (RET far imm16 handled above.)
// Some of these are just preferences for disassembling.
{"ENTER imm16, imm8", "C8 iw ib"}: {fixArg(1, "imm8b")},
{"RET imm16", "C2 iw"}: {fixArg(0, "imm16u")},
{"IN AL, imm8", "E4 ib"}: {fixArg(1, "imm8u")},
{"IN AX, imm8", "E5 ib"}: {fixArg(1, "imm8u")},
{"IN EAX, imm8", "E5 ib"}: {fixArg(1, "imm8u"), fixAddTag("operand64")},
{"OUT imm8, AL", "E6 ib"}: {fixArg(0, "imm8u")},
{"OUT imm8, AX", "E7 ib"}: {fixArg(0, "imm8u")},
{"OUT imm8, EAX", "E7 ib"}: {fixArg(0, "imm8u"), fixAddTag("operand64")},
{"MOV r8op, imm8", "B0+rb ib"}: {fixArg(1, "imm8u")},
{"MOV r8op, imm8", "REX B0+rb ib"}: {fixArg(1, "imm8u"), fixAddTag("pseudo64")},
{"MOV r/m8, imm8", "C6 /0 ib"}: {fixArg(1, "imm8u")},
{"MOV r/m8, imm8", "REX C6 /0 ib"}: {fixArg(1, "imm8u"), fixAddTag("pseudo64")},
// The listings for MOVSX and MOVSXD do not list some variants that
// assemblers seem to allow.
// As a result, this instruction got the wrong tag.
// The other instructions are listed in extraInsts.
{"MOVSX r32, r/m16", "0F BF /r"}: {fixRemoveTag("operand16"), fixAddTag("operand32")},
{"MOVZX r32, r/m16", "0F B7 /r"}: {fixRemoveTag("operand16")},
// Listings are incomplete or incorrect. Fix tags to adjust for new instructions below.
{"SLDT r/m16", "0F 00 /0"}: {fixRemoveTag("operand32")},
{"STR r/m16", "0F 00 /1"}: {fixAddTag("operand16")},
{"BSWAP r32op", "0F C8+rd"}: {fixRemoveTag("operand16")},
{"MOV Sreg, r/m16", "8E /r"}: {fixRemoveTag("operand32")},
{"MOV Sreg, r/m64", "REX.W 8E /r"}: {fixArg(1, "r/m16")},
{"MOV r/m64, Sreg", "REX.W 8C /r"}: {fixArg(0, "r/m16")},
{"MOV r/m16, Sreg", "8C /r"}: {fixRemoveTag("operand32")},
{"MOV r/m64, imm32", "REX.W C7 /0 io"}: {fixOpcode("REX.W C7 /0 id")},
// On 64-bit, these ignore 64-bit mode change.
{"POP FS", "0F A1"}: {fixIfValid("N.E.", "V", fixAddTag("operand64"))},
{"POP GS", "0F A9"}: {fixIfValid("N.E.", "V", fixAddTag("operand64"))},
{"LEAVE", "C9"}: {fixIfValid("N.E.", "V", fixAddTag("operand64"))},
{"IN EAX, DX", "ED"}: {fixAddTag("operand64")},
{"INSD", "6D"}: {fixAddTag("operand64")},
{"OUT DX, EAX", "EF"}: {fixAddTag("operand64")},
{"OUTSD", "6F"}: {fixAddTag("operand64")},
{"XBEGIN rel32", "C7 F8 cd"}: {fixAddTag("operand64")},
// Treat FWAIT, not WAIT, as canonical.
{"FWAIT", "9B"}: {fixRemoveTag("pseudo")},
{"WAIT", "9B"}: {fixAddTag("pseudo")},
// LAHF and SAHF are listed as "Invalid*" for 64-bit mode.
// They are actually defined, so Valid from our point of view.
// It's just that only a very few 64-bit processors allowed them.
{"LAHF", "9F"}: {fixValid("V", "V")},
{"SAHF", "9E"}: {fixValid("V", "V")},
// The JZ forms are listed twice in the table, which confuses things.
{"JZ rel16", "0F 84 cw"}: {fixAddTag("operand16"), fixRemoveTag("operand32")},
{"JZ rel32", "0F 84 cd"}: {fixAddTag("operand32"), fixRemoveTag("operand16")},
// XCHG has two of every instruction, which makes things bad.
// The XX hack below takes care of most problems but this one remains.
{"XCHG r/m16, r16", "87 /r"}: {fixRemoveTag("pseudo")},
// MOV CR8 is just the obvious extension of the MOV CR0-CR7 form.
{"MOV rmr64, CR8", "REX.R + 0F 20 /0"}: {fixAddTag("pseudo")},
{"MOV CR8, rmr64", "REX.R + 0F 22 /0"}: {fixAddTag("pseudo")},
// TODO: EXPLAIN ALL THESE
{"ADCX r32, r/m32", "66 0F 38 F6 /r"}: {fixAddTag("operand16"), fixAddTag("operand32")},
{"ADOX r32, r/m32", "F3 0F 38 F6 /r"}: {fixAddTag("operand16"), fixAddTag("operand32")},
{"POPFQ", "9D"}: {fixAddTag("operand32"), fixAddTag("operand64")},
{"PUSHFQ", "9C"}: {fixAddTag("operand32"), fixAddTag("operand64")},
{"JCXZ rel8", "E3 cb"}: {fixAddTag("address16")},
{"JECXZ rel8", "E3 cb"}: {fixAddTag("address32")},
{"JRCXZ rel8", "E3 cb"}: {fixAddTag("address64")},
{"PUSH r64op", "50+rd"}: {fixAddTag("operand32"), fixAddTag("operand64")},
{"PUSH r/m64", "FF /6"}: {fixAddTag("operand32"), fixAddTag("operand64")},
{"POP r64op", "58+rd"}: {fixAddTag("operand32"), fixAddTag("operand64")},
{"POP r/m64", "8F /0"}: {fixAddTag("operand32"), fixAddTag("operand64")},
{"SMSW r/m16", "0F 01 /4"}: {fixAddTag("operand16")},
{"SMSW r32/m16", "0F 01 /4"}: {fixRemoveTag("operand16"), fixAddTag("operand32")},
// Express to the decoder that the rel16 only applies in 16-bit operand mode.
{"JA rel16", "0F 87 cw"}: {fixAddTag("operand16")},
{"JAE rel16", "0F 83 cw"}: {fixAddTag("operand16")},
{"JB rel16", "0F 82 cw"}: {fixAddTag("operand16")},
{"JBE rel16", "0F 86 cw"}: {fixAddTag("operand16")},
{"JE rel16", "0F 84 cw"}: {fixAddTag("operand16")},
{"JG rel16", "0F 8F cw"}: {fixAddTag("operand16")},
{"JGE rel16", "0F 8D cw"}: {fixAddTag("operand16")},
{"JL rel16", "0F 8C cw"}: {fixAddTag("operand16")},
{"JLE rel16", "0F 8E cw"}: {fixAddTag("operand16")},
{"JNE rel16", "0F 85 cw"}: {fixAddTag("operand16")},
{"JNO rel16", "0F 81 cw"}: {fixAddTag("operand16")},
{"JNP rel16", "0F 8B cw"}: {fixAddTag("operand16")},
{"JNS rel16", "0F 89 cw"}: {fixAddTag("operand16")},
{"JO rel16", "0F 80 cw"}: {fixAddTag("operand16")},
{"JP rel16", "0F 8A cw"}: {fixAddTag("operand16")},
{"JS rel16", "0F 88 cw"}: {fixAddTag("operand16")},
{"JA rel32", "0F 87 cd"}: {fixAddTag("operand32")},
{"JAE rel32", "0F 83 cd"}: {fixAddTag("operand32")},
{"JB rel32", "0F 82 cd"}: {fixAddTag("operand32")},
{"JBE rel32", "0F 86 cd"}: {fixAddTag("operand32")},
{"JE rel32", "0F 84 cd"}: {fixAddTag("operand32")},
{"JG rel32", "0F 8F cd"}: {fixAddTag("operand32")},
{"JGE rel32", "0F 8D cd"}: {fixAddTag("operand32")},
{"JL rel32", "0F 8C cd"}: {fixAddTag("operand32")},
{"JLE rel32", "0F 8E cd"}: {fixAddTag("operand32")},
{"JNE rel32", "0F 85 cd"}: {fixAddTag("operand32")},
{"JNO rel32", "0F 81 cd"}: {fixAddTag("operand32")},
{"JNP rel32", "0F 8B cd"}: {fixAddTag("operand32")},
{"JNS rel32", "0F 89 cd"}: {fixAddTag("operand32")},
{"JO rel32", "0F 80 cd"}: {fixAddTag("operand32")},
{"JP rel32", "0F 8A cd"}: {fixAddTag("operand32")},
{"JS rel32", "0F 88 cd"}: {fixAddTag("operand32")},
{"LSL r16, r/m16", "0F 03 /r"}: {fixAddTag("operand16")},
}
// extraInsts lists instruction forms that are not taken from the manual's
// tables: undocumented instructions, forms needed by the decoder, and
// variants that disassemblers recognize.
// Each entry gives the syntax, the opcode, validity in 32-bit and 64-bit
// mode, and optionally cpuid, tags and a comma-separated per-operand action.
// NOTE(review): the code that merges these into the instruction list is
// outside this section.
var extraInsts = []*instruction{
// Undocumented.
{syntax: "ICEBP", opcode: "F1", valid32: "V", valid64: "V"},
{syntax: "UD1", opcode: "0F B9", valid32: "V", valid64: "V"},
{syntax: "FFREEP ST(i)", opcode: "DF C0+i", valid32: "V", valid64: "V", action: "w"},
// Where did these come from? They were in version 0.01 of the csv table.
{syntax: "MOVNTSD m64, xmm1", opcode: "F2 0F 2B /r", valid32: "V", valid64: "V", cpuid: "SSE", action: "w,r"},
{syntax: "MOVNTSS m32, xmm1", opcode: "F3 0F 2B /r", valid32: "V", valid64: "V", cpuid: "SSE", action: "w,r"},
// These express to the decoder that in 64-bit mode
// an operand prefix does not affect the size of the relative offset.
{syntax: "CALL rel32", opcode: "E8 cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"},
{syntax: "JMP rel32", opcode: "E9 cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"},
{syntax: "JA rel32", opcode: "0F 87 cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"},
{syntax: "JAE rel32", opcode: "0F 83 cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"},
{syntax: "JB rel32", opcode: "0F 82 cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"},
{syntax: "JBE rel32", opcode: "0F 86 cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"},
{syntax: "JE rel32", opcode: "0F 84 cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"},
{syntax: "JG rel32", opcode: "0F 8F cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"},
{syntax: "JGE rel32", opcode: "0F 8D cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"},
{syntax: "JL rel32", opcode: "0F 8C cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"},
{syntax: "JLE rel32", opcode: "0F 8E cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"},
{syntax: "JNE rel32", opcode: "0F 85 cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"},
{syntax: "JNO rel32", opcode: "0F 81 cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"},
{syntax: "JNP rel32", opcode: "0F 8B cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"},
{syntax: "JNS rel32", opcode: "0F 89 cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"},
{syntax: "JO rel32", opcode: "0F 80 cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"},
{syntax: "JP rel32", opcode: "0F 8A cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"},
{syntax: "JS rel32", opcode: "0F 88 cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"},
// Disassemblers recognize these, but they're not in the manual.
// Not sure if they really exist.
// The 16-16 and 32-32 forms don't really make sense since there's nothing to extend.
{syntax: "MOVSX r16, r/m16", opcode: "0F BF /r", valid32: "V", valid64: "V", tags: []string{"operand16"}, action: "w,r"},
{syntax: "MOVSXD r16, r/m32", opcode: "63 /r", valid32: "N.E.", valid64: "V", tags: []string{"operand16"}, action: "w,r"},
{syntax: "MOVSXD r32, r/m32", opcode: "63 /r", valid32: "N.E.", valid64: "V", tags: []string{"operand32"}, action: "w,r"},
{syntax: "MOVZX r16, r/m16", opcode: "0F B7 /r", valid32: "V", valid64: "V", tags: []string{"operand16"}, action: "w,r"},
{syntax: "LAR r64, r/m16", opcode: "REX.W 0F 02 /r", valid32: "N.E.", valid64: "V", action: "w,r"},
{syntax: "SLDT r32/m16", opcode: "0F 00 /0", valid32: "V", valid64: "V", tags: []string{"operand32"}, action: "w"},
{syntax: "STR r32/m16", opcode: "0F 00 /1", valid32: "V", valid64: "V", tags: []string{"operand32"}, action: "w"},
{syntax: "STR r64/m16", opcode: "REX.W 0F 00 /1", valid32: "N.E.", valid64: "V", action: "w"},
{syntax: "BSWAP r16op", opcode: "0F C8+rd", valid32: "V", valid64: "V", tags: []string{"operand16"}, action: "rw"},
// Do these exist?
// I am not sure where they came from, and xed doesn't recognize them.
//{syntax: "MOV TR0-TR7, rmr32", opcode: "0F 26 /r", valid32: "V", valid64: "N.E.", tags: []string{"modrm_regonly"}, action: "w,r"},
//{syntax: "MOV TR0-TR7, rmr64", opcode: "0F 26 /r", valid32: "N.E.", valid64: "V", tags: []string{"modrm_regonly"}, action: "w,r"},
//{syntax: "MOV rmr32, TR0-TR7", opcode: "0F 24 /r", valid32: "V", valid64: "N.E.", tags: []string{"modrm_regonly"}, action: "w,r"},
//{syntax: "MOV rmr64, TR0-TR7", opcode: "0F 24 /r", valid32: "N.E.", valid64: "V", tags: []string{"modrm_regonly"}, action: "w,r"},
{syntax: "MOV Sreg, r32/m16", opcode: "8E /r", valid32: "V", valid64: "V", tags: []string{"operand32"}, action: "w,r"},
{syntax: "MOV r/m32, Sreg", opcode: "8C /r", valid32: "V", valid64: "V", tags: []string{"operand32"}, action: "w,r"},
}
// A fixer applies one correction to an instruction, modifying it in place.
// The fixup table maps (syntax, opcode) pairs to lists of fixers.
type fixer func(*instruction)
// fixAddTag returns a fixer that passes the instruction and tag to addTag.
func fixAddTag(tag string) fixer {
	add := func(target *instruction) { addTag(target, tag) }
	return add
}
// fixRemoveTag returns a fixer that passes the instruction and tag to
// removeTag.
func fixRemoveTag(tag string) fixer {
	remove := func(target *instruction) { removeTag(target, tag) }
	return remove
}
// fixRename returns a fixer that rewrites the instruction's syntax to use op
// as its operation name, keeping the argument list that splitSyntax extracts
// from the existing syntax.
func fixRename(op string) fixer {
	rename := func(target *instruction) {
		_, operands := splitSyntax(target.syntax)
		renamed := joinSyntax(op, operands)
		target.syntax = renamed
	}
	return rename
}
// fixArg returns a fixer that replaces element i (0-based) of the argument
// list that splitSyntax extracts from the instruction's syntax with arg, and
// then rebuilds the syntax with joinSyntax. The operation name and the other
// arguments are left as they were.
//
// An index outside the argument list means the fixup table entry does not
// match the instruction it is keyed on. The fixer then panics with a message
// naming the instruction, rather than with a bare index-out-of-range runtime
// error that does not identify the faulty entry.
func fixArg(i int, arg string) fixer {
	return func(inst *instruction) {
		op, args := splitSyntax(inst.syntax)
		if i < 0 || i >= len(args) {
			panic(fmt.Sprintf("fixArg(%d, %q): instruction %q has %d args", i, arg, inst.syntax, len(args)))
		}
		args[i] = arg
		inst.syntax = joinSyntax(op, args)
	}
}
// fixIfValid returns a fixer that runs fix only on an instruction whose
// valid32 and valid64 fields equal the given values exactly; any other
// instruction is left untouched.
func fixIfValid(valid32, valid64 string, fix fixer) fixer {
	return func(target *instruction) {
		if target.valid32 != valid32 || target.valid64 != valid64 {
			return
		}
		fix(target)
	}
}
// fixValid returns a fixer that overwrites the instruction's valid32 and
// valid64 fields with the given values.
func fixValid(valid32, valid64 string) fixer {
	return func(target *instruction) {
		target.valid32, target.valid64 = valid32, valid64
	}
}
// fixOpcode returns a fixer that overwrites the instruction's opcode field
// with opcode.
func fixOpcode(opcode string) fixer {
	set := func(target *instruction) { target.opcode = opcode }
	return set
}
// cleanup normalizes the instruction list and returns the cleaned list.
//
// It works in several passes:
//
//  1. Per instruction: drop the duplicate JZ entries, repair the opcode and
//     syntax strings, reconcile the syntax arguments with the encoding
//     details, derive the read/write action list, and add modrm/cpuid tags.
//     Problems are reported on standard error; the instruction is kept.
//  2. Sort by opcode and tag pairs that differ only in operand size
//     with "operand16" / "operand32".
//  3. Tag pseudo-ops: entries whose opcode is subsumed by another entry.
//  4. Tag REX / REX.W variants relative to their non-REX counterparts.
//  5. Undo the temporary "XX " opcode prefix given to half of the XCHG forms.
//  6. Apply the manual fixup table and sort each instruction's tags.
//  7. Restore the original order (by seq) and append extraInsts; when
//     onlySomePages is set, only those whose mnemonic was seen are appended.
//
// The input slice's backing array is reused for the result, and the
// instructions it points to are modified in place.
func cleanup(insts []*instruction) []*instruction {
	// haveOp records the mnemonics seen; it is only needed (and only
	// allocated) when onlySomePages is set.
	var haveOp map[string]bool
	if onlySomePages {
		haveOp = map[string]bool{}
	}

	// Clean individual instruction encodings and opcode sequences.
	sawJZ := map[string]bool{}
	out := insts[:0] // filter in place
	for seq, inst := range insts {
		inst.seq = seq

		// There are two copies each of JZ rel16 and JZ rel32. Delete the second.
		if strings.HasPrefix(inst.syntax, "JZ rel") {
			if sawJZ[inst.syntax] {
				continue
			}
			sawJZ[inst.syntax] = true
		}
		out = append(out, inst)

		// Intel CMPXCHG16B and CMPXCHG8B have surprise " m64" or " m128" at end of encoding.
		surprises := []string{
			" m64",
			" m128",
		}
		for _, s := range surprises {
			if strings.HasSuffix(inst.syntax, s) && strings.HasSuffix(inst.opcode, s) {
				inst.opcode = strings.TrimSuffix(inst.opcode, s)
			}
		}

		// Strip trailing footnote asterisks from the mnemonic.
		op, args := splitSyntax(inst.syntax)
		op = strings.TrimRight(op, "*")
		inst.syntax = joinSyntax(op, args)

		// Check argument names in syntax against encoding details.
		// An entry in the encodings table overrides the instruction's own details.
		if enc, ok := encodings[inst.syntax]; ok {
			inst.args = enc
		}
		if len(args) == len(inst.args)+1 && args[len(args)-1] == "imm8" {
			// The encoding details omit a trailing imm8 operand; add it.
			fixed := make([]string, len(args))
			copy(fixed, inst.args)
			fixed[len(args)-1] = "imm8"
			inst.args = fixed
		} else if len(args) == 0 && len(inst.args) == 1 && inst.args[0] == "NA" {
			// A lone "NA" encoding detail means the instruction has no operands.
			inst.args = []string{}
		} else if len(args) != len(inst.args) {
			fmt.Fprintf(os.Stderr, "p.%d: %s has %d args but %d encoding details:\n\t%s\n", inst.page, inst.syntax, len(args), len(inst.args), strings.Join(inst.args, "; "))
			inst.syntax = joinSyntax(op, args)
			continue
		}

		// action collects one r/w annotation per argument.
		var action []string
		for i, arg := range args {
			arg = strings.TrimSpace(arg)
			arg = strings.TrimRight(arg, "*")
			if (arg == "reg" || strings.HasPrefix(arg, "reg/")) && containsAll(inst.desc, "upper bits", "r64", "zero") {
				arg = "r32" + strings.TrimPrefix(arg, "reg")
			}

			// Normalize the spelling of the encoding detail.
			enc := inst.args[i]
			enc = strings.TrimSpace(enc)
			switch {
			case strings.HasSuffix(enc, " (r))"):
				enc = strings.TrimSuffix(enc, ")")
			case strings.HasSuffix(enc, " (R)"):
				enc = strings.TrimSuffix(enc, " (R)") + " (r)"
			case strings.HasSuffix(enc, " (W)"):
				enc = strings.TrimSuffix(enc, " (W)") + " (w)"
			case strings.HasSuffix(enc, " (r,w)"):
				enc = strings.TrimSuffix(enc, " (r,w)") + " (r, w)"
			case enc == "Imm8":
				enc = "imm8"
			case enc == "imm8/26/32":
				enc = "imm8/16/32"
			case enc == "BaseReg (R): VSIB:base, VectorReg(R): VSIB:index":
				enc = "vsib (r)"
			}
			inst.args[i] = enc

			// Split the r/w annotation off the encoding detail, falling back
			// to the opAction table when the detail carries none.
			switch {
			case strings.HasSuffix(enc, " (r)"):
				action = append(action, "r")
				enc = strings.TrimSuffix(enc, " (r)")
			case strings.HasSuffix(enc, " (w)"):
				action = append(action, "w")
				enc = strings.TrimSuffix(enc, " (w)")
			case strings.HasSuffix(enc, " (r, w)"):
				action = append(action, "rw")
				enc = strings.TrimSuffix(enc, " (r, w)")
			case strings.HasPrefix(enc, "imm"), enc == "Offset", enc == "iw", arg == "1", arg == "0", arg == "3":
				action = append(action, "r")
			case i < len(opAction[op]):
				action = append(action, opAction[op][i])
			default:
				fmt.Fprintf(os.Stderr, "p.%d: %s has encoding %s for %s but no r/w annotations\n", inst.page, inst.syntax, enc, arg)
				action = append(action, "?")
			}

			// Correct the argument name based on the mnemonic and encoding.
			if arg == "mem" && op == "LDDQU" {
				arg = "m128"
			}
			if arg == "reg" && op == "LAR" {
				arg = "r32"
			}
			if actual := encodeReplace[[2]string{arg, enc}]; actual != "" {
				arg = actual
			}
			if (arg == "r8" || arg == "r16" || arg == "r32" || arg == "r64") && enc == "ModRM:r/m" {
				addTag(inst, "modrm_regonly")
				arg = "rmr" + arg[1:]
			}
			if (arg == "xmm2" || arg == "ymm2") && enc == "ModRM:r/m" {
				addTag(inst, "modrm_regonly")
			}
			if (arg == "m8" || arg == "m16" || arg == "m32" || arg == "m64" || arg == "m128" || arg == "m256") && enc == "ModRM:r/m" {
				addTag(inst, "modrm_memonly")
			}
			if arg == "r64" && (inst.syntax == "MOV r64, CR8" || inst.syntax == "MOV CR8, r64") {
				arg = "rmr64"
				addTag(inst, "modrm_regonly")
			}
			if arg == "CR8" {
				enc = ""
			}

			if !encodeOK[[2]string{arg, enc}] {
				// The second line of the message is a ready-to-paste encodeOK entry.
				fmt.Fprintf(os.Stderr, "p.%d: %s has invalid encoding %s for %s\n\t{%q, %q}: true,\n", inst.page, inst.syntax, enc, arg, arg, enc)
			}
			args[i] = arg

			// Intel SETcc and others are missing the /r.
			// But CALL rel16 and CALL rel32 have a bad encoding table so ignore the ModRM there.
			if strings.HasPrefix(enc, "ModRM") && !strings.Contains(inst.opcode, " /") && op != "CALL" {
				inst.opcode += " /r"
			}
			if strings.HasPrefix(enc, "ModRM:reg") && !strings.Contains(inst.opcode, "/r") {
				// The opcode is taken up with something else. Bug in table.
				fmt.Fprintf(os.Stderr, "p.%d: %s has invalid encoding %s: no reg field in %s\n", inst.page, inst.syntax, arg, inst.opcode)
			}

			// XBEGIN is missing cw cd.
			if enc == "Offset" && arg == "rel16" && !strings.Contains(inst.opcode, " cw") {
				inst.opcode += " cw"
			}
			if enc == "Offset" && arg == "rel32" && !strings.Contains(inst.opcode, " cd") {
				inst.opcode += " cd"
			}
			if enc == "Moffs" && !strings.Contains(inst.opcode, "cm") {
				inst.opcode += " cm"
			}

			// Rewritten on every iteration; the last one covers all arguments.
			inst.action = strings.Join(action, ",")
		}
		inst.syntax = joinSyntax(op, args)

		// The Intel manual lists each XCHG form with arguments in both orders.
		// While this is technically correct, it confuses lots of the analysis.
		// Change half of them to start with a fake "XX" byte.
		if op == "XCHG" && !strings.HasPrefix(args[0], "r/") && !strings.HasSuffix(args[0], "op") {
			inst.opcode = "XX " + inst.opcode
		}

		// Intel manual is not great about disabling REX instructions on 32-bit systems.
		if strings.Contains(inst.opcode, "REX") && inst.valid32 == "V" {
			inst.valid32 = "N.E."
		}

		// Derive the minimum CPU from the compatibility text.
		if inst.valid32 == "V" {
			switch {
			case containsAll(inst.compat, "not supported", "earlier than the Intel486"):
				inst.cpuid = "486"
			case containsAll(inst.compat, "not supported", "earlier than the Pentium"),
				containsAll(inst.compat, "were introduced", "with the Pentium"):
				inst.cpuid = "Pentium"
			case containsAll(inst.compat, "were introduced", "in the Pentium II"):
				inst.cpuid = "PentiumII"
			case containsAll(inst.compat, "were introduced", "in the P6 family"),
				containsAll(inst.compat, "were introduced in P6 family"):
				addTag(inst, "P6")
			}
		}

		if onlySomePages {
			op, _ := splitSyntax(inst.syntax)
			haveOp[op] = true
		}
	}
	insts = out

	sort.Sort(byOpcode(insts))

	// Detect operand size dependencies.
	var last *instruction
	for _, inst := range insts {
		if last != nil {
			f1, _ := splitOpcode(last.opcode)
			f2, _ := splitOpcode(inst.opcode)
			if f1 == f2 {
				// Conflict: cannot distinguish instructions based on fixed prefix.
				if is16vs32pair(last, inst) {
					addTag(last, "operand16")
					addTag(inst, "operand32")
					continue
				}
				if is16vs32pair(inst, last) {
					addTag(last, "operand32")
					addTag(inst, "operand16")
					last = inst
					continue
				}
			}
		}
		last = inst
	}

	// Detect pseudo-ops, defined as opcode entries subsumed by more general ones.
	// seen maps an opcode to its canonical instruction; last is the most recent
	// instruction in this pass that was not tagged as a pseudo-op.
	seen := map[string]*instruction{}
	last = nil // do not carry over the value left by the operand-size pass
	for _, inst := range insts {
		if strings.HasPrefix(inst.opcode, "9B ") { // FWAIT prefix
			addTag(inst, "pseudo")
			continue
		}
		if inst.opcode == "F0" || inst.opcode == "F2" || inst.opcode == "F3" {
			addTag(inst, "pseudo")
			continue
		}
		if strings.HasPrefix(inst.syntax, "REP ") || strings.HasPrefix(inst.syntax, "REPE ") || strings.HasPrefix(inst.syntax, "REPNE ") {
			addTag(inst, "pseudo")
			continue
		}
		if strings.HasPrefix(inst.syntax, "SAL ") { // SHL is canonical
			addTag(inst, "pseudo")
			continue
		}
		if old := seen[inst.opcode]; old != nil {
			if condLess(old.syntax, inst.syntax) {
				addTag(inst, "pseudo")
				continue
			}
			if xchgLess(inst.syntax, old.syntax) {
				// inst replaces old as the canonical form. Use addTag so that
				// old does not end up with "pseudo" twice if it was already
				// tagged by the canGenerate check below.
				addTag(old, "pseudo")
				seen[inst.opcode] = inst
				continue
			}
		}
		seen[inst.opcode] = inst
		if last != nil && canGenerate(last.opcode, inst.opcode) {
			addTag(inst, "pseudo")
			continue
		}
		last = inst
	}

	// Relate REX and REX.W forms to the same opcode without the prefix.
	for _, inst := range insts {
		if strings.Contains(inst.opcode, "REX ") {
			if old := seen[strings.Replace(inst.opcode, "REX ", "", 1)]; old != nil && old.syntax == inst.syntax {
				addTag(inst, "pseudo64")
				continue
			} else if old != nil && hasTag(old, "pseudo") {
				addTag(inst, "pseudo")
				continue
			}
		}
		if strings.Contains(inst.opcode, "REX.W ") {
			if old := seen[strings.Replace(inst.opcode, "REX.W ", "", -1)]; old != nil && old.syntax == inst.syntax {
				addTag(old, "ignoreREXW")
				addTag(inst, "pseudo")
				continue
			} else if old != nil && !hasTag(old, "pseudo") && false {
				// unreachable placeholder removed below
			} else if old != nil && hasTag(old, "pseudo") {
				addTag(inst, "pseudo")
				continue
			} else if old != nil && !hasTag(old, "operand16") && !hasTag(old, "operand32") {
				// There is a 64-bit form of this instruction.
				// Mark the form without REX.W (old) as only valid in the
				// non-64-bit operand modes.
				addTag(old, "operand16")
				addTag(old, "operand32")
				continue
			}
		}
	}

	// Undo XCHG hack above.
	for _, inst := range insts {
		if strings.HasPrefix(inst.opcode, "XX ") {
			inst.opcode = strings.TrimPrefix(inst.opcode, "XX ")
			addTag(inst, "pseudo")
			removeTag(inst, "pseudo64")
		}
	}

	// Last ditch effort. Manual fixes.
	// Some things are too hard to infer.
	for _, inst := range insts {
		for _, fix := range fixup[[2]string{inst.syntax, inst.opcode}] {
			fix(inst)
		}
		sort.Strings(inst.tags)
	}

	// Restore the original order before appending the extra instructions.
	sort.Sort(bySeq(insts))

	if onlySomePages {
		for _, inst := range extraInsts {
			op, _ := splitSyntax(inst.syntax)
			if haveOp[op] {
				insts = append(insts, inst)
			}
		}
	} else {
		insts = append(insts, extraInsts...)
	}
	return insts
}
// hasTag reports whether tag is present in inst.tags.
func hasTag(inst *instruction, tag string) bool {
	for i := range inst.tags {
		if inst.tags[i] == tag {
			return true
		}
	}
	return false
}
// removeTag deletes every occurrence of tag from inst.tags.
// The slice is compacted in place; it is left untouched when tag is absent.
func removeTag(inst *instruction, tag string) {
	if !hasTag(inst, tag) {
		return
	}
	kept := 0
	for _, t := range inst.tags {
		if t == tag {
			continue
		}
		inst.tags[kept] = t
		kept++
	}
	inst.tags = inst.tags[:kept]
}
// addTag appends tag to inst.tags unless it is already present.
func addTag(inst *instruction, tag string) {
	if hasTag(inst, tag) {
		return
	}
	inst.tags = append(inst.tags, tag)
}
// byOpcode implements sort.Interface, ordering instructions by opcode
// (per opcodeLess), then by condition-code preference (per condLess),
// then by syntax string, and finally by seq.
type byOpcode []*instruction

func (s byOpcode) Len() int {
	return len(s)
}

func (s byOpcode) Swap(i, j int) {
	s[i], s[j] = s[j], s[i]
}

func (s byOpcode) Less(i, j int) bool {
	a, b := s[i], s[j]
	switch {
	case a.opcode != b.opcode:
		return opcodeLess(a.opcode, b.opcode)
	case condLess(a.syntax, b.syntax):
		return true
	case condLess(b.syntax, a.syntax):
		return false
	case a.syntax != b.syntax:
		return a.syntax < b.syntax
	}
	return a.seq < b.seq
}
// bySeq implements sort.Interface, ordering instructions by their seq field.
type bySeq []*instruction

func (s bySeq) Len() int {
	return len(s)
}

func (s bySeq) Swap(i, j int) {
	s[i], s[j] = s[j], s[i]
}

func (s bySeq) Less(i, j int) bool {
	a, b := s[i], s[j]
	return a.seq < b.seq
}
// bySyntax implements sort.Interface, ordering instructions by syntax
// string and breaking ties by opcode string.
type bySyntax []*instruction

func (s bySyntax) Len() int {
	return len(s)
}

func (s bySyntax) Swap(i, j int) {
	s[i], s[j] = s[j], s[i]
}

func (s bySyntax) Less(i, j int) bool {
	a, b := s[i], s[j]
	if a.syntax == b.syntax {
		return a.opcode < b.opcode
	}
	return a.syntax < b.syntax
}
// condLess reports whether the conditional instruction syntax x
// should sort ahead of y.
// Only the mnemonics are compared. x is less than y when, for some pair
// in condPrefs, x ends in the pair's first suffix, y ends in its second,
// and the two mnemonics are otherwise identical.
// Preferred condition codes therefore sort first, and the others are the
// ones recorded as pseudo-operations.
func condLess(x, y string) bool {
	opX, _ := splitSyntax(x)
	opY, _ := splitSyntax(y)
	for _, pair := range condPrefs {
		if !strings.HasSuffix(opX, pair[0]) || !strings.HasSuffix(opY, pair[1]) {
			continue
		}
		if strings.TrimSuffix(opX, pair[0]) == strings.TrimSuffix(opY, pair[1]) {
			return true
		}
	}
	return false
}
// xchgLess reports whether the xchg instruction x should be considered less than y.
// That is the case only when x starts with "XCHG " and compares
// lexically greater than y.
func xchgLess(x, y string) bool {
	if !strings.HasPrefix(x, "XCHG ") {
		return false
	}
	return y < x
}
// opcodeLess reports whether opcode string x should be considered less than y.
// We sort wildcard fields like "ib" before literal bytes like "0A".
// A string that is a proper prefix of the other sorts first.
func opcodeLess(x, y string) bool {
	shorter := len(x)
	if len(y) < shorter {
		shorter = len(y)
	}
	for i := 0; i < shorter; i++ {
		cx, cy := x[i], y[i]
		if cx == cy {
			continue
		}
		// sort word before doubleword
		switch {
		case cx == 'w' && cy == 'd':
			return true
		case cx == 'd' && cy == 'w':
			return false
		}
		// Sort lower-case before non-lower-case.
		// The byte subtraction wraps around, so anything below 'a'
		// becomes a large value and lands after the lower-case letters.
		// This sorts "ib" before literal bytes like "0A", for example.
		return cx-'a' < cy-'a'
	}
	return len(x) < len(y)
}
// splitOpcode splits an opcode into its fixed and variable portions.
// For example "05 iw" splits into "05" and "iw".
// The fixed portion is the longest leading run of digits, upper-case
// letters, spaces, '.', '+', and two-character "/x" fields; it is returned
// with surrounding space trimmed. The variable portion is everything after
// that run, returned as is.
func splitOpcode(x string) (fixed, variable string) {
	end := 0
scan:
	for end < len(x) {
		c := x[end]
		switch {
		case c >= '0' && c <= '9', c >= 'A' && c <= 'Z', c == ' ', c == '.', c == '+':
			end++
		case c == '/' && end+2 <= len(x):
			// Skip the slash together with the character that follows it.
			end += 2
		default:
			break scan
		}
	}
	return strings.TrimSpace(x[:end]), x[end:]
}
// canGenerate reports whether opcode string x can generate opcode string y.
// For example "D5 ib" can generate "D5 0A".
// Any string x is not considered to generate itself.
//
// x must end in one of the wildcards below, and y must consist of the part
// of x before the wildcard followed by a literal the wildcard allows:
//
//	ib   any two upper-case hex digits
//	0+i  one digit in the range 0-7
//	8+i  one digit in the range 8-F
//
// The match is made on the fixed part of x rather than on the longest
// common prefix of x and y, so the first value of a "+i" range is accepted:
// "D8 C0+i" generates "D8 C0" and "D9 C8+i" generates "D9 C8".
func canGenerate(x, y string) bool {
	// literal returns the characters of y that stand in for wildcard,
	// provided x ends in wildcard and y is the rest of x followed by
	// exactly n more characters.
	literal := func(wildcard string, n int) (string, bool) {
		if !strings.HasSuffix(x, wildcard) {
			return "", false
		}
		fixed := strings.TrimSuffix(x, wildcard)
		if len(y) != len(fixed)+n || !strings.HasPrefix(y, fixed) {
			return "", false
		}
		return y[len(fixed):], true
	}

	if b, ok := literal("ib", 2); ok {
		return allHex(b)
	}
	if d, ok := literal("0+i", 1); ok {
		return '0' <= d[0] && d[0] <= '7'
	}
	if d, ok := literal("8+i", 1); ok {
		return d[0] == '8' || d[0] == '9' || 'A' <= d[0] && d[0] <= 'F'
	}
	return false
}

// allHex reports whether s is entirely hex digits.
// Only the digits 0-9 and the upper-case letters A-F count;
// the empty string is reported as all hex.
func allHex(s string) bool {
	for _, c := range s {
		if '0' <= c && c <= '9' || 'A' <= c && c <= 'F' {
			continue
		}
		return false
	}
	return true
}
// is16vs32pair reports whether x and y are the 16- and 32-bit variants of the same instruction,
// based on analysis of the mnemonic syntax.
// x is taken as the 16-bit form: the result is true when one of three
// rewrites of x.syntax yields exactly y.syntax.
func is16vs32pair(x, y *instruction) bool {
	want := y.syntax
	if conv16.Replace(x.syntax) == want {
		return true
	}
	// LSL etc
	if strings.Replace(x.syntax, "r16, r/", "r32, r32/", -1) == want {
		return true
	}
	// MOVSXD, MOVSX, etc
	return strings.Replace(x.syntax, "r16", "r32", 1) == want
}
// containsAll reports whether every string in targ occurs in x, in the
// given order and without overlapping: each one is searched for only in
// the text after the first match of the one before it.
// With no targets the result is true.
func containsAll(x string, targ ...string) bool {
	if len(targ) == 0 {
		return true
	}
	first := targ[0]
	at := strings.Index(x, first)
	if at < 0 {
		return false
	}
	return containsAll(x[at+len(first):], targ[1:]...)
}