x86/x86spec/xeddata: add XED datafiles reader xeddata package implements Intel XED datafiles reader. The implementation is very x86spec-specific as it implements only a few data types, enumerations and file format parsers that are required to generate "x86.csv" from XED tables. See "doc.go" for detailed package overview. The XED version (commit hash) specified in doc.go is not strict dependency, but rather a hint that helps to select revision that is guaranteed to work. Change-Id: I848eeb159d9353d533f13bbf0e83a47a5f7772ce Reviewed-on: https://go-review.googlesource.com/88015 Run-TryBot: Iskander Sharipov <iskander.sharipov@intel.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Cherry Zhang <cherryyz@google.com>

commit: afdde57c62b3d0a4931f5e6f8091f0336eaba828 [log] [tgz]
author: isharipo <iskander.sharipov@intel.com> Wed Jan 17 13:39:34 2018 +0300
committer: Ilya Tocar <ilya.tocar@intel.com> Tue Apr 03 14:27:31 2018 +0000
tree: 535a9fe2b862c171c939914fd6cff450a240ab44
parent: a91cccae75b8ebb5c8303204e33e61ea84c7aea3 [diff]
diff --git a/x86/x86spec/xeddata/database.go b/x86/x86spec/xeddata/database.go
new file mode 100644
index 0000000..115e22e
--- /dev/null
+++ b/x86/x86spec/xeddata/database.go

@@ -0,0 +1,270 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package xeddata
+
+import (
+	"bytes"
+	"fmt"
+	"io"
+	"io/ioutil"
+	"os"
+	"path/filepath"
+	"regexp"
+	"strings"
+)
+
+// Types for XED enum-like constants.
+type (
+	// OperandSizeMode describes operand size mode (66H prefix).
+	OperandSizeMode int
+
+	// AddressSizeMode describes address size mode (67H prefix).
+	AddressSizeMode int
+
+	// CPUMode describes availability in certain CPU mode.
+	CPUMode int
+)
+
+// Possible operand size modes. XED calls it OSZ.
+const (
+	OpSize16 OperandSizeMode = iota
+	OpSize32
+	OpSize64
+)
+
+// Possible address size modes. XED calls it ASZ.
+const (
+	AddrSize16 AddressSizeMode = iota
+	AddrSize32
+	AddrSize64
+)
+
+// Possible CPU modes. XED calls it MODE.
+const (
+	Mode16 CPUMode = iota
+	Mode32
+	Mode64
+)
+
+var sizeStrings = [...]string{"16", "32", "64"}
+
+// sizeString maps a size enumeration value to its string representation.
+//
+// It panics with a descriptive message when size is not a valid
+// index into sizeStrings, i.e. when it is negative or too large.
+func sizeString(size int) string {
+	// Panic more gracefully than with "index out of range".
+	// If client code specified invalid size enumeration,
+	// this is programming error that should be fixed, not "handled".
+	// Negative values must be rejected here as well, otherwise they
+	// reach the array access below and defeat the purpose of this check.
+	if size < 0 || size >= len(sizeStrings) {
+		panic(fmt.Sprintf("illegal size value: %d", size))
+	}
+	return sizeStrings[size]
+}
+
+// String returns osz bit size string ("16", "32" or "64").
+// Panics on illegal enumerations.
+func (osz OperandSizeMode) String() string {
+	bits := int(osz)
+	return sizeString(bits)
+}
+
+// String returns asz bit size string ("16", "32" or "64").
+// Panics on illegal enumerations.
+func (asz AddressSizeMode) String() string {
+	bits := int(asz)
+	return sizeString(bits)
+}
+
+// Database holds information that is required to
+// properly handle XED datafiles.
+type Database struct {
+	widths map[string]*width // all-widths.txt
+	states map[string]string // all-state.txt
+	xtypes map[string]*xtype // all-element-types.txt
+}
+
+// width is an "all-widths.txt" record.
+type width struct {
+	// Default xtype name (examples: int, i8, f32).
+	xtype string
+
+	// 16, 32 and 64 bit sizes (all may have same value).
+	sizes [3]string
+}
+
+// xtype is an "all-element-types.txt" record.
+type xtype struct {
+	// name is xtype identifier.
+	name string
+
+	// baseType specifies xtype base type.
+	// See "all-element-type-base.txt".
+	baseType string
+
+	// size is an operand data size in bits.
+	size string
+}
+
+// NewDatabase returns Database that loads everything
+// it can find in xedPath.
+// Missing lookup file is not an error, but error during
+// parsing of found file is.
+//
+// Lookup:
+//	"$xedPath/all-state.txt" => db.LoadStates()
+//	"$xedPath/all-widths.txt" => db.LoadWidths()
+//	"$xedPath/all-element-types.txt" => db.LoadXtypes()
+// $xedPath is the interpolated value of function argument.
+//
+// The call NewDatabase("") is valid and returns empty database.
+// Load methods can be used to read lookup files one-by-one.
+//
+// Every file opened by NewDatabase is closed before it returns.
+// When an error is returned, the returned Database holds the
+// lookup tables that were loaded before the failing one.
+func NewDatabase(xedPath string) (*Database, error) {
+	var db Database
+
+	// Files are loaded in this exact order; the first failure stops loading.
+	lookups := []struct {
+		filename string
+		load     func(io.Reader) error
+	}{
+		{"all-state.txt", db.LoadStates},
+		{"all-widths.txt", db.LoadWidths},
+		{"all-element-types.txt", db.LoadXtypes},
+	}
+	for _, l := range lookups {
+		path := filepath.Join(xedPath, l.filename)
+		if err := loadLookupFile(path, l.load); err != nil {
+			return &db, err
+		}
+	}
+
+	return &db, nil
+}
+
+// loadLookupFile opens the file at path, passes it to load and
+// closes it afterwards. A file that can not be opened is skipped
+// without an error; the error returned by load is passed through.
+func loadLookupFile(path string, load func(io.Reader) error) error {
+	f, err := os.Open(path)
+	if err != nil {
+		return nil // Missing lookup file is not an error.
+	}
+	defer f.Close()
+	return load(f)
+}
+
+// LoadWidths replaces db widths with the definitions parsed from r.
+// "widths" are 16/32/64 bit mode type sizes.
+// See "$XED/obj/dgen/all-widths.txt".
+//
+// Previously loaded widths are overwritten even if parsing fails.
+func (db *Database) LoadWidths(r io.Reader) error {
+	widths, err := parseWidths(r)
+	db.widths = widths
+	return err
+}
+
+// LoadStates replaces db states with the definitions parsed from r.
+// "states" are simple macro substitutions without parameters.
+// See "$XED/obj/dgen/all-state.txt".
+//
+// Previously loaded states are overwritten even if parsing fails.
+func (db *Database) LoadStates(r io.Reader) error {
+	states, err := parseStates(r)
+	db.states = states
+	return err
+}
+
+// LoadXtypes replaces db xtypes with the definitions parsed from r.
+// "xtypes" are low-level XED type names.
+// See "$XED/obj/dgen/all-element-types.txt".
+// See "$XED/obj/dgen/all-element-type-base.txt".
+//
+// Previously loaded xtypes are overwritten even if parsing fails.
+func (db *Database) LoadXtypes(r io.Reader) error {
+	xtypes, err := parseXtypes(r)
+	db.xtypes = xtypes
+	return err
+}
+
+// WidthSize translates a width name to its size string for the
+// operand size mode m. For some widths the output is the same for
+// any valid value of m.
+//
+// Returns "" if width is not known to db.
+func (db *Database) WidthSize(width string, m OperandSizeMode) string {
+	if info := db.widths[width]; info != nil {
+		return info.sizes[m]
+	}
+	return ""
+}
+
+// parseWidths reads all of r and collects width records from it.
+// Lines that do not look like a width record are ignored.
+// If the same name occurs several times, the last record wins.
+func parseWidths(r io.Reader) (map[string]*width, error) {
+	data, err := ioutil.ReadAll(r)
+	if err != nil {
+		return nil, fmt.Errorf("parse widths: %v", err)
+	}
+
+	// Lines have two forms:
+	// 1. name xtype size [# comment]
+	// 2. name xtype size16 size32 size64 [# comment]
+	// The longer form comes first in the alternation,
+	// so it is preferred whenever a line has enough fields.
+	reLine := regexp.MustCompile(`(^\s*\w+\s+\w+\s+\w+\s+\w+\s+\w+)|(^\s*\w+\s+\w+\s+\w+)`)
+
+	widths := make(map[string]*width, 128)
+	for _, line := range bytes.Split(data, []byte("\n")) {
+		m := reLine.FindSubmatch(line)
+		if m == nil {
+			continue
+		}
+		matched := m[1]
+		if matched == nil {
+			matched = m[2]
+		}
+		fields := bytes.Fields(matched)
+
+		w := &width{xtype: string(fields[1])}
+		if len(fields) > 3 {
+			// Separate size for each of 16, 32 and 64 bit modes.
+			for i := range w.sizes {
+				w.sizes[i] = string(fields[2+i])
+			}
+		} else {
+			// Single size that is shared by all modes.
+			size := string(fields[2])
+			w.sizes = [3]string{size, size, size}
+		}
+		widths[string(fields[0])] = w
+	}
+
+	return widths, nil
+}
+
+// parseStates reads all of r and collects state (macro) definitions from it.
+// The result maps a state name to its whitespace-trimmed replacement text.
+// Lines that do not look like a state definition are ignored.
+func parseStates(r io.Reader) (map[string]string, error) {
+	raw, err := ioutil.ReadAll(r)
+	if err != nil {
+		return nil, fmt.Errorf("parse states: %v", err)
+	}
+
+	// Lines have form of "name ...replacements [# comment]".
+	// First group is the name, second group is everything
+	// that follows it up to the line end or the comment start.
+	lineRE := regexp.MustCompile(`^\s*(\w+)\s+([^#]+)`)
+
+	states := make(map[string]string, 128)
+	for _, line := range strings.Split(string(raw), "\n") {
+		m := lineRE.FindStringSubmatch(line)
+		if m == nil {
+			continue
+		}
+		states[m[1]] = strings.TrimSpace(m[2])
+	}
+
+	return states, nil
+}
+
+// parseXtypes reads all of r and collects xtype records from it.
+// The result is keyed by xtype name.
+// Lines that do not look like an xtype record are ignored.
+func parseXtypes(r io.Reader) (map[string]*xtype, error) {
+	raw, err := ioutil.ReadAll(r)
+	if err != nil {
+		return nil, fmt.Errorf("parse xtypes: %v", err)
+	}
+
+	// Lines have form of "name baseType size [# comment]".
+	// NOTE(review): whitespace between baseType and size is optional
+	// in this pattern ("\s*"); confirm that this is intended.
+	lineRE := regexp.MustCompile(`^\s*(\w+)\s+(\w+)\s*(\d+)`)
+
+	xtypes := make(map[string]*xtype)
+	for _, line := range strings.Split(string(raw), "\n") {
+		m := lineRE.FindStringSubmatch(line)
+		if m == nil {
+			continue
+		}
+		xt := &xtype{name: m[1], baseType: m[2], size: m[3]}
+		xtypes[xt.name] = xt
+	}
+
+	return xtypes, nil
+}

diff --git a/x86/x86spec/xeddata/doc.go b/x86/x86spec/xeddata/doc.go
new file mode 100644
index 0000000..8022857
--- /dev/null
+++ b/x86/x86spec/xeddata/doc.go

@@ -0,0 +1,49 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package xeddata provides utilities to work with XED datafiles.
+//
+// Main features:
+//	* Fundamental XED enumerations (CPU modes, operand sizes, ...)
+//	* XED objects and their components
+//	* XED datafiles reader (see below)
+//	* Utility functions like ExpandStates
+//
+// The set of file formats that is understood is the minimal
+// set required to generate x86.csv from XED tables:
+//	* states - simple macro substitutions used in patterns
+//	* widths - mappings from width names to their size
+//	* element-types - XED xtype information
+//	* objects - XED objects that constitute "the tables"
+// Collectively, those files are called "datafiles".
+//
+// Terminology is borrowed from XED itself,
+// where appropriate, x86csv names are provided
+// as an alternative.
+//
+// "$XED/foo/bar.txt" notation is used to specify a path to "foo/bar.txt"
+// file under local XED source repository folder.
+//
+// The default usage scheme:
+//	1. Open "XED database" to load required metadata.
+//	2. Read XED file with objects definitions.
+//	3. Operate on XED objects.
+//
+// See example_test.go for complete examples.
+//
+// It is required to build Intel XED before attempting to use
+// its datafiles, as this package expects "all" versions that
+// are concatenated final versions of datafiles.
+// If "$XED/obj/dgen/" does not contain relevant files,
+// then either this documentation is stale or your XED is not built.
+//
+// To see examples of "XED objects" see "testdata/xed_objects.txt".
+//
+// Intel XED https://github.com/intelxed/xed provides all documentation
+// that can be required to understand datafiles.
+// The "$XED/misc/engineering-notes.txt" is particularly useful.
+// For convenience, the most important notes are spread across package comments.
+//
+// Tested with XED 088c48a2efa447872945168272bcd7005a7ddd91.
+package xeddata

diff --git a/x86/x86spec/xeddata/example_test.go b/x86/x86spec/xeddata/example_test.go
new file mode 100644
index 0000000..d9496da
--- /dev/null
+++ b/x86/x86spec/xeddata/example_test.go

@@ -0,0 +1,180 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package xeddata_test
+
+import (
+	"fmt"
+	"log"
+	"strings"
+
+	"golang.org/x/arch/x86/x86spec/xeddata"
+)
+
+// The "testdata/xedpath" directory contains XED metadata files
+// that are supposed to be used for Database initialization.
+
+// Note that XED objects in this file are not real,
+// instructions they describe are fictional.
+
+// This example shows how to print raw XED objects using Reader.
+// Objects are called "raw" because some of their fields may
+// require additional transformations like macro (states) expansion.
+func ExampleReader() {
+	// xedPath is not used by this example:
+	// reading objects does not involve a Database.
+	const xedPath = "testdata/xedpath"
+
+	input := strings.NewReader(`
+{
+ICLASS: VEXADD
+EXCEPTIONS: avx-type-zero
+CPL: 2000
+CATEGORY: AVX-Q
+EXTENSION: AVX-Q
+ATTRIBUTES: A B C
+PATTERN: VV1 0x07 VL128 V66 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM()
+OPERANDS: REG0=XMM_R():w:width_dq:fword64 REG1=XMM_N():r:width_dq:fword64 MEM0:r:width_dq:fword64
+}
+
+{
+ICLASS: COND_MOV_Z
+CPL: 210
+CATEGORY: MOV_IF_COND_MET
+EXTENSION: BASE
+ISA_SET: COND_MOV
+FLAGS: READONLY [ zf-tst ]
+
+PATTERN: 0x0F 0x4F MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM()
+OPERANDS: REG0=GPRv_R():cw MEM0:r:width_v
+PATTERN: 0x0F 0x4F MOD[0b11] MOD=3 REG[rrr] RM[nnn]
+OPERANDS: REG0=GPRv_R():cw REG1=GPRv_B():r
+}`)
+
+	objects, err := xeddata.NewReader(input).ReadAll()
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	// Every PATTERN/OPERANDS pair of an object becomes a separate Inst.
+	for _, o := range objects {
+		fmt.Printf("%s (%s):\n", o.Opcode(), o.Extension)
+		for _, inst := range o.Insts {
+			fmt.Printf("\t[%d] %s\n", inst.Index, inst.Operands)
+		}
+	}
+
+	//Output:
+	// VEXADD (AVX-Q):
+	// 	[0] REG0=XMM_R():w:width_dq:fword64 REG1=XMM_N():r:width_dq:fword64 MEM0:r:width_dq:fword64
+	// COND_MOV_Z (BASE):
+	// 	[0] REG0=GPRv_R():cw MEM0:r:width_v
+	// 	[1] REG0=GPRv_R():cw REG1=GPRv_B():r
+}
+
+// This example shows how to use ExpandStates and its effects.
+func ExampleExpandStates() {
+	const xedPath = "testdata/xedpath"
+
+	input := strings.NewReader(`
+{
+ICLASS: VEXADD
+CPL: 3
+CATEGORY: ?
+EXTENSION: ?
+ATTRIBUTES: AT_A AT_B
+
+PATTERN: _M_VV_TRUE 0x58  _M_VEX_P_66 _M_VLEN_128 _M_MAP_0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM()
+OPERANDS: REG0=XMM_R():w:width_dq:fword64 REG1=XMM_N():r:width_dq:fword64 MEM0:r:width_dq:fword64
+
+PATTERN: _M_VV_TRUE 0x58  _M_VEX_P_66 _M_VLEN_128 _M_MAP_0F MOD[0b11] MOD=3 REG[rrr] RM[nnn]
+OPERANDS: REG0=XMM_R():w:width_dq:fword64 REG1=XMM_N():r:width_dq:fword64 REG2=XMM_B():r:width_dq:fword64
+
+PATTERN: _M_VV_TRUE 0x58  _M_VEX_P_66 _M_VLEN_256 _M_MAP_0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM()
+OPERANDS: REG0=YMM_R():w:qq:fword64 REG1=YMM_N():r:qq:fword64 MEM0:r:qq:fword64
+
+PATTERN: _M_VV_TRUE 0x58  _M_VEX_P_66 _M_VLEN_256 _M_MAP_0F MOD[0b11] MOD=3 REG[rrr] RM[nnn]
+OPERANDS: REG0=YMM_R():w:qq:fword64 REG1=YMM_N():r:qq:fword64 REG2=YMM_B():r:qq:fword64
+}`)
+
+	objects, err := xeddata.NewReader(input).ReadAll()
+	if err != nil {
+		log.Fatal(err)
+	}
+	// The database provides the states (macro definitions) used for expansion.
+	db, err := xeddata.NewDatabase(xedPath)
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	for _, o := range objects {
+		for _, inst := range o.Insts {
+			fmt.Printf("old: %q\n", inst.Pattern)
+			fmt.Printf("new: %q\n", xeddata.ExpandStates(db, inst.Pattern))
+		}
+	}
+
+	//Output:
+	// old: "_M_VV_TRUE 0x58  _M_VEX_P_66 _M_VLEN_128 _M_MAP_0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM()"
+	// new: "VEXVALID=1 0x58 VEX_PREFIX=1 VL=0 MAP=1 MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM()"
+	// old: "_M_VV_TRUE 0x58  _M_VEX_P_66 _M_VLEN_128 _M_MAP_0F MOD[0b11] MOD=3 REG[rrr] RM[nnn]"
+	// new: "VEXVALID=1 0x58 VEX_PREFIX=1 VL=0 MAP=1 MOD[0b11] MOD=3 REG[rrr] RM[nnn]"
+	// old: "_M_VV_TRUE 0x58  _M_VEX_P_66 _M_VLEN_256 _M_MAP_0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM()"
+	// new: "VEXVALID=1 0x58 VEX_PREFIX=1 VL=1 MAP=1 MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM()"
+	// old: "_M_VV_TRUE 0x58  _M_VEX_P_66 _M_VLEN_256 _M_MAP_0F MOD[0b11] MOD=3 REG[rrr] RM[nnn]"
+	// new: "VEXVALID=1 0x58 VEX_PREFIX=1 VL=1 MAP=1 MOD[0b11] MOD=3 REG[rrr] RM[nnn]"
+}
+
+// This example shows how to handle Inst "OPERANDS" field.
+func ExampleOperand() {
+	const xedPath = "testdata/xedpath"
+
+	input := strings.NewReader(`
+{
+ICLASS: ADD_N_TIMES # Like IMUL
+CPL: 3
+CATEGORY: BINARY
+EXTENSION: BASE
+ISA_SET: I86
+FLAGS: MUST [ of-mod sf-u zf-u af-u pf-u cf-mod ]
+
+PATTERN: 0xAA MOD[mm] MOD!=3 REG[0b101] RM[nnn] MODRM()
+OPERANDS: MEM0:r:width_v REG0=AX:rw:SUPP REG1=DX:w:SUPP
+}`)
+
+	objects, err := xeddata.NewReader(input).ReadAll()
+	if err != nil {
+		log.Fatal(err)
+	}
+	db, err := xeddata.NewDatabase(xedPath)
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	inst := objects[0].Insts[0] // Single instruction is enough for this example
+	// Operands are separated by whitespace,
+	// fields of a single operand are separated by ":".
+	for i, rawOperand := range strings.Fields(inst.Operands) {
+		operand, err := xeddata.NewOperand(db, strings.Split(rawOperand, ":"))
+		if err != nil {
+			log.Fatalf("parse operand #%d: %+v", i, err)
+		}
+
+		visibility := "implicit"
+		if operand.IsVisible() {
+			visibility = "explicit"
+		}
+		fmt.Printf("(%s) %s:\n", visibility, rawOperand)
+
+		fmt.Printf("\tname: %q\n", operand.Name)
+		if operand.IsVisible() {
+			fmt.Printf("\t32/64bit width: %s/%s bytes\n",
+				db.WidthSize(operand.Width, xeddata.OpSize32),
+				db.WidthSize(operand.Width, xeddata.OpSize64))
+		}
+	}
+
+	//Output:
+	// (explicit) MEM0:r:width_v:
+	// 	name: "MEM0"
+	// 	32/64bit width: 4/8 bytes
+	// (implicit) REG0=AX:rw:SUPP:
+	// 	name: "REG0=AX"
+	// (implicit) REG1=DX:w:SUPP:
+	// 	name: "REG1=DX"
+}

diff --git a/x86/x86spec/xeddata/object.go b/x86/x86spec/xeddata/object.go
new file mode 100644
index 0000000..4a73973
--- /dev/null
+++ b/x86/x86spec/xeddata/object.go

@@ -0,0 +1,261 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package xeddata
+
+import (
+	"encoding/json"
+	"strings"
+)
+
+// An Object is a single "dec/enc-instruction" XED object from datafiles.
+//
+// Field names and their comments are borrowed from Intel XED
+// engineering notes (see "$XED/misc/engineering-notes.txt").
+//
+// Field values are always trimmed (i.e. no leading/trailing whitespace).
+//
+// Missing optional members are expressed with an empty string.
+//
+// Object contains multiple Inst elements that represent concrete
+// instruction with encoding pattern and operands description.
+type Object struct {
+	// Iclass is instruction class name (opcode).
+	// Iclass alone is not enough to uniquely identify machine instructions.
+	// Example: "PSRLW".
+	Iclass string
+
+	// Disasm is substituted name when a simple conversion
+	// from iclass is inappropriate.
+	// Never combined with DisasmIntel or DisasmATTSV.
+	// Example: "syscall".
+	//
+	// Optional.
+	Disasm string
+
+	// DisasmIntel is like Disasm, but with Intel syntax.
+	// If present, usually comes with DisasmATTSV.
+	// Example: "jmp far".
+	//
+	// Optional.
+	DisasmIntel string
+
+	// DisasmATTSV is like Disasm, but with AT&T/SysV syntax.
+	// If present, usually comes with DisasmIntel.
+	// Example: "ljmp".
+	//
+	// Optional.
+	DisasmATTSV string
+
+	// Attributes describes name set for bits in the binary attributes field.
+	// Example: "NOP X87_CONTROL NOTSX".
+	//
+	// Optional. If not present, zero attribute set is implied.
+	Attributes string
+
+	// Uname is unique name used for deleting / replacing instructions.
+	//
+	// Optional. Provided for completeness, mostly useful for XED internal usage.
+	Uname string
+
+	// CPL is instruction current privilege level restriction.
+	// Can have value of "0" or "3".
+	CPL string
+
+	// Category is an ad-hoc categorization of instructions.
+	// Example: "SEMAPHORE".
+	Category string
+
+	// Extension is an ad-hoc grouping of instructions.
+	// If no ISASet is specified, this is used instead.
+	// Example: "3DNOW"
+	Extension string
+
+	// Exceptions is an exception set name.
+	// Example: "SSE_TYPE_7".
+	//
+	// Optional. Empty exception category generally means that
+	// instruction generates no exceptions.
+	Exceptions string
+
+	// ISASet is a name for the group of instructions that
+	// introduced this feature.
+	// Example: "I286PROTECTED".
+	//
+	// Older objects only defined Extension field.
+	// Newer objects may contain both Extension and ISASet fields.
+	// For some objects Extension==ISASet.
+	// Both fields are required to do precise CPUID-like decisions.
+	//
+	// Optional.
+	ISASet string
+
+	// Flags describes read/written flag bit values.
+	// Example: "MUST [ of-u sf-u af-u pf-u cf-mod ]".
+	//
+	// Optional. If not present, flags are neither read nor written.
+	Flags string
+
+	// A hopefully useful comment.
+	//
+	// Optional.
+	Comment string
+
+	// The object revision.
+	//
+	// Optional.
+	Version string
+
+	// RealOpcode marks unstable (not in SDM yet) instructions with "N".
+	// Normally, always "Y" or not present at all.
+	//
+	// Optional.
+	RealOpcode string
+
+	// Insts are concrete instruction templates that are derived from containing Object.
+	// Inst contains fields PATTERN, OPERANDS, IFORM in enc/dec instruction.
+	Insts []*Inst
+}
+
+// Inst represents a single instruction template.
+//
+// Some templates contain expandable (macro) pattern and operands
+// which tells that there are more than one real instructions
+// that are expressed by the template.
+type Inst struct {
+	// Object that contains properties that are shared with multiple
+	// Inst objects.
+	*Object
+
+	// Index is the position inside XED object.
+	// Object.Insts[Index] returns this inst.
+	Index int
+
+	// Pattern is the sequence of bits and nonterminals used to
+	// decode/encode an instruction.
+	// Example: "0x0F 0x28 no_refining_prefix MOD[0b11] MOD=3 REG[rrr] RM[nnn]".
+	Pattern string
+
+	// Operands are instruction arguments, typically registers,
+	// memory operands and pseudo-resources. Separated by space.
+	// Example: "MEM0:rcw:b REG0=GPR8_R():r REG1=XED_REG_AL:rcw:SUPP".
+	Operands string
+
+	// Iform is a name for the pattern that starts with the
+	// iclass and bakes in the operands. If omitted, XED
+	// tries to generate one. We often add custom suffixes
+	// to these to disambiguate certain combinations.
+	// Example: "MOVAPS_XMMps_XMMps_0F28".
+	//
+	// Optional.
+	Iform string
+}
+
+// Opcode returns instruction name or empty string,
+// if appropriate Object fields are not initialized.
+//
+// The name is the first non-empty field among Iclass, Disasm,
+// DisasmIntel, DisasmATTSV and Uname (checked in that order).
+func (o *Object) Opcode() string {
+	candidates := [...]string{
+		o.Iclass,
+		o.Disasm,
+		o.DisasmIntel,
+		o.DisasmATTSV,
+		o.Uname,
+	}
+	for _, name := range candidates {
+		if name != "" {
+			return name
+		}
+	}
+	return ""
+}
+
+// HasAttribute reports whether o has an attribute with specified name.
+// The check is done at "word" level: a name that is only
+// a substring of some attribute does not match.
+func (o *Object) HasAttribute(name string) bool {
+	attrs := o.Attributes
+	return containsWord(attrs, name)
+}
+
+// String returns pretty-printed inst representation.
+//
+// The result is an indented JSON object in which properties of the
+// containing Object and properties of inst itself are at the same level.
+// Being valid JSON is not guaranteed to be preserved.
+func (inst *Inst) String() string {
+	// A dedicated struct is marshalled instead of inst itself
+	// to get flat printed representation.
+	// A struct (not a map) is used to keep the order of properties stable.
+	type flatObject struct {
+		Iclass      string
+		Disasm      string `json:",omitempty"`
+		DisasmIntel string `json:",omitempty"`
+		DisasmATTSV string `json:",omitempty"`
+		Attributes  string `json:",omitempty"`
+		Uname       string `json:",omitempty"`
+		CPL         string
+		Category    string
+		Extension   string
+		Exceptions  string `json:",omitempty"`
+		ISASet      string `json:",omitempty"`
+		Flags       string `json:",omitempty"`
+		Comment     string `json:",omitempty"`
+		Version     string `json:",omitempty"`
+		RealOpcode  string `json:",omitempty"`
+		Pattern     string
+		Operands    string
+		Iform       string `json:",omitempty"`
+	}
+
+	var flat flatObject
+
+	// Properties shared through the embedded Object.
+	flat.Iclass = inst.Iclass
+	flat.Disasm = inst.Disasm
+	flat.DisasmIntel = inst.DisasmIntel
+	flat.DisasmATTSV = inst.DisasmATTSV
+	flat.Attributes = inst.Attributes
+	flat.Uname = inst.Uname
+	flat.CPL = inst.CPL
+	flat.Category = inst.Category
+	flat.Extension = inst.Extension
+	flat.Exceptions = inst.Exceptions
+	flat.ISASet = inst.ISASet
+	flat.Flags = inst.Flags
+	flat.Comment = inst.Comment
+	flat.Version = inst.Version
+	flat.RealOpcode = inst.RealOpcode
+
+	// Properties of the inst itself.
+	flat.Pattern = inst.Pattern
+	flat.Operands = inst.Operands
+	flat.Iform = inst.Iform
+
+	out, err := json.MarshalIndent(flat, "", "  ")
+	if err != nil {
+		panic(err)
+	}
+	return string(out)
+}
+
+// ExpandStates returns a copy of s where all state macros
+// are expanded.
+// This requires db "states" to be loaded.
+//
+// s is split into whitespace-separated words; every word that names
+// a state with non-empty replacement is substituted. The words of the
+// result are joined by a single space.
+func ExpandStates(db *Database, s string) string {
+	words := strings.Fields(s)
+	for i, w := range words {
+		repl := db.states[w]
+		if repl == "" {
+			continue // Not a state name; keep the word as is.
+		}
+		words[i] = repl
+	}
+	return strings.Join(words, " ")
+}
+
+// containsWord reports whether s contains word as a whole word,
+// that is, an occurrence of word that is delimited by a space
+// (or by the string boundary) on both sides.
+//
+// Every occurrence of word is examined: the first one may be
+// a part of a longer word while a later one is a whole word
+// (for example, "NOP" in "XNOP NOP").
+func containsWord(s, word string) bool {
+	for from := 0; from <= len(s); {
+		i := strings.Index(s[from:], word)
+		if i == -1 {
+			return false
+		}
+		i += from // Index is relative to s[from:].
+		end := i + len(word)
+		leftOK := i == 0 || s[i-1] == ' '
+		rightOK := end == len(s) || s[end] == ' '
+		if leftOK && rightOK {
+			return true
+		}
+		from = i + 1 // Continue with the next occurrence.
+	}
+	return false
+}

diff --git a/x86/x86spec/xeddata/operand.go b/x86/x86spec/xeddata/operand.go
new file mode 100644
index 0000000..0d75f58
--- /dev/null
+++ b/x86/x86spec/xeddata/operand.go

@@ -0,0 +1,159 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package xeddata
+
+import (
+	"errors"
+	"strings"
+)
+
+// OperandVisibility describes operand visibility in XED terms.
+type OperandVisibility int
+
+const (
+	// VisExplicit is a default operand visibility.
+	// Explicit operand is "real" kind of operands that
+	// is shown in syntax and can be specified by the programmer.
+	VisExplicit OperandVisibility = iota
+
+	// VisImplicit is for fixed arg (like EAX); usually shown in syntax.
+	VisImplicit
+
+	// VisSuppressed is like VisImplicit, but not shown in syntax.
+	// In some very rare exceptions, they are also shown in syntax string.
+	VisSuppressed
+
+	// VisEcond is encoder-only conditions. Can be ignored.
+	VisEcond
+)
+
+// Operand holds data that is encoded inside
+// instruction's "OPERANDS" field.
+//
+// Use NewOperand function to decode operand fields into Operand object.
+type Operand struct {
+	// Name is an ID with optional nonterminal name part.
+	//
+	// Possible values: "REG0=GPRv_B", "REG1", "MEM0", ...
+	//
+	// If nonterminal part is present (see NonterminalName), name
+	// can be split into LHS and RHS with NameLHS and NameRHS methods.
+	Name string
+
+	// Action describes argument types.
+	//
+	// Possible values: "r", "w", "rw", "cr", "cw", "crw".
+	// Optional "c" prefix represents conditional access.
+	Action string
+
+	// Width descriptor. It can express simple width like "w" (word, 16bit)
+	// or meta-width like "v", which corresponds to {16, 32, 64} bits.
+	//
+	// Possible values: "", "q", "ds", "dq", ...
+	// Optional.
+	Width string
+
+	// Xtype holds XED-specific type information.
+	//
+	// Possible values: "", "f64", "i32", ...
+	// Optional.
+	Xtype string
+
+	// Attributes serves as container for all other properties.
+	//
+	// Possible values:
+	//   EVEX.b context {
+	//     TXT=ZEROSTR  - zeroing
+	//     TXT=SAESTR   - suppress all exceptions
+	//     TXT=ROUNDC   - rounding
+	//     TXT=BCASTSTR - broadcasting
+	//   }
+	//   MULTISOURCE4 - 4FMA multi-register operand.
+	//
+	// Optional. For most operands, it's nil.
+	Attributes map[string]bool
+
+	// Visibility tells if operand is explicit, implicit or suppressed.
+	Visibility OperandVisibility
+}
+
+var xedVisibilities = map[string]OperandVisibility{
+	"EXPL":  VisExplicit,
+	"IMPL":  VisImplicit,
+	"SUPP":  VisSuppressed,
+	"ECOND": VisEcond,
+}
+
+// NewOperand packs operand fields into Operand.
+// Fields are colon (":") separated parts of the OPERANDS column.
+//
+// At least two fixed-position fields are expected:
+//   [0] - name
+//   [1] - r/w action
+//
+// Every other field is classified by its value: the first field that
+// is a known width becomes Width, a visibility keyword sets Visibility,
+// a known xtype becomes Xtype and anything else is recorded in Attributes.
+//
+// See "$XED/pysrc/opnds.py" to learn about fields format
+// and valid combinations.
+//
+// Requires database with xtypes and widths info.
+// An error is returned if db has no widths loaded or
+// if there are less than two fields.
+func NewOperand(db *Database, fields []string) (Operand, error) {
+	var op Operand
+
+	switch {
+	case db.widths == nil:
+		return op, errors.New("Database.widths is nil")
+	case len(fields) < 2:
+		return op, errors.New("fields should have at least 2 elements")
+	}
+
+	// First two fields are fixed and mandatory.
+	op.Name, op.Action = fields[0], fields[1]
+
+	// Optional fields. The order of cases below is significant.
+	for _, field := range fields[2:] {
+		vis, isVisibility := xedVisibilities[field]
+		switch {
+		case db.widths[field] != nil && op.Width == "":
+			op.Width = field
+		case isVisibility:
+			op.Visibility = vis
+		case db.xtypes[field] != nil:
+			op.Xtype = field
+		default:
+			if op.Attributes == nil {
+				op.Attributes = make(map[string]bool)
+			}
+			op.Attributes[field] = true
+		}
+	}
+
+	return op, nil
+}
+
+// NonterminalName reports whether op.Name consists
+// of LHS and RHS parts (which are separated by "=").
+//
+// RHS is non-terminal name lookup function expression.
+// Example: "REG0=GPRv()" has "GPRv()" name lookup function.
+func (op *Operand) NonterminalName() bool {
+	return strings.Index(op.Name, "=") != -1
+}
+
+// NameLHS returns left hand side part of the non-terminal name.
+// If op.Name has no "=", the whole name is returned.
+// Example: NameLHS("REG0=GPRv()") => "REG0".
+func (op *Operand) NameLHS() string {
+	if i := strings.Index(op.Name, "="); i != -1 {
+		return op.Name[:i]
+	}
+	return op.Name
+}
+
+// NameRHS returns right hand side part of the non-terminal name.
+// If op.Name has no "=" (see NonterminalName), there is
+// no right hand side and empty string is returned.
+// Example: NameRHS("REG0=GPRv()") => "GPRv()".
+func (op *Operand) NameRHS() string {
+	parts := strings.Split(op.Name, "=")
+	if len(parts) < 2 {
+		// Indexing parts[1] here would panic with "index out of range".
+		return ""
+	}
+	return parts[1]
+}
+
+// IsVisible reports whether op is an operand that is usually
+// shown in syntax strings, i.e. whether it is explicit or implicit.
+func (op *Operand) IsVisible() bool {
+	switch op.Visibility {
+	case VisExplicit, VisImplicit:
+		return true
+	default:
+		return false
+	}
+}

diff --git a/x86/x86spec/xeddata/reader.go b/x86/x86spec/xeddata/reader.go
new file mode 100644
index 0000000..4176b66
--- /dev/null
+++ b/x86/x86spec/xeddata/reader.go

@@ -0,0 +1,210 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package xeddata
+
+import (
+	"bufio"
+	"errors"
+	"fmt"
+	"io"
+	"regexp"
+	"strings"
+)
+
+// Reader reads enc/dec-instruction objects from XED datafile.
+type Reader struct {
+	scanner *bufio.Scanner
+
+	lines []string // Re-used between Read calls
+
+	// True if last line ends with newline escape (backslash).
+	joinLines bool
+}
+
+// NewReader returns a new Reader that reads from r.
+// The input is consumed line by line through a bufio.Scanner.
+func NewReader(r io.Reader) *Reader {
+	scanner := bufio.NewScanner(r)
+	return newReader(scanner)
+}
+
+// newReader returns a Reader that is backed by scanner.
+// The split function of scanner is replaced with Reader.split.
+func newReader(scanner *bufio.Scanner) *Reader {
+	r := new(Reader)
+	r.scanner = scanner
+	r.lines = make([]string, 0, 64)
+	r.scanner.Split(r.split)
+	return r
+}
+
+// split implements bufio.SplitFunc for Reader.
+//
+// It is bufio.ScanLines that additionally records whether the
+// returned line ends with a backslash (\-style newline escape).
+// The recorded joinLines flag affects Reader.scanLine behavior.
+// Empty tokens leave the flag untouched.
+func (r *Reader) split(data []byte, atEOF bool) (int, []byte, error) {
+	advance, tok, err := bufio.ScanLines(data, atEOF)
+	if err != nil || len(tok) == 0 {
+		return advance, tok, err
+	}
+	last := tok[len(tok)-1]
+	r.joinLines = last == '\\'
+	return advance, tok, err
+}
+
+// Read reads single XED instruction object from
+// the stream backed by reader.
+//
+// Lines that precede the opening '{' are skipped, lines between
+// '{' and the closing '}' are parsed as object properties.
+//
+// If there is no data left to be read,
+// returned error is io.EOF.
+// If the underlying scanner stopped because of an error
+// (a read failure or a line that is too long), that error is
+// returned instead of being reported as the end of input.
+func (r *Reader) Read() (*Object, error) {
+	for line := r.scanLine(); line != ""; line = r.scanLine() {
+		if line[0] != '{' {
+			continue
+		}
+		lines := r.lines[:0] // Object lines
+		for line := r.scanLine(); line != ""; line = r.scanLine() {
+			if line[0] == '}' {
+				// Keep the (possibly grown) buffer for the next Read.
+				r.lines = lines[:0]
+				return r.parseLines(lines)
+			}
+			lines = append(lines, line)
+		}
+		// Input has ended inside the object: tell a scanner
+		// failure apart from a genuinely unterminated object.
+		if err := r.scanner.Err(); err != nil {
+			return nil, err
+		}
+		return nil, errors.New("no matching '}' found")
+	}
+
+	// scanner.Err returns nil if the scanner stopped at EOF.
+	if err := r.scanner.Err(); err != nil {
+		return nil, err
+	}
+	return nil, io.EOF
+}
+
+// ReadAll reads all the remaining objects from r.
+// A successful call returns err == nil, not err == io.EOF,
+// just like csv.Reader.ReadAll().
+//
+// On failure, objects that were read before the error
+// are returned along with it.
+func (r *Reader) ReadAll() ([]*Object, error) {
+	objects := make([]*Object, 0)
+	for {
+		o, err := r.Read()
+		switch err {
+		case nil:
+			objects = append(objects, o)
+		case io.EOF:
+			return objects, nil
+		default:
+			return objects, err
+		}
+	}
+}
+
+// instLineRE matches valid XED object/inst line.
+// It expects lines that are joined by '\' to be concatenated.
+//
+// The format can be described as:
+//	unquoted field name "[A-Z_]+" (captured)
+//	field value delimiter ":"
+//	field value string (captured)
+//	optional trailing comment that is ignored "[^#]*"
+var instLineRE = regexp.MustCompile(`^([A-Z_]+)\s*:\s*([^#]*)`)
+
+// parseLines turns collected object lines into Object.
+//
+// Every line must either be a comment (start with '#') or have the
+// "KEY: value" form that is matched by instLineRE; a malformed line or
+// an unknown KEY is an error. The number of OPERANDS lines must be equal
+// to the number of PATTERN lines: i-th values of both form the i-th Inst.
+func (r *Reader) parseLines(lines []string) (*Object, error) {
+	var (
+		obj = &Object{}
+
+		// Values of repeatable keys, in order of appearance.
+		// They are combined into Insts only after all lines are seen,
+		// because these keys are not guaranteed to follow strict order.
+		patterns []string
+		operands []string
+		iforms   []string
+	)
+
+	for _, line := range lines {
+		if line[0] == '#' {
+			continue // Comment line.
+		}
+		m := instLineRE.FindStringSubmatch(line)
+		if len(m) == 0 {
+			return nil, fmt.Errorf("malformed line: %s", line)
+		}
+		key := m[1]
+		val := strings.TrimSpace(m[2])
+
+		switch key {
+		case "ICLASS":
+			obj.Iclass = val
+		case "DISASM":
+			obj.Disasm = val
+		case "DISASM_INTEL":
+			obj.DisasmIntel = val
+		case "DISASM_ATTSV":
+			obj.DisasmATTSV = val
+		case "ATTRIBUTES":
+			obj.Attributes = val
+		case "UNAME":
+			obj.Uname = val
+		case "CPL":
+			obj.CPL = val
+		case "CATEGORY":
+			obj.Category = val
+		case "EXTENSION":
+			obj.Extension = val
+		case "EXCEPTIONS":
+			obj.Exceptions = val
+		case "ISA_SET":
+			obj.ISASet = val
+		case "FLAGS":
+			obj.Flags = val
+		case "COMMENT":
+			obj.Comment = val
+		case "VERSION":
+			obj.Version = val
+		case "REAL_OPCODE":
+			obj.RealOpcode = val
+
+		case "PATTERN":
+			patterns = append(patterns, val)
+		case "OPERANDS":
+			operands = append(operands, val)
+		case "IFORM":
+			iforms = append(iforms, val)
+
+		default:
+			// Being strict about unknown field names gives a nice
+			// XED file validation diagnostics.
+			// Also defends against typos in test files.
+			return nil, fmt.Errorf("unknown key token: %s", key)
+		}
+	}
+
+	if len(patterns) != len(operands) {
+		return nil, fmt.Errorf("%s: OPERANDS and PATTERN lines mismatch", obj.Opcode())
+	}
+
+	obj.Insts = make([]*Inst, 0, len(operands))
+	for i, ops := range operands {
+		inst := &Inst{
+			Object:   obj,
+			Index:    i,
+			Pattern:  patterns[i],
+			Operands: ops,
+		}
+		// There can be less IFORMs than insts.
+		if i < len(iforms) {
+			inst.Iform = iforms[i]
+		}
+		obj.Insts = append(obj.Insts, inst)
+	}
+
+	return obj, nil
+}
+
+// scanLine tries to fetch non-empty line from scanner.
+//
+// A line that ends with newline escape (backslash) is concatenated,
+// without the backslash, with the non-empty line(s) that follow it.
+//
+// Returns empty line when scanner.Scan() returns false
+// before non-empty line is found.
+func (r *Reader) scanLine() string {
+	joined := "" // Escaped lines that are collected so far.
+	for r.scanner.Scan() {
+		line := r.scanner.Text()
+		if line == "" {
+			continue
+		}
+		if !r.joinLines {
+			return joined + line
+		}
+		// Drop the trailing backslash and proceed to the next line.
+		joined += line[:len(line)-len("\\")]
+	}
+	return joined
+}

diff --git a/x86/x86spec/xeddata/testdata/xed_objects.txt b/x86/x86spec/xeddata/testdata/xed_objects.txt
new file mode 100644
index 0000000..9d0c52e
--- /dev/null
+++ b/x86/x86spec/xeddata/testdata/xed_objects.txt

@@ -0,0 +1,289 @@
+------ empty input
+====
+[]
+
+------ only newlines
+
+
+
+====
+[]
+
+------ only comments and newlines
+# {
+# ICLASS : ADD
+# }
+====
+[]
+
+------ join lines
+{
+ICLASS : i\
+ cla\
+  ss1
+VERSION : 1.\
+0
+FLAGS:\
+\
+\
+NOP
+
+REAL_OPCODE    : \Y
+CPL            : \3
+PATTERN: A B
+OPERANDS:
+}
+====
+[{
+  "Iclass": "i cla  ss1",
+  "Version": "1.0",
+  "Flags": "NOP",
+  "RealOpcode": "\\Y",
+  "CPL": "\\3",
+  "Pattern": "A B"
+}]
+
+------ 1 variant; no iform
+{
+ICLASS:iclass1 # comment
+DISASM : disasm1
+
+PATTERN :pat1 pat1
+OPERANDS :  ops1  ops1
+}
+# comment
+
+{ # comment
+# comment
+ICLASS  : iclass2
+OPERANDS:ops2
+PATTERN:pat2 # comment
+}
+====
+[{
+  "Iclass": "iclass1",
+  "Disasm": "disasm1",
+  "Pattern": "pat1 pat1",
+  "Operands": "ops1  ops1"
+}, {
+  "Iclass": "iclass2",
+  "Operands": "ops2",
+  "Pattern": "pat2"
+}]
+
+------ 2 variants; no iform
+{
+PATTERN    : pat1_1
+COMMENT    : comment1
+OPERANDS   : ops1_1
+OPERANDS   : ops1_2
+PATTERN    : pat1_2
+}
+{
+PATTERN    : pat2_1
+PATTERN    : pat2_2
+OPERANDS   : ops2_1
+OPERANDS   : ops2_2
+}
+====
+[{
+  "Comment": "comment1",
+  "Pattern": "pat1_1",
+  "Operands": "ops1_1"
+}, {
+  "Comment": "comment1",
+  "Pattern": "pat1_2",
+  "Operands": "ops1_2"
+}, {
+  "Pattern": "pat2_1",
+  "Operands": "ops2_1"
+}, {
+  "Pattern": "pat2_2",
+  "Operands": "ops2_2"
+}]
+
+------ 3 variants
+{
+
+PATTERN  : pat1_1
+OPERANDS : ops1_1
+IFORM    : iform1_1
+
+PATTERN  : pat1_2# comment
+OPERANDS : ops1_2# comment
+IFORM    : iform1_2# comment
+
+# comment
+PATTERN  : pat1_3
+OPERANDS : ops1_3
+IFORM    : iform1_3
+
+}
+
+{
+PATTERN  : pat2_1
+OPERANDS : ops2_1
+IFORM    : iform2_1
+PATTERN  : pat2_2
+OPERANDS : ops2_2
+PATTERN  : pat2_3
+OPERANDS : ops2_3
+}
+====
+[{
+  "Iform": "iform1_1",
+  "Pattern": "pat1_1",
+  "Operands": "ops1_1"
+}, {
+  "Iform": "iform1_2",
+  "Pattern": "pat1_2",
+  "Operands": "ops1_2"
+}, {
+  "Iform": "iform1_3",
+  "Pattern": "pat1_3",
+  "Operands": "ops1_3"
+}, {
+  "Iform": "iform2_1",
+  "Pattern": "pat2_1",
+  "Operands": "ops2_1"
+}, {
+  "Pattern": "pat2_2",
+  "Operands": "ops2_2"
+}, {
+  "Pattern": "pat2_3",
+  "Operands": "ops2_3"
+}]
+
+------ stable and unstable instructions (REAL_OPCODE)
+{
+ICLASS: STABLE
+REAL_OPCODE: Y
+PATTERN : x y z
+OPERANDS :
+}
+{
+ICLASS: UNSTABLE
+REAL_OPCODE:   N
+PATTERN : x y z
+OPERANDS :
+}
+====
+[{
+  "Iclass": "STABLE",
+  "RealOpcode": "Y",
+  "Pattern": "x y z",
+  "Operands": ""
+}, {
+  "Iclass": "UNSTABLE",
+  "RealOpcode": "N",
+  "Pattern": "x y z",
+  "Operands": ""
+}]
+
+------ AVXAES objects
+# Emitting VAESENCLAST
+{
+ICLASS    : VAESENCLAST
+EXCEPTIONS: avx-type-4
+CPL       : 3
+CATEGORY  : AES
+EXTENSION : AVXAES
+PATTERN : VV1 0xDD V66 V0F38 MOD[0b11] MOD=3  REG[rrr] RM[nnn] VL128
+OPERANDS  : REG0=XMM_R():w:dq REG1=XMM_N():r:dq  REG2=XMM_B():r:dq
+PATTERN : VV1 0xDD  V66 V0F38 MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM() VL128
+OPERANDS  : REG0=XMM_R():w:dq REG1=XMM_N():r:dq  MEM0:r:dq
+}
+# Emitting VAESDEC
+{
+ICLASS    : VAESDEC
+EXCEPTIONS: avx-type-4
+CPL       : 3
+CATEGORY  : AES
+EXTENSION : AVXAES
+PATTERN : VV1 0xDE V66 V0F38 MOD[0b11] MOD=3  REG[rrr] RM[nnn] VL128
+OPERANDS  : REG0=XMM_R():w:dq REG1=XMM_N():r:dq  REG2=XMM_B():r:dq
+PATTERN : VV1 0xDE V66 V0F38 MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM() VL128
+OPERANDS  : REG0=XMM_R():w:dq REG1=XMM_N():r:dq  MEM0:r:dq
+}
+====
+[{
+  "Iclass": "VAESENCLAST",
+  "Exceptions": "avx-type-4",
+  "CPL": "3",
+  "Category": "AES",
+  "Extension": "AVXAES",
+  "Pattern": "VV1 0xDD V66 V0F38 MOD[0b11] MOD=3  REG[rrr] RM[nnn] VL128",
+  "Operands": "REG0=XMM_R():w:dq REG1=XMM_N():r:dq  REG2=XMM_B():r:dq"
+}, {
+  "Iclass": "VAESENCLAST",
+  "Exceptions": "avx-type-4",
+  "CPL": "3",
+  "Category": "AES",
+  "Extension": "AVXAES",
+  "Pattern": "VV1 0xDD  V66 V0F38 MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM() VL128",
+  "Operands": "REG0=XMM_R():w:dq REG1=XMM_N():r:dq  MEM0:r:dq"
+}, {
+  "Iclass": "VAESDEC",
+  "Exceptions": "avx-type-4",
+  "CPL": "3",
+  "Category": "AES",
+  "Extension": "AVXAES",
+  "Pattern": "VV1 0xDE V66 V0F38 MOD[0b11] MOD=3  REG[rrr] RM[nnn] VL128",
+  "Operands": "REG0=XMM_R():w:dq REG1=XMM_N():r:dq  REG2=XMM_B():r:dq"
+}, {
+  "Iclass": "VAESDEC",
+  "Exceptions": "avx-type-4",
+  "CPL": "3",
+  "Category": "AES",
+  "Extension": "AVXAES",
+  "Pattern": "VV1 0xDE V66 V0F38 MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM() VL128",
+  "Operands": "REG0=XMM_R():w:dq REG1=XMM_N():r:dq  MEM0:r:dq"
+}]
+
+------ Two-word disasm
+{
+ICLASS    : JMP_FAR
+DISASM_INTEL: jmp far
+DISASM_ATTSV: ljmp
+CPL       : 3
+CATEGORY  : UNCOND_BR
+ATTRIBUTES : FAR_XFER NOTSX
+EXTENSION : BASE
+ISA_SET   : I86
+PATTERN   : 0xEA not64 BRDISPz() UIMM16()
+OPERANDS  : PTR:r:p IMM0:r:w REG0=XED_REG_EIP:w:SUPP
+}
+====
+[{
+  "Iclass": "JMP_FAR",
+  "DisasmIntel": "jmp far",
+  "DisasmATTSV": "ljmp",
+  "CPL": "3",
+  "Attributes": "FAR_XFER NOTSX",
+  "Extension": "BASE",
+  "ISASet": "I86",
+  "Pattern": "0xEA not64 BRDISPz() UIMM16()",
+  "Operands": "PTR:r:p IMM0:r:w REG0=XED_REG_EIP:w:SUPP"
+}]
+
+------ INVALID key token
+{
+FOO : 111
+}
+====
+unknown key token: FOO
+
+------ INVALID unterminated object
+{
+====
+no matching '}' found
+
+------ INVALID pat+ops
+{
+ICLASS: foobar
+PATTERN : 1
+PATTERN : 2
+OPERANDS : 3
+}
+====
+foobar: OPERANDS and PATTERN lines mismatch

diff --git a/x86/x86spec/xeddata/testdata/xedpath/all-element-types.txt b/x86/x86spec/xeddata/testdata/xedpath/all-element-types.txt
new file mode 100644
index 0000000..b11498a
--- /dev/null
+++ b/x86/x86spec/xeddata/testdata/xedpath/all-element-types.txt

@@ -0,0 +1,5 @@
+# Copyright 2018 The Go Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+fword64 T_DOUBLE 64

diff --git a/x86/x86spec/xeddata/testdata/xedpath/all-state.txt b/x86/x86spec/xeddata/testdata/xedpath/all-state.txt
new file mode 100644
index 0000000..0dc1080
--- /dev/null
+++ b/x86/x86spec/xeddata/testdata/xedpath/all-state.txt

@@ -0,0 +1,17 @@
+# Copyright 2018 The Go Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+_M_VV_TRUE VEXVALID=1
+_M_VV_FALSE VEXVALID=0
+
+_M_VEX_P_66 VEX_PREFIX=1
+_M_VEX_P_F2 VEX_PREFIX=2
+_M_VEX_P_F3 VEX_PREFIX=3
+
+_M_VLEN_128 VL=0
+_M_VLEN_256 VL=1
+
+_M_MAP_0F    MAP=1
+_M_MAP_0F38  MAP=2
+_M_MAP_0F3A  MAP=3

diff --git a/x86/x86spec/xeddata/testdata/xedpath/all-widths.txt b/x86/x86spec/xeddata/testdata/xedpath/all-widths.txt
new file mode 100644
index 0000000..e03e5ea
--- /dev/null
+++ b/x86/x86spec/xeddata/testdata/xedpath/all-widths.txt

@@ -0,0 +1,8 @@
+# Copyright 2018 The Go Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+width_dq i32 16
+width_qq i32 32
+width_v int 2 4 8
+width_f64 f64 8

diff --git a/x86/x86spec/xeddata/xeddata_test.go b/x86/x86spec/xeddata/xeddata_test.go
new file mode 100644
index 0000000..b3d93fd
--- /dev/null
+++ b/x86/x86spec/xeddata/xeddata_test.go

@@ -0,0 +1,488 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package xeddata
+
+import (
+	"bytes"
+	"encoding/json"
+	"fmt"
+	"io"
+	"io/ioutil"
+	"path"
+	"reflect"
+	"strings"
+	"testing"
+)
+
+// Small database to generate state/xtype/width input files and validate parse results.
+//
+// Tests should use only those symbols that are defined inside test maps.
+// For example, if {"foo"=>"bar"} element is not in statesMap, tests
+// can't expect that "foo" gets replaced by "bar".
+var (
+	// statesMap maps a state macro name to the text it expands to.
+	statesMap = map[string]string{
+		"not64":         "MODE!=2",
+		"mode64":        "MODE=2",
+		"mode32":        "MODE=1",
+		"mode16":        "MODE=0",
+		"rexw_prefix":   "REXW=1 SKIP_OSZ=1",
+		"norexw_prefix": "REXW=0 SKIP_OSZ=1",
+		"W1":            "REXW=1 SKIP_OSZ=1",
+		"W0":            "REXW=0 SKIP_OSZ=1",
+		"VV1":           "VEXVALID=1",
+		"V66":           "VEX_PREFIX=1",
+		"VF2":           "VEX_PREFIX=2",
+		"VF3":           "VEX_PREFIX=3",
+		"V0F":           "MAP=1",
+		"V0F38":         "MAP=2",
+		"V0F3A":         "MAP=3",
+		"VL128":         "VL=0",
+		"VL256":         "VL=1",
+	}
+
+	// xtypesMap maps an xtype name to its expected parsed description.
+	xtypesMap = map[string]*xtype{
+		"int": {name: "int", baseType: "INT", size: "0"},
+		"i8":  {name: "i8", baseType: "INT", size: "8"},
+		"i64": {name: "i64", baseType: "INT", size: "64"},
+		"i32": {name: "i32", baseType: "INT", size: "32"},
+		"u8":  {name: "u8", baseType: "UINT", size: "8"},
+		"f32": {name: "f32", baseType: "SINGLE", size: "32"},
+		"f64": {name: "f64", baseType: "DOUBLE", size: "64"},
+		"var": {name: "var", baseType: "VARIABLE", size: "0"},
+	}
+
+	// widthsMap maps a width name to its expected parsed description:
+	// the xtype name plus three size strings.
+	widthsMap = map[string]*width{
+		"q":         {xtype: "i64", sizes: [3]string{"8", "8", "8"}},
+		"z":         {xtype: "int", sizes: [3]string{"2", "4", "4"}},
+		"b":         {xtype: "u8", sizes: [3]string{"1", "1", "1"}},
+		"d":         {xtype: "i32", sizes: [3]string{"4", "4", "4"}},
+		"ps":        {xtype: "f32", sizes: [3]string{"16", "16", "16"}},
+		"dq":        {xtype: "i32", sizes: [3]string{"16", "16", "16"}},
+		"i32":       {xtype: "i32", sizes: [3]string{"4", "4", "4"}},
+		"i64":       {xtype: "i64", sizes: [3]string{"8", "8", "8"}},
+		"vv":        {xtype: "var", sizes: [3]string{"0", "0", "0"}},
+		"mskw":      {xtype: "i1", sizes: [3]string{"64bits", "64bits", "64bits"}},
+		"zf32":      {xtype: "f32", sizes: [3]string{"512bits", "512bits", "512bits"}},
+		"zf64":      {xtype: "f64", sizes: [3]string{"512bits", "512bits", "512bits"}},
+		"mem80real": {xtype: "f80", sizes: [3]string{"10", "10", "10"}},
+		"mfpxenv":   {xtype: "struct", sizes: [3]string{"512", "512", "512"}},
+	}
+)
+
+// newStatesSource returns a reader that mocks "all-state.txt" file.
+// Input content is generated based on statesMap.
+func newStatesSource() io.Reader {
+	var buf bytes.Buffer
+	i := 0
+	for k, v := range statesMap {
+		buf.WriteString("# Line comment\n")
+		buf.WriteString("#\n\n\n")
+		fmt.Fprintf(&buf, "\t%-20s%s", k, v)
+		if i%3 == 0 {
+			buf.WriteString("\t# Trailing comment")
+		}
+		buf.WriteByte('\n')
+		i++
+	}
+
+	return &buf
+}
+
+// newWidthsSource returns a reader that mocks "all-widths.txt" file.
+// Input content is generated based on widthsMap.
+func newWidthsSource() io.Reader {
+	var buf bytes.Buffer
+	i := 0
+	for name, width := range widthsMap {
+		buf.WriteString("# Line comment\n")
+		buf.WriteString("#\n\n\n")
+		eqSizes := width.sizes[0] == width.sizes[1] &&
+			width.sizes[0] == width.sizes[2]
+		if i%2 == 0 && eqSizes {
+			fmt.Fprintf(&buf, "\t%-16s%-12s%-8s",
+				name, width.xtype, width.sizes[0])
+		} else {
+			fmt.Fprintf(&buf, "\t%-16s%-12s%-8s%-8s%-8s",
+				name, width.xtype,
+				width.sizes[0], width.sizes[1], width.sizes[2])
+		}
+		if i%3 == 0 {
+			buf.WriteString("\t# Trailing comment")
+		}
+		buf.WriteByte('\n')
+		i++
+	}
+
+	return &buf
+}
+
+// newXtypesSource returns a reader that mocks "all-element-types.txt" file.
+// Input content is generated based on xtypesMap.
+func newXtypesSource() io.Reader {
+	var buf bytes.Buffer
+	i := 0
+	for _, v := range xtypesMap {
+		buf.WriteString("# Line comment\n")
+		buf.WriteString("#\n\n\n")
+
+		fmt.Fprintf(&buf, "\t%s %s %s",
+			v.name, v.baseType, v.size)
+
+		if i%3 == 0 {
+			buf.WriteString("\t# Trailing comment")
+		}
+		buf.WriteByte('\n')
+		i++
+	}
+
+	return &buf
+}
+
+func newTestDatabase(t *testing.T) *Database {
+	var db Database
+	err := db.LoadStates(newStatesSource())
+	if err != nil {
+		t.Fatal(err)
+	}
+	err = db.LoadWidths(newWidthsSource())
+	if err != nil {
+		t.Fatal(err)
+	}
+	err = db.LoadXtypes(newXtypesSource())
+	if err != nil {
+		t.Fatal(err)
+	}
+	return &db
+}
+
+// TestContainsWord checks containsWord against a table of attribute
+// strings: a name matches only when it appears as a whole
+// space-separated word, at the start, in the middle or at the end.
+func TestContainsWord(t *testing.T) {
+	tests := []struct {
+		attrs    string
+		attrName string
+		output   bool
+	}{
+		{"ATT1", "ATT1", true},
+		{" ATT1", "ATT1", true},
+		{"ATT1 ", "ATT1", true},
+		{" ATT1 ", "ATT1", true},
+		// First, middle and last word of a list.
+		{"ATT1 ATT2 ATT3", "ATT1", true},
+		{"ATT1 ATT2 ATT3", "ATT2", true},
+		{"ATT1 ATT2 ATT3", "ATT3", true},
+		{"ATT1 ATT2 ATT3", "ATT4", false},
+		{"ATT1ATT1", "ATT1", false},
+		{".ATT1", "ATT1", false},
+		{".ATT1.", "ATT1", false},
+		{"ATT1.", "ATT1", false},
+		{"", "ATT1", false},
+		{"AT", "ATT1", false},
+		{"ATT 1", "ATT1", false},
+		{" ATT1 ", "TT", false},
+		{" ATT1 ", "T1", false},
+		{" ATT1 ", "AT", false},
+	}
+
+	for _, test := range tests {
+		output := containsWord(test.attrs, test.attrName)
+		if output != test.output {
+			t.Errorf("containsWord(%q, %q):\nhave: %v\nwant: %v",
+				test.attrs, test.attrName, output, test.output)
+		}
+	}
+}
+
+// TestParseWidths parses the generated widths source and expects the
+// result to match widthsMap. Entries are compared one by one first, so
+// that a failure names the offending key; the final DeepEqual also
+// catches differences the per-key loop does not look at.
+func TestParseWidths(t *testing.T) {
+	parsed, err := parseWidths(newWidthsSource())
+	if err != nil {
+		t.Fatal(err)
+	}
+	for name, want := range widthsMap {
+		got := parsed[name]
+		if got == nil {
+			t.Fatalf("missing key %s", name)
+		}
+		if *got != *want {
+			t.Fatalf("key %s:\nhave: %#v\nwant: %#v",
+				name, got, want)
+		}
+	}
+	if reflect.DeepEqual(parsed, widthsMap) {
+		return
+	}
+	t.Errorf("widths output mismatch:\nhave: %#v\nwant: %#v",
+		parsed, widthsMap)
+}
+
+// TestParseStates parses the generated states source and expects the
+// result to be deeply equal to statesMap.
+func TestParseStates(t *testing.T) {
+	parsed, err := parseStates(newStatesSource())
+	if err != nil {
+		t.Fatal(err)
+	}
+	if reflect.DeepEqual(parsed, statesMap) {
+		return
+	}
+	t.Errorf("states output mismatch:\nhave: %v\nwant: %v", parsed, statesMap)
+}
+
+// TestParseXtypes parses the generated xtypes source and expects the
+// result to match xtypesMap. Entries are compared one by one first, so
+// that a failure names the offending key; the final DeepEqual also
+// catches differences the per-key loop does not look at.
+func TestParseXtypes(t *testing.T) {
+	parsed, err := parseXtypes(newXtypesSource())
+	if err != nil {
+		t.Fatal(err)
+	}
+	for name, want := range xtypesMap {
+		got := parsed[name]
+		if got == nil {
+			t.Fatalf("missing key %s", name)
+		}
+		if *got != *want {
+			t.Fatalf("key %s:\nhave: %#v\nwant: %#v",
+				name, got, want)
+		}
+	}
+	if reflect.DeepEqual(parsed, xtypesMap) {
+		return
+	}
+	t.Fatalf("xtype maps are not equal")
+}
+
+// TestNewOperand feeds colon-separated operand descriptions to
+// NewOperand and compares the result with the expected Operand.
+// The database used for lookups comes from newTestDatabase, so inputs
+// may only use widths and xtypes present in the test maps.
+func TestNewOperand(t *testing.T) {
+	tests := []struct {
+		input string
+		op    Operand
+	}{
+		// Simple cases.
+		{
+			"REG0=XMM_R():r",
+			Operand{Name: "REG0=XMM_R()", Action: "r"},
+		},
+		{
+			"REG0=XMM_R:w",
+			Operand{Name: "REG0=XMM_R", Action: "w"},
+		},
+		{
+			"MEM0:rw:q",
+			Operand{Name: "MEM0", Action: "rw", Width: "q"},
+		},
+		{
+			"REG0=XMM_R():rcw:ps:f32",
+			Operand{Name: "REG0=XMM_R()", Action: "rcw", Width: "ps", Xtype: "f32"},
+		},
+		{
+			"IMM0:r:z",
+			Operand{Name: "IMM0", Action: "r", Width: "z"},
+		},
+		{
+			"IMM1:cw:b:i8",
+			Operand{Name: "IMM1", Action: "cw", Width: "b", Xtype: "i8"},
+		},
+
+		// Optional fields and visibility.
+		{
+			"REG2:r:EXPL",
+			Operand{Name: "REG2", Action: "r", Visibility: VisExplicit},
+		},
+		{
+			"MEM1:w:d:IMPL",
+			Operand{Name: "MEM1", Action: "w", Width: "d", Visibility: VisImplicit},
+		},
+		{
+			"MEM1:w:IMPL:d",
+			Operand{Name: "MEM1", Action: "w", Width: "d", Visibility: VisImplicit},
+		},
+		{
+			"MEM1:w:d:SUPP:i32",
+			Operand{Name: "MEM1", Action: "w", Width: "d", Visibility: VisSuppressed, Xtype: "i32"},
+		},
+		{
+			"MEM1:w:SUPP:d:i32",
+			Operand{Name: "MEM1", Action: "w", Width: "d", Visibility: VisSuppressed, Xtype: "i32"},
+		},
+
+		// Ambiguity: xtypes that look like widths.
+		{
+			"REG0=XMM_R():w:dq:i64",
+			Operand{Name: "REG0=XMM_R()", Action: "w", Width: "dq", Xtype: "i64"},
+		},
+
+		// TXT=X field.
+		{
+			"REG1=MASK1():r:mskw:TXT=ZEROSTR",
+			Operand{Name: "REG1=MASK1()", Action: "r", Width: "mskw",
+				Attributes: map[string]bool{"TXT=ZEROSTR": true}},
+		},
+		{
+			"MEM0:r:vv:f64:TXT=BCASTSTR",
+			Operand{Name: "MEM0", Action: "r", Width: "vv", Xtype: "f64",
+				Attributes: map[string]bool{"TXT=BCASTSTR": true}},
+		},
+		{
+			"REG0=ZMM_R3():w:zf32:TXT=SAESTR",
+			Operand{Name: "REG0=ZMM_R3()", Action: "w", Width: "zf32",
+				Attributes: map[string]bool{"TXT=SAESTR": true}},
+		},
+		{
+			"REG0=ZMM_R3():w:zf64:TXT=ROUNDC",
+			Operand{Name: "REG0=ZMM_R3()", Action: "w", Width: "zf64",
+				Attributes: map[string]bool{"TXT=ROUNDC": true}},
+		},
+
+		// Multi-source.
+		{
+			"REG2=ZMM_N3():r:zf32:MULTISOURCE4",
+			Operand{Name: "REG2=ZMM_N3()", Action: "r", Width: "zf32",
+				Attributes: map[string]bool{"MULTISOURCE4": true}},
+		},
+
+		// Multi-source + EVEX.b context.
+		{
+			"REG2=ZMM_N3():r:zf32:MULTISOURCE4:TXT=SAESTR",
+			Operand{Name: "REG2=ZMM_N3()", Action: "r", Width: "zf32",
+				Attributes: map[string]bool{"MULTISOURCE4": true, "TXT=SAESTR": true}},
+		},
+	}
+
+	db := newTestDatabase(t)
+	for _, test := range tests {
+		op, err := NewOperand(db, strings.Split(test.input, ":"))
+		if err != nil {
+			// Name the failing input and keep going, so that one bad
+			// case does not hide the results of the remaining ones.
+			t.Errorf("parse(`%s`): unexpected error: %v", test.input, err)
+			continue
+		}
+		if !reflect.DeepEqual(op, test.op) {
+			t.Errorf("parse(`%s`): output mismatch\nhave: %#v\nwant: %#v",
+				test.input, op, test.op,
+			)
+		}
+	}
+}
+
+// TestReader runs the cases stored in testdata/xed_objects.txt.
+//
+// The file is a sequence of cases. Each case starts with a
+// "------ <name>" line, followed by the reader input, a "====" line and
+// the expected output. For cases whose name contains "INVALID" the
+// expected output is the exact error text; for all other cases it is a
+// JSON array with one object per instruction. Only the keys listed in
+// the expected objects are compared.
+func TestReader(t *testing.T) {
+	type test struct {
+		name   string
+		input  string
+		output string
+	}
+
+	var tests []test
+	{
+		b, err := ioutil.ReadFile(path.Join("testdata", "xed_objects.txt"))
+		if err != nil {
+			t.Fatal(err)
+		}
+		cases := strings.Split(string(b), "------")[1:]
+		for _, c := range cases {
+			// The case name is the remainder of the "------" line.
+			nl := strings.Index(c, "\n")
+			if nl == -1 {
+				t.Fatalf("malformed test case, no newline after name: %q", c)
+			}
+			name := c[:nl]
+			parts := strings.Split(c[len(name):], "====")
+			if len(parts) < 2 {
+				t.Fatalf("%s: malformed test case, missing `====` separator",
+					strings.TrimSpace(name))
+			}
+
+			tests = append(tests, test{
+				name:   strings.TrimSpace(name),
+				input:  strings.TrimSpace(parts[0]),
+				output: strings.TrimSpace(parts[1]),
+			})
+		}
+	}
+
+	for _, test := range tests {
+		r := NewReader(strings.NewReader(test.input))
+		objects, err := r.ReadAll()
+		if strings.Contains(test.name, "INVALID") {
+			// PASS is logged only when the error is present and matches.
+			switch {
+			case err == nil:
+				t.Errorf("%s: expected non-nil error", test.name)
+			case err.Error() != test.output:
+				t.Errorf("%s: error mismatch\nhave: `%s`\nwant: `%s`\n",
+					test.name, err.Error(), test.output)
+			default:
+				t.Logf("PASS: %s", test.name)
+			}
+			continue
+		}
+		if err != nil {
+			t.Fatalf("%s: %v", test.name, err)
+		}
+
+		var have []map[string]string
+		for _, o := range objects {
+			for _, inst := range o.Insts {
+				var result map[string]string
+				err := json.Unmarshal([]byte(inst.String()), &result)
+				if err != nil {
+					t.Fatalf("%s: %v", test.name, err)
+				}
+				have = append(have, result)
+			}
+		}
+		var want []map[string]string
+		err = json.Unmarshal([]byte(test.output), &want)
+		if err != nil {
+			t.Fatalf("%s: %v", test.name, err)
+		}
+		// Guard the indexing below and catch missing or extra
+		// instructions, which a per-key comparison cannot see.
+		if len(have) != len(want) {
+			t.Errorf("%s: insts count mismatch\nhave: %d\nwant: %d",
+				test.name, len(have), len(want))
+			continue
+		}
+		// Track success per case: t.Failed() would also reflect
+		// failures of earlier cases.
+		ok := true
+		for i := range want {
+			for k := range want[i] {
+				if want[i][k] == have[i][k] {
+					continue
+				}
+				ok = false
+				// i - index inside array of JSON objects.
+				// k - i'th object key (example: "Iclass").
+				t.Errorf("%s: insts[%d].%s mismatch\nhave: `%s`\nwant: `%s`",
+					test.name, i, k, have[i][k], want[i][k])
+			}
+		}
+		if ok {
+			t.Logf("PASS: %s", test.name)
+		}
+	}
+}
+
+// TestMacroExpand checks ExpandStates against a table of inputs.
+// The cases show that only whole words found in statesMap are replaced
+// and that runs of spaces come out as a single space.
+func TestMacroExpand(t *testing.T) {
+	tests := [...]struct {
+		input  string
+		output string
+	}{
+		0: {
+			input:  "a not64 b c",
+			output: "a MODE!=2 b c",
+		},
+		1: {
+			input:  "mode16 W0",
+			output: "MODE=0 REXW=0 SKIP_OSZ=1",
+		},
+		2: {
+			input:  "W1 mode32",
+			output: "REXW=1 SKIP_OSZ=1 MODE=1",
+		},
+		3: {
+			input:  "W1 W1",
+			output: "REXW=1 SKIP_OSZ=1 REXW=1 SKIP_OSZ=1",
+		},
+		4: {
+			input:  "W1W1",
+			output: "W1W1",
+		},
+		5: {
+			input:  "mode64 1 2 3 rexw_prefix",
+			output: "MODE=2 1 2 3 REXW=1 SKIP_OSZ=1",
+		},
+		6: {
+			input:  "a  b  c",
+			output: "a b c",
+		},
+		7: {
+			input:  "mode16 mode32 mode16 mode16",
+			output: "MODE=0 MODE=1 MODE=0 MODE=0",
+		},
+		8: {
+			input:  "V0F38 V0FV0F V0FV0F38",
+			output: "MAP=2 V0FV0F V0FV0F38",
+		},
+		9: {
+			input:  "VV1 0x2E V66 V0F38 VL128  norexw_prefix MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM()",
+			output: "VEXVALID=1 0x2E VEX_PREFIX=1 MAP=2 VL=0 REXW=0 SKIP_OSZ=1 MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM()",
+		},
+	}
+
+	db := newTestDatabase(t)
+	for id := range tests {
+		want := tests[id].output
+		have := ExpandStates(db, tests[id].input)
+		if have == want {
+			continue
+		}
+		t.Errorf("test %d: output mismatch:\nhave: `%s`\nwant: `%s`",
+			id, have, want)
+	}
+}
commit	afdde57c62b3d0a4931f5e6f8091f0336eaba828	[log] [tgz]
author	isharipo <iskander.sharipov@intel.com>	Wed Jan 17 13:39:34 2018 +0300
committer	Ilya Tocar <ilya.tocar@intel.com>	Tue Apr 03 14:27:31 2018 +0000
tree	535a9fe2b862c171c939914fd6cff450a240ab44
parent	a91cccae75b8ebb5c8303204e33e61ea84c7aea3 [diff]