x86: add x86csv package

The x86csv package provides a simple CSV reader
that deserializes rows into spec row objects.

This way, it is more convenient to build tools
that rely on x86.csv and it is easier to
control x86.csv version changes over time
(at least there will be no dependency on columns order, or their count).

The documentation of the Inst type fields is taken from the
x86spec/spec.go package comment.

It is implied that Inst represents the layout of the most
recent x86.csv version.

x86/x86avxgen depends on this package.

Change-Id: Icde4f6c1e80aeb69d34bd4e38fbfd3bbd7165c62
Reviewed-on: https://go-review.googlesource.com/66971
Run-TryBot: Iskander Sharipov <iskander.sharipov@intel.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Russ Cox <rsc@golang.org>
diff --git a/x86/x86csv/reader.go b/x86/x86csv/reader.go
new file mode 100644
index 0000000..9d4d9df
--- /dev/null
+++ b/x86/x86csv/reader.go
@@ -0,0 +1,69 @@
+// Copyright 2017 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package x86csv
+
+import (
+	"encoding/csv"
+	"io"
+)
+
+// A Reader reads entries from an "x86.csv" file.
+type Reader struct {
+	csv *csv.Reader // underlying CSV reader that supplies the raw records
+}
+
+// NewReader creates a Reader that consumes r, whose content is
+// expected to be an "x86.csv" file (format version=0.2).
+func NewReader(r io.Reader) *Reader {
+	reader := &Reader{csv: csv.NewReader(r)}
+	reader.csv.Comment = '#' // lines that begin with '#' are comments
+	return reader
+}
+
+// ReadAll reads all remaining rows from r.
+//
+// If an error occurs, it is returned together with all rows
+// that were successfully read before it.
+//
+// A successful call returns err == nil, not err == io.EOF:
+// ReadAll reads until EOF, so end of file is not reported as an error.
+func (r *Reader) ReadAll() ([]*Inst, error) {
+	var insts []*Inst
+	// err must outlive the loop: it is inspected once the loop ends.
+	inst, err := r.Read()
+	for ; err == nil; inst, err = r.Read() {
+		insts = append(insts, inst)
+	}
+	if err == io.EOF {
+		return insts, nil
+	}
+	return insts, err
+}
+
+// Read reads and returns the next Inst from the "x86.csv" file.
+// It returns {nil, io.EOF} at end of data, {nil, csv.ErrFieldCount} for a short row.
+func (r *Reader) Read() (*Inst, error) {
+	cols, err := r.csv.Read()
+	if err != nil {
+		return nil, err
+	}
+	if len(cols) < 11 { // fewer columns than the indexes used below
+		return nil, csv.ErrFieldCount
+	}
+	// The only place where column indexes are used; the rest relies on Inst.
+	return &Inst{
+		Intel:     cols[0],
+		Go:        cols[1],
+		GNU:       cols[2],
+		Encoding:  cols[3],
+		Mode32:    cols[4],
+		Mode64:    cols[5],
+		CPUID:     cols[6],
+		Tags:      cols[7],
+		Action:    cols[8],
+		Multisize: cols[9],
+		Size:      cols[10],
+	}, nil
+}
diff --git a/x86/x86csv/x86csv.go b/x86/x86csv/x86csv.go
new file mode 100644
index 0000000..14d5e5c
--- /dev/null
+++ b/x86/x86csv/x86csv.go
@@ -0,0 +1,98 @@
+// Copyright 2017 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package x86csv provides means to work with "x86.csv".
+// Only the latest version of the "x86.csv" format is supported.
+//
+// Terminology:
+//   given "OPCODE [ARGS...]" line;
+// Opcode - instruction name/mnemonic/class.
+// Args   - instruction operands.
+// Syntax - Opcode with Args.
+package x86csv
+
+import (
+	"strings"
+)
+
+// An Inst describes a single x86 instruction encoding form.
+type Inst struct {
+	// Intel syntax (example: "SHR r/m32, imm8").
+	Intel string
+
+	// Go assembler syntax (example: "SHRL imm8, r/m32").
+	Go string
+
+	// GNU binutils syntax (example: "shrl imm8, r/m32").
+	GNU string
+
+	// Binary encoding (example: "C1 /4 ib").
+	Encoding string
+
+	// Validity in 32-bit mode ("V", "I" or "N.E.").
+	Mode32 string
+
+	// Validity in 64-bit mode ("V", "I", "N.E.", "N.P.", "N.I." or "N.S.").
+	Mode64 string
+
+	// CPUID feature flags required (comma-separated).
+	CPUID string
+
+	// Hints about the instruction (comma-separated).
+	// See the "x86spec" package for a detailed overview of possible
+	// tags and their meaning.
+	Tags string
+
+	// Read/write action of the instruction on its arguments, in Intel order.
+	// For example, "rw,r" denotes that "SHR r/m32, imm8" reads and writes
+	// its first argument but only reads its second argument.
+	Action string
+
+	// Whether Intel syntax has encoding forms distinguished only by
+	// operand size, like most arithmetic instructions ("" or "Y").
+	Multisize string
+
+	// Size of the data operation in bits ("8" for MOVB, "16" for MOVW, and so on).
+	Size string
+}
+
+// IntelOpcode returns the opcode of the Intel syntax: the text before its first space.
+func (inst *Inst) IntelOpcode() string { return instOpcode(inst.Intel) }
+
+// GoOpcode returns the opcode of the Go (Plan9) syntax: the text before its first space.
+func (inst *Inst) GoOpcode() string { return instOpcode(inst.Go) }
+
+// GNUOpcode returns the opcode of the GNU binutils (mostly AT&T) syntax: the text before its first space.
+func (inst *Inst) GNUOpcode() string { return instOpcode(inst.GNU) }
+
+// IntelArgs returns the arguments of the Intel syntax, or nil if the syntax is a bare opcode.
+func (inst *Inst) IntelArgs() []string { return instArgs(inst.Intel) }
+
+// GoArgs returns the arguments of the Go (Plan9) syntax, or nil if the syntax is a bare opcode.
+func (inst *Inst) GoArgs() []string { return instArgs(inst.Go) }
+
+// GNUArgs returns the arguments of the GNU binutils (mostly AT&T) syntax, or nil if the syntax is a bare opcode.
+func (inst *Inst) GNUArgs() []string { return instArgs(inst.GNU) }
+
+// instOpcode extracts the opcode, i.e. everything before the first space.
+// A syntax without any space consists of the opcode alone.
+func instOpcode(syntax string) string {
+	if end := strings.IndexByte(syntax, ' '); end >= 0 {
+		return syntax[:end]
+	}
+	return syntax
+}
+
+// instArgs splits off the operands that follow the opcode; nil if syntax has no space.
+func instArgs(syntax string) []string {
+	sep := strings.IndexByte(syntax, ' ')
+	if sep == -1 {
+		return nil
+	}
+	operands := strings.Split(syntax[sep+1:], ",")
+	for k, operand := range operands {
+		operands[k] = strings.TrimSpace(operand)
+	}
+	return operands
+}
diff --git a/x86/x86csv/x86csv_test.go b/x86/x86csv/x86csv_test.go
new file mode 100644
index 0000000..2793f13
--- /dev/null
+++ b/x86/x86csv/x86csv_test.go
@@ -0,0 +1,108 @@
+// Copyright 2017 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package x86csv
+
+import (
+	"reflect"
+	"strings"
+	"testing"
+)
+
+// TestReader makes it harder to break Reader unintentionally.
+//
+// Deeper testing is probably not required because 99% of the job is
+// done by csv.Reader.
+func TestReader(t *testing.T) {
+	input := `# x86.csv v0.2
+"ADDSUBPD xmm1, xmm2/m128","ADDSUBPD xmm2/m128, xmm1","addsubpd xmm2/m128, xmm1","66 0F D0 /r","V","V","SSE3","","rw,r","",""
+"VPEXTRQ r/m64, xmm1, imm8","VPEXTRQ imm8, xmm1, r/m64","vpextrq imm8, xmm1, r/m64","VEX.128.66.0F3A.W1 16 /r ib","I","V","AVX","","w,r,r","",""
+"XOR r8, r/m8","XORB r/m8, r8","xorb r/m8, r8","REX 32 /r","N.E.","V","","pseudo64","rw,r","Y","8"
+`
+	want := []Inst{
+		{
+			Intel:    "ADDSUBPD xmm1, xmm2/m128",
+			Go:       "ADDSUBPD xmm2/m128, xmm1",
+			GNU:      "addsubpd xmm2/m128, xmm1",
+			Encoding: "66 0F D0 /r",
+			Mode32:   "V",
+			Mode64:   "V",
+			CPUID:    "SSE3",
+			Action:   "rw,r",
+		},
+		{
+			Intel:    "VPEXTRQ r/m64, xmm1, imm8",
+			Go:       "VPEXTRQ imm8, xmm1, r/m64",
+			GNU:      "vpextrq imm8, xmm1, r/m64",
+			Encoding: "VEX.128.66.0F3A.W1 16 /r ib",
+			Mode32:   "I",
+			Mode64:   "V",
+			CPUID:    "AVX",
+			Action:   "w,r,r",
+		},
+		{
+			Intel:     "XOR r8, r/m8",
+			Go:        "XORB r/m8, r8",
+			GNU:       "xorb r/m8, r8",
+			Encoding:  "REX 32 /r",
+			Mode32:    "N.E.",
+			Mode64:    "V",
+			Tags:      "pseudo64",
+			Action:    "rw,r",
+			Multisize: "Y",
+			Size:      "8",
+		},
+	}
+
+	r := NewReader(strings.NewReader(input))
+	inst, err := r.Read()
+	if err != nil {
+		t.Fatalf("Read(): %v", err)
+	}
+	restInsts, err := r.ReadAll()
+	if err != nil {
+		t.Fatalf("ReadAll(): %v", err)
+	}
+	if remainder, err := r.ReadAll(); remainder != nil || err != nil {
+		t.Errorf("ReadAll() on exhausted r: have (%v, %v), want (nil, nil)", remainder, err)
+	}
+	have := append([]*Inst{inst}, restInsts...)
+
+	if len(want) != len(have) {
+		t.Fatalf("len(have) is %d, want %d", len(have), len(want))
+	}
+	lines := strings.Split(input, "\n")
+	lines = lines[1:] // Drop comment line
+	for i := range want {
+		if want[i] != *have[i] {
+			t.Errorf("%s:\nhave: %v\nwant: %v", lines[i], *have[i], want[i])
+		}
+	}
+}
+
+// TestSyntaxSplit checks instOpcode and instArgs against known syntax strings.
+func TestSyntaxSplit(t *testing.T) {
+	cases := []struct {
+		syntax string
+		opcode string
+		args   []string
+	}{
+		{"RET", "RET", nil},
+		{"CALLW* r/m16", "CALLW*", []string{"r/m16"}},
+		{"JMP_FAR m16:16", "JMP_FAR", []string{"m16:16"}},
+		{"movl CR0-CR7, rmr32", "movl", []string{"CR0-CR7", "rmr32"}},
+		{"VFMSUBADD132PD xmm1, xmmV, xmm2/m128", "VFMSUBADD132PD", []string{"xmm1", "xmmV", "xmm2/m128"}},
+	}
+
+	for _, tc := range cases {
+		if op := instOpcode(tc.syntax); op != tc.opcode {
+			t.Errorf("%s: opcode mismatch (have `%s`, want `%s`)",
+				tc.syntax, op, tc.opcode)
+		}
+		if args := instArgs(tc.syntax); !reflect.DeepEqual(args, tc.args) {
+			t.Errorf("%s: args mismatch (have %v, want %s)",
+				tc.syntax, args, tc.args)
+		}
+	}
+}