x86/xeddata/reader.go - arch - Git at Google

 // Copyright 2018 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

 package xeddata

 import (
 	"bufio"
 	"errors"
 	"fmt"
 	"io"
 	"regexp"
 	"strings"
 )

 // Reader reads enc/dec-instruction objects from XED datafile.
 type Reader struct {
 	// scanner yields the input line by line.
 	scanner *bufio.Scanner

 	// lines is scratch storage for the lines of the object
 	// being read; it is re-used between Read calls.
 	lines []string

 	// joinLines is true if the last scanned line ends with
 	// a newline escape (backslash).
 	joinLines bool
 }

 // NewReader returns a new Reader that reads from r.
 // The input is consumed through a bufio.Scanner wrapped around r.
 func NewReader(r io.Reader) *Reader {
 	scanner := bufio.NewScanner(r)
 	return newReader(scanner)
 }

 // newReader builds a Reader on top of scanner and installs
 // Reader.split as the scanner's split function.
 func newReader(scanner *bufio.Scanner) *Reader {
 	r := new(Reader)
 	r.scanner = scanner
 	r.lines = make([]string, 0, 64)
 	scanner.Split(r.split)
 	return r
 }

 // split implements bufio.SplitFunc for Reader.
 //
 // It delegates to bufio.ScanLines and, for every non-empty token,
 // records in joinLines whether that token ends with a backslash
 // (\-style newline escape). Reader.scanLine consults the flag.
 func (r *Reader) split(data []byte, atEOF bool) (int, []byte, error) {
 	n, token, err := bufio.ScanLines(data, atEOF)
 	if err != nil || len(token) == 0 {
 		// Nothing to inspect: leave joinLines as it was.
 		return n, token, err
 	}
 	last := token[len(token)-1]
 	r.joinLines = last == '\\'
 	return n, token, err
 }

 // Read reads a single XED instruction object from
 // the stream backed by the reader.
 //
 // Lines are skipped until one that starts with '{' is found.
 // All following lines, up to the one that starts with '}',
 // are collected and handed to parseLines.
 //
 // If there is no data left to be read, the returned error is io.EOF.
 // If the underlying scanner stopped because of an error (a failed read,
 // a too long line), that error is returned instead of io.EOF or the
 // "no matching '}'" error, so that truncated input is not mistaken
 // for a clean end of data.
 func (r *Reader) Read() (*Object, error) {
 	for line := r.scanLine(); line != ""; line = r.scanLine() {
 		if line[0] != '{' {
 			continue
 		}
 		lines := r.lines[:0] // Object lines
 		for body := r.scanLine(); body != ""; body = r.scanLine() {
 			if body[0] == '}' {
 				return r.parseLines(lines)
 			}
 			lines = append(lines, body)
 		}
 		// Input ended inside an object: prefer the real cause, if any.
 		if err := r.scanner.Err(); err != nil {
 			return nil, err
 		}
 		return nil, errors.New("no matching '}' found")
 	}

 	// scanLine returns "" both on a clean EOF and on a scanner failure;
 	// scanner.Err() tells them apart (it is nil on a clean EOF).
 	if err := r.scanner.Err(); err != nil {
 		return nil, err
 	}
 	return nil, io.EOF
 }

 // ReadAll reads all the remaining objects from r.
 // A successful call returns err == nil, not err == io.EOF,
 // just like csv.Reader.ReadAll().
 //
 // When Read fails with any other error, the objects collected
 // so far are returned together with that error.
 func (r *Reader) ReadAll() ([]*Object, error) {
 	objects := []*Object{}
 	for {
 		switch o, err := r.Read(); err {
 		case nil:
 			objects = append(objects, o)
 		case io.EOF:
 			return objects, nil
 		default:
 			return objects, err
 		}
 	}
 }

 // instLineRE matches valid XED object/inst line.
 // It expects lines that are joined by '\' to be concatenated.
 //
 // The format can be described as:
 //
 //	unquoted field name "[A-Z_]+" at the start of the line (captured)
 //	field value delimiter ":" with optional whitespace on both sides
 //	field value string "[^#]*" (captured)
 //
 // The value capture stops at the first '#', so an optional
 // trailing comment is left out of the match.
 var instLineRE = regexp.MustCompile(`^([A-Z_]+)\s*:\s*([^#]*)`)

 // parseLines turns collected object lines into Object.
 //
 // Every line is stripped of leading spaces. Lines that are blank after
 // that, and lines that start with '#', are skipped. Any other line must
 // match instLineRE, otherwise a "malformed line" error is returned.
 // A field name that is not handled below results in an
 // "unknown key token" error.
 //
 // OPERANDS, PATTERN and IFORM may repeat. The i-th OPERANDS and PATTERN
 // values form the i-th Inst of the object, so their counts must be equal.
 func (r *Reader) parseLines(lines []string) (*Object, error) {
 	o := &Object{}

 	// Repeatable tokens.
 	// We can not assign them eagerly, because these fields
 	// are not guaranteed to follow strict order.
 	var (
 		operands []string
 		iforms   []string
 		patterns []string
 	)

 	for _, l := range lines {
 		l = strings.TrimLeft(l, " ")
 		// A line made only of spaces trims down to "";
 		// indexing it would panic, so skip it like a comment line.
 		if l == "" || l[0] == '#' {
 			continue
 		}
 		m := instLineRE.FindStringSubmatch(l)
 		if len(m) == 0 {
 			return nil, fmt.Errorf("malformed line: %s", l)
 		}
 		key, val := m[1], m[2]
 		val = strings.TrimSpace(val)

 		switch key {
 		case "ICLASS":
 			o.Iclass = val
 		case "DISASM":
 			o.Disasm = val
 		case "DISASM_INTEL":
 			o.DisasmIntel = val
 		case "DISASM_ATTSV":
 			o.DisasmATTSV = val
 		case "ATTRIBUTES":
 			o.Attributes = val
 		case "UNAME":
 			o.Uname = val
 		case "CPL":
 			o.CPL = val
 		case "CATEGORY":
 			o.Category = val
 		case "EXTENSION":
 			o.Extension = val
 		case "EXCEPTIONS":
 			o.Exceptions = val
 		case "ISA_SET":
 			o.ISASet = val
 		case "FLAGS":
 			o.Flags = val
 		case "COMMENT":
 			o.Comment = val
 		case "VERSION":
 			o.Version = val
 		case "REAL_OPCODE":
 			o.RealOpcode = val

 		case "OPERANDS":
 			operands = append(operands, val)
 		case "PATTERN":
 			patterns = append(patterns, val)
 		case "IFORM":
 			iforms = append(iforms, val)

 		default:
 			// Being strict about unknown field names gives a nice
 			// XED file validation diagnostics.
 			// Also defends against typos in test files.
 			return nil, fmt.Errorf("unknown key token: %s", key)
 		}
 	}

 	if len(operands) != len(patterns) {
 		return nil, fmt.Errorf("%s: OPERANDS and PATTERN lines mismatch", o.Opcode())
 	}

 	insts := make([]*Inst, len(operands))
 	for i := range operands {
 		insts[i] = &Inst{
 			Object:   o,
 			Index:    i,
 			Pattern:  patterns[i],
 			Operands: operands[i],
 		}
 		// There can be less IFORMs than insts.
 		if i < len(iforms) {
 			insts[i].Iform = iforms[i]
 		}
 	}
 	o.Insts = insts

 	return o, nil
 }

 // scanLine tries to fetch the next non-empty line from scanner.
 //
 // Empty lines are skipped. When the joinLines flag is set for the
 // line just scanned, its trailing backslash is dropped and the next
 // non-empty line is appended to it; this repeats for as long as
 // the flag stays set.
 //
 // If scanner.Scan() returns false, whatever has been collected so far
 // is returned; that is the empty string when no non-empty line was found.
 func (r *Reader) scanLine() string {
 	joined := ""
 	for r.scanner.Scan() {
 		text := r.scanner.Text()
 		switch {
 		case text == "":
 			// Blank line: keep looking.
 		case r.joinLines:
 			// Drop the one-byte newline escape and keep collecting.
 			joined += text[:len(text)-1]
 		default:
 			return joined + text
 		}
 	}
 	return joined
 }
	// Copyright 2018 The Go Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style
	// license that can be found in the LICENSE file.

	package xeddata

	import (
	"bufio"
	"errors"
	"fmt"
	"io"
	"regexp"
	"strings"
	)

	// Reader reads enc/dec-instruction objects from XED datafile.
	type Reader struct {
		// scanner yields the input line by line.
		scanner *bufio.Scanner

		// lines is scratch storage for the lines of the object
		// being read; it is re-used between Read calls.
		lines []string

		// joinLines is true if the last scanned line ends with
		// a newline escape (backslash).
		joinLines bool
	}

	// NewReader returns a new Reader that reads from r.
	// The input is consumed through a bufio.Scanner wrapped around r.
	func NewReader(r io.Reader) *Reader {
		scanner := bufio.NewScanner(r)
		return newReader(scanner)
	}

	// newReader builds a Reader on top of scanner and installs
	// Reader.split as the scanner's split function.
	//
	// Both the parameter and the result are pointers: the Reader registers
	// its own split method with the scanner, so the scanner and the
	// returned Reader must be the very values used later, not copies.
	func newReader(scanner *bufio.Scanner) *Reader {
		r := &Reader{
			lines:   make([]string, 0, 64),
			scanner: scanner,
		}
		scanner.Split(r.split)
		return r
	}

	// split implements bufio.SplitFunc for Reader.
	//
	// It delegates to bufio.ScanLines and, for every non-empty token,
	// records in joinLines whether that token ends with a backslash
	// (\-style newline escape). Reader.scanLine consults the flag.
	func (r *Reader) split(data []byte, atEOF bool) (int, []byte, error) {
		n, token, err := bufio.ScanLines(data, atEOF)
		if err != nil || len(token) == 0 {
			// Nothing to inspect: leave joinLines as it was.
			return n, token, err
		}
		last := token[len(token)-1]
		r.joinLines = last == '\\'
		return n, token, err
	}

	// Read reads a single XED instruction object from
	// the stream backed by the reader.
	//
	// Lines are skipped until one that starts with '{' is found.
	// All following lines, up to the one that starts with '}',
	// are collected and handed to parseLines.
	//
	// If there is no data left to be read, the returned error is io.EOF.
	// If the underlying scanner stopped because of an error (a failed read,
	// a too long line), that error is returned instead of io.EOF or the
	// "no matching '}'" error, so that truncated input is not mistaken
	// for a clean end of data.
	func (r *Reader) Read() (*Object, error) {
		for line := r.scanLine(); line != ""; line = r.scanLine() {
			if line[0] != '{' {
				continue
			}
			lines := r.lines[:0] // Object lines
			for body := r.scanLine(); body != ""; body = r.scanLine() {
				if body[0] == '}' {
					return r.parseLines(lines)
				}
				lines = append(lines, body)
			}
			// Input ended inside an object: prefer the real cause, if any.
			if err := r.scanner.Err(); err != nil {
				return nil, err
			}
			return nil, errors.New("no matching '}' found")
		}

		// scanLine returns "" both on a clean EOF and on a scanner failure;
		// scanner.Err() tells them apart (it is nil on a clean EOF).
		if err := r.scanner.Err(); err != nil {
			return nil, err
		}
		return nil, io.EOF
	}

	// ReadAll reads all the remaining objects from r.
	// A successful call returns err == nil, not err == io.EOF,
	// just like csv.Reader.ReadAll().
	//
	// When Read fails with any other error, the objects collected
	// so far are returned together with that error.
	func (r *Reader) ReadAll() ([]*Object, error) {
		objects := []*Object{}
		for {
			o, err := r.Read()
			if err == io.EOF {
				return objects, nil
			}
			if err != nil {
				return objects, err
			}
			objects = append(objects, o)
		}
	}

	// instLineRE matches valid XED object/inst line.
	// It expects lines that are joined by '\' to be concatenated.
	//
	// The format can be described as:
	//
	//	unquoted field name "[A-Z_]+" at the start of the line (captured)
	//	field value delimiter ":" with optional whitespace on both sides
	//	field value string "[^#]*" (captured)
	//
	// The value capture stops at the first '#', so an optional
	// trailing comment is left out of the match.
	//
	// The whitespace around ":" must be matched with `\s*`: a bare `\s`
	// would demand exactly one whitespace character on each side and
	// reject ordinary lines such as "ICLASS: ADD".
	var instLineRE = regexp.MustCompile(`^([A-Z_]+)\s*:\s*([^#]*)`)

	// parseLines turns collected object lines into Object.
	//
	// Every line is stripped of leading spaces. Lines that are blank after
	// that, and lines that start with '#', are skipped. Any other line must
	// match instLineRE, otherwise a "malformed line" error is returned.
	// A field name that is not handled below results in an
	// "unknown key token" error.
	//
	// OPERANDS, PATTERN and IFORM may repeat. The i-th OPERANDS and PATTERN
	// values form the i-th Inst of the object, so their counts must be equal.
	func (r *Reader) parseLines(lines []string) (*Object, error) {
		o := &Object{}

		// Repeatable tokens.
		// We can not assign them eagerly, because these fields
		// are not guaranteed to follow strict order.
		var (
			operands []string
			iforms   []string
			patterns []string
		)

		for _, l := range lines {
			l = strings.TrimLeft(l, " ")
			// A line made only of spaces trims down to "";
			// indexing it would panic, so skip it like a comment line.
			if l == "" || l[0] == '#' {
				continue
			}
			m := instLineRE.FindStringSubmatch(l)
			if len(m) == 0 {
				return nil, fmt.Errorf("malformed line: %s", l)
			}
			key, val := m[1], m[2]
			val = strings.TrimSpace(val)

			switch key {
			case "ICLASS":
				o.Iclass = val
			case "DISASM":
				o.Disasm = val
			case "DISASM_INTEL":
				o.DisasmIntel = val
			case "DISASM_ATTSV":
				o.DisasmATTSV = val
			case "ATTRIBUTES":
				o.Attributes = val
			case "UNAME":
				o.Uname = val
			case "CPL":
				o.CPL = val
			case "CATEGORY":
				o.Category = val
			case "EXTENSION":
				o.Extension = val
			case "EXCEPTIONS":
				o.Exceptions = val
			case "ISA_SET":
				o.ISASet = val
			case "FLAGS":
				o.Flags = val
			case "COMMENT":
				o.Comment = val
			case "VERSION":
				o.Version = val
			case "REAL_OPCODE":
				o.RealOpcode = val

			case "OPERANDS":
				operands = append(operands, val)
			case "PATTERN":
				patterns = append(patterns, val)
			case "IFORM":
				iforms = append(iforms, val)

			default:
				// Being strict about unknown field names gives a nice
				// XED file validation diagnostics.
				// Also defends against typos in test files.
				return nil, fmt.Errorf("unknown key token: %s", key)
			}
		}

		if len(operands) != len(patterns) {
			return nil, fmt.Errorf("%s: OPERANDS and PATTERN lines mismatch", o.Opcode())
		}

		insts := make([]*Inst, len(operands))
		for i := range operands {
			insts[i] = &Inst{
				Object:   o,
				Index:    i,
				Pattern:  patterns[i],
				Operands: operands[i],
			}
			// There can be less IFORMs than insts.
			if i < len(iforms) {
				insts[i].Iform = iforms[i]
			}
		}
		o.Insts = insts

		return o, nil
	}

	// scanLine tries to fetch the next non-empty line from scanner.
	//
	// Empty lines are skipped. When the joinLines flag is set for the
	// line just scanned, its trailing backslash is dropped and the next
	// non-empty line is appended to it; this repeats for as long as
	// the flag stays set.
	//
	// If scanner.Scan() returns false, whatever has been collected so far
	// is returned; that is the empty string when no non-empty line was found.
	func (r *Reader) scanLine() string {
		joined := ""
		for r.scanner.Scan() {
			text := r.scanner.Text()
			switch {
			case text == "":
				// Blank line: keep looking.
			case r.joinLines:
				// Drop the one-byte newline escape and keep collecting.
				joined += text[:len(text)-1]
			default:
				return joined + text
			}
		}
		return joined
	}