// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package asm implements the parser and instruction generator for the assembler.
// TODO: Split apart?
package asm
import (
"fmt"
"log"
"os"
"strconv"
"text/scanner"
"cmd/asm/internal/addr"
"cmd/asm/internal/arch"
"cmd/asm/internal/lex"
"cmd/internal/obj"
)
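// Parser holds the state for parsing assembly source into a list of Progs.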
type Parser struct {
lex lex.TokenReader
lineNum int // Line number in source file.
histLineNum int // Cumulative line number across source files.
errorLine int // (Cumulative) line number of last error.
errorCount int // Number of errors.
pc int64 // virtual PC; count of Progs; doesn't advance for GLOBL or DATA.
input []lex.Token
inputPos int
pendingLabels []string // Labels to attach to next instruction.
labels map[string]*obj.Prog
toPatch []Patch
addr []addr.Addr
arch *arch.Arch
linkCtxt *obj.Link
firstProg *obj.Prog
lastProg *obj.Prog
dataAddr map[string]int64 // Most recent address for DATA for this symbol.
}
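// Patch records an instruction that refers to a label that has not yet been defined; such references are resolved after all input has been parsed.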
type Patch struct {
prog *obj.Prog
label string
}
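// NewParser returns a new Parser using the given link context, architecture, and token reader.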
func NewParser(ctxt *obj.Link, ar *arch.Arch, lexer lex.TokenReader) *Parser {
return &Parser{
linkCtxt: ctxt,
arch: ar,
lex: lexer,
labels: make(map[string]*obj.Prog),
dataAddr: make(map[string]int64),
}
}
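// errorf reports a formatted error at the current source line. At most one error is reported per line, and assembly stops after too many errors.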
func (p *Parser) errorf(format string, args ...interface{}) {
if p.histLineNum == p.errorLine {
// Only one error per line.
return
}
p.errorLine = p.histLineNum
// Put file and line information on head of message.
format = "%s:%d: " + format + "\n"
args = append([]interface{}{p.lex.File(), p.lineNum}, args...)
fmt.Fprintf(os.Stderr, format, args...)
p.errorCount++
if p.errorCount > 10 {
log.Fatal("too many errors")
}
}
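// Parse consumes the entire input, assembles it, and returns the first Prog of the result and whether assembly succeeded.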
func (p *Parser) Parse() (*obj.Prog, bool) {
for p.line() {
}
if p.errorCount > 0 {
return nil, false
}
p.patch()
return p.firstProg, true
}
// line parses one line of input:
//	{ label: } WORD [ arg {, arg} ] ('\n' | ';')
// It reports whether there is more input to process.
func (p *Parser) line() bool {
// Skip newlines.
var tok lex.ScanToken
for {
tok = p.lex.Next()
// We save the line number here so error messages from this instruction
// are labeled with this line. Otherwise we complain after we've absorbed
// the terminating newline and the line numbers are off by one in errors.
p.lineNum = p.lex.Line()
p.histLineNum = lex.HistLine()
switch tok {
case '\n':
continue
case scanner.EOF:
return false
}
break
}
// First item must be an identifier.
if tok != scanner.Ident {
p.errorf("expected identifier, found %q", p.lex.Text())
return false // Might as well stop now.
}
word := p.lex.Text()
operands := make([][]lex.Token, 0, 3)
// Zero or more comma-separated operands, one per loop.
for tok != '\n' && tok != ';' {
// Process one operand.
items := make([]lex.Token, 0, 3)
for {
tok = p.lex.Next()
if tok == ':' && len(operands) == 0 && len(items) == 0 { // First token.
p.pendingLabels = append(p.pendingLabels, word)
return true
}
if tok == scanner.EOF {
p.errorf("unexpected EOF")
return false
}
if tok == '\n' || tok == ';' || tok == ',' {
break
}
items = append(items, lex.Make(tok, p.lex.Text()))
}
if len(items) > 0 {
operands = append(operands, items)
} else if len(operands) > 0 || tok == ',' {
// Had a comma with nothing after.
p.errorf("missing operand")
}
}
i := p.arch.Pseudos[word]
if i != 0 {
p.pseudo(i, word, operands)
return true
}
i = p.arch.Instructions[word]
if i != 0 {
p.instruction(i, word, operands)
return true
}
p.errorf("unrecognized instruction %s", word)
return true
}
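// instruction assembles a single machine instruction from its parsed operands.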
func (p *Parser) instruction(op int, word string, operands [][]lex.Token) {
p.addr = p.addr[0:0]
for _, op := range operands {
p.addr = append(p.addr, p.address(op))
}
// Is it a jump? TODO
if word[0] == 'J' || word == "CALL" {
p.asmJump(op, p.addr)
return
}
p.asmInstruction(op, p.addr)
}
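// pseudo assembles a pseudo-instruction: TEXT, DATA, GLOBL, PCDATA, or FUNCDATA.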
func (p *Parser) pseudo(op int, word string, operands [][]lex.Token) {
switch op {
case p.arch.ATEXT:
p.asmText(word, operands)
case p.arch.ADATA:
p.asmData(word, operands)
case p.arch.AGLOBL:
p.asmGlobl(word, operands)
case p.arch.APCDATA:
p.asmPCData(word, operands)
case p.arch.AFUNCDATA:
p.asmFuncData(word, operands)
default:
p.errorf("unimplemented: %s", word)
}
}
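// start sets the parser's input to the tokens of a single operand.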
func (p *Parser) start(operand []lex.Token) {
p.input = operand
p.inputPos = 0
}
// address parses the operand into a link address structure.
func (p *Parser) address(operand []lex.Token) addr.Addr {
p.start(operand)
addr := addr.Addr{}
p.operand(&addr)
return addr
}
// parse (R). The opening paren is known to be there.
// The return value states whether it was a scaled mode.
func (p *Parser) parenRegister(a *addr.Addr) bool {
p.next()
tok := p.next()
if tok.ScanToken != scanner.Ident {
p.errorf("expected register, got %s", tok)
}
r, present := p.arch.Registers[tok.String()]
if !present {
p.errorf("expected register, found %s", tok.String())
}
a.IsIndirect = true
scaled := p.peek() == '*'
if scaled {
// (R*2)
p.next()
tok := p.get(scanner.Int)
a.Scale = p.scale(tok.String())
a.Index = int16(r) // TODO: r should have type int16 but is uint8.
} else {
if a.HasRegister {
p.errorf("multiple indirections")
}
a.HasRegister = true
a.Register = int16(r)
}
p.expect(')')
p.next()
return scaled
}
// scale converts a decimal string into a valid scale factor.
func (p *Parser) scale(s string) int8 {
switch s {
case "1", "2", "4", "8":
return int8(s[0] - '0')
}
p.errorf("bad scale: %s", s)
return 0
}
// parse (R) or (R)(R*scale). The opening paren is known to be there.
func (p *Parser) addressMode(a *addr.Addr) {
scaled := p.parenRegister(a)
if !scaled && p.peek() == '(' {
p.parenRegister(a)
}
}
// operand parses a general operand and stores the result in *a.
func (p *Parser) operand(a *addr.Addr) bool {
if len(p.input) == 0 {
p.errorf("empty operand: cannot happen")
return false
}
switch p.peek() {
case '$':
p.next()
switch p.peek() {
case scanner.Ident:
a.IsImmediateAddress = true
p.operand(a) // TODO
case scanner.String:
a.IsImmediateConstant = true
a.HasString = true
a.String = p.atos(p.next().String())
case scanner.Int, scanner.Float, '+', '-', '~', '(':
a.IsImmediateConstant = true
if p.have(scanner.Float) {
a.HasFloat = true
a.Float = p.floatExpr()
} else {
a.HasOffset = true
a.Offset = int64(p.expr())
}
default:
p.errorf("illegal %s in immediate operand", p.next().String())
}
case '*':
p.next()
tok := p.next()
r, present := p.arch.Registers[tok.String()]
if !present {
p.errorf("expected register; got %s", tok.String())
}
a.HasRegister = true
a.Register = int16(r)
case '(':
p.next()
if p.peek() == scanner.Ident {
p.back()
p.addressMode(a)
break
}
p.back()
fallthrough
case '+', '-', '~', scanner.Int, scanner.Float:
if p.have(scanner.Float) {
a.HasFloat = true
a.Float = p.floatExpr()
} else {
a.HasOffset = true
a.Offset = int64(p.expr())
}
if p.peek() != scanner.EOF {
p.expect('(')
p.addressMode(a)
}
case scanner.Ident:
tok := p.next()
// Either R or (most general) ident<>+4(SB)(R*scale).
if r, present := p.arch.Registers[tok.String()]; present {
a.HasRegister = true
a.Register = int16(r)
// Possibly register pair: DX:AX.
if p.peek() == ':' {
p.next()
tok = p.get(scanner.Ident)
a.HasRegister2 = true
a.Register2 = int16(p.arch.Registers[tok.String()])
}
break
}
// Weirdness with statics: Might now have "<>".
if p.peek() == '<' {
p.next()
p.get('>')
a.IsStatic = true
}
if p.peek() == '+' || p.peek() == '-' {
a.HasOffset = true
a.Offset = int64(p.expr())
}
a.Symbol = tok.String()
if p.peek() == scanner.EOF {
break
}
// Expect (SB) or (FP)
p.expect('(')
p.parenRegister(a)
if a.Register != arch.RSB && a.Register != arch.RFP && a.Register != arch.RSP {
p.errorf("expected SB, FP, or SP offset for %s", tok)
}
// Possibly have scaled register (CX*8).
if p.peek() != scanner.EOF {
p.expect('(')
p.addressMode(a)
}
default:
p.errorf("unexpected %s in operand", p.next())
}
p.expect(scanner.EOF)
return true
}
// Note: There are two changes in the expression handling here
// compared to the old yacc/C implementations. Neither has
// much practical consequence because the expressions we
// see in assembly code are simple, but for the record:
//
// 1) Evaluation uses uint64; the old one used int64.
// 2) Precedence uses Go rules not C rules.
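// For example, 1<<2+3 evaluates to (1<<2)+3 == 7 under Go precedence
// rather than 1<<(2+3) == 32 under C precedence.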
// expr = term | term ('+' | '-' | '|' | '^') term.
func (p *Parser) expr() uint64 {
value := p.term()
for {
switch p.peek() {
case '+':
p.next()
x := p.term()
if addOverflows(x, value) {
p.errorf("overflow in %d+%d", value, x)
}
value += x
case '-':
p.next()
value -= p.term()
case '|':
p.next()
value |= p.term()
case '^':
p.next()
value ^= p.term()
default:
return value
}
}
}
// floatExpr = fconst | '-' floatExpr | '+' floatExpr | '(' floatExpr ')'
func (p *Parser) floatExpr() float64 {
tok := p.next()
switch tok.ScanToken {
case '(':
v := p.floatExpr()
if p.next().ScanToken != ')' {
p.errorf("missing closing paren")
}
return v
case '+':
return +p.floatExpr()
case '-':
return -p.floatExpr()
case scanner.Float:
return p.atof(tok.String())
}
p.errorf("unexpected %s evaluating float expression", tok)
return 0
}
// term = factor | factor ('*' | '/' | '%' | '>>' | '<<' | '&') factor
func (p *Parser) term() uint64 {
value := p.factor()
for {
switch p.peek() {
case '*':
p.next()
x := p.factor()
if mulOverflows(value, x) {
p.errorf("%d * %d overflows", value, x)
}
value *= x
case '/':
p.next()
value /= p.factor()
case '%':
p.next()
value %= p.factor()
case lex.LSH:
p.next()
shift := p.factor()
if int64(shift) < 0 {
p.errorf("negative left shift %d", shift)
}
if shiftOverflows(value, shift) {
p.errorf("%d << %d overflows", value, shift)
}
value <<= uint(shift)
case lex.RSH:
p.next()
shift := p.factor()
if int64(shift) < 0 {
p.errorf("negative right shift %d", shift)
}
value >>= uint(shift)
case '&':
p.next()
value &= p.factor()
default:
return value
}
}
p.errorf("unexpected %s evaluating expression", p.peek())
return 0
}
// factor = const | '+' factor | '-' factor | '~' factor | '(' expr ')'
func (p *Parser) factor() uint64 {
tok := p.next()
switch tok.ScanToken {
case scanner.Int:
return p.atoi(tok.String())
case '+':
return +p.factor()
case '-':
return -p.factor()
case '~':
return ^p.factor()
case '(':
v := p.expr()
if p.next().ScanToken != ')' {
p.errorf("missing closing paren")
}
return v
}
p.errorf("unexpected %s evaluating expression", tok)
return 0
}
// positiveAtoi returns the value of str as an int64, which must be non-negative.
func (p *Parser) positiveAtoi(str string) int64 {
value, err := strconv.ParseInt(str, 0, 64)
if err != nil {
p.errorf("%s", err)
}
if value < 0 {
p.errorf("%s overflows int64", str)
}
return value
}
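// atoi returns the value of str interpreted as a uint64.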
func (p *Parser) atoi(str string) uint64 {
value, err := strconv.ParseUint(str, 0, 64)
if err != nil {
p.errorf("%s", err)
}
return value
}
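// atof returns the value of str interpreted as a float64.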
func (p *Parser) atof(str string) float64 {
value, err := strconv.ParseFloat(str, 64)
if err != nil {
p.errorf("%s", err)
}
return value
}
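// atos returns the value of str interpreted as a quoted Go string.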
func (p *Parser) atos(str string) string {
value, err := strconv.Unquote(str)
if err != nil {
p.errorf("%s", err)
}
return value
}
// EOF represents the end of input.
var EOF = lex.Make(scanner.EOF, "EOF")
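// next returns the next token of the current operand, or EOF if the operand is exhausted.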
func (p *Parser) next() lex.Token {
if !p.more() {
return EOF
}
tok := p.input[p.inputPos]
p.inputPos++
return tok
}
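// back steps the input back one token.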
func (p *Parser) back() {
p.inputPos--
}
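// peek returns the type of the next token without consuming it.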
func (p *Parser) peek() lex.ScanToken {
if p.more() {
return p.input[p.inputPos].ScanToken
}
return scanner.EOF
}
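// more reports whether there are more tokens in the current operand.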
func (p *Parser) more() bool {
return p.inputPos < len(p.input)
}
// get verifies that the next item has the expected type and returns it.
func (p *Parser) get(expected lex.ScanToken) lex.Token {
p.expect(expected)
return p.next()
}
// expect verifies that the next item has the expected type. It does not consume it.
func (p *Parser) expect(expected lex.ScanToken) {
if p.peek() != expected {
p.errorf("expected %s, found %s", expected, p.next())
}
}
// have reports whether the remaining tokens contain the specified token.
func (p *Parser) have(token lex.ScanToken) bool {
for i := p.inputPos; i < len(p.input); i++ {
if p.input[i].ScanToken == token {
return true
}
}
return false
}