// blob: 7dedb531a51a770f1c7fd5ba76668df92696f10a [file] [log] [blame]
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package datafmt
import (
"container/vector"
"go/scanner"
"go/token"
"os"
"strconv"
"strings"
)
// ----------------------------------------------------------------------------
// Parsing
// parser holds the complete state needed to parse a format specification:
// the scanner with a one-token look-ahead, plus the package declarations
// and format rules collected so far. Errors are accumulated through the
// embedded ErrorVector.
type parser struct {
// Embedded error collector; the parser itself is handed to the scanner
// as its error handler in init.
scanner.ErrorVector
// Tokenizer for the source being parsed; set up in init.
scanner scanner.Scanner
// File registered with the FileSet in init; used to turn a token.Pos
// into a Position when reporting errors.
file *token.File
// The three fields below are refreshed together by next.
pos token.Pos // token position
tok token.Token // one token look-ahead
lit string // token literal
packs map[string]string // PackageName -> ImportPath
rules map[string]expr // RuleName -> Expression
}
// next advances the parser by one token, refreshing pos, tok and lit
// from the scanner.
func (p *parser) next() {
	p.pos, p.tok, p.lit = p.scanner.Scan()

	// reflect reports composite types under their Go keyword names, so
	// these keywords are accepted as plain identifiers. p.lit already
	// holds the keyword text and needs no adjustment.
	if t := p.tok; t == token.CHAN || t == token.FUNC || t == token.INTERFACE || t == token.MAP || t == token.STRUCT {
		p.tok = token.IDENT
	}
}
// init prepares p for parsing src: it clears any previously recorded
// errors, creates empty package and rule tables, registers the source
// with fset under filename, and reads the first token.
func (p *parser) init(fset *token.FileSet, filename string, src []byte) {
	p.ErrorVector.Reset()
	p.packs, p.rules = make(map[string]string), make(map[string]expr)

	p.file = fset.AddFile(filename, fset.Base(), len(src))
	// With AllowIllegalChars the scanner returns '@' as token.ILLEGAL
	// without emitting an error message; p is the scanner's error handler.
	p.scanner.Init(p.file, src, p, scanner.AllowIllegalChars)
	p.next() // prime pos, tok, lit with the first token
}
// error records msg as an error located at pos, after resolving pos
// against the file being parsed.
func (p *parser) error(pos token.Pos, msg string) {
	where := p.file.Position(pos)
	p.Error(where, msg)
}
// errorExpected reports an "expected <msg>" error at pos. When pos is the
// position of the current token, the message also names the token that
// was found instead (and its literal text, for literal tokens).
func (p *parser) errorExpected(pos token.Pos, msg string) {
	text := "expected " + msg
	if pos != p.pos {
		// the error is not at the current token; nothing more to add
		p.error(pos, text)
		return
	}

	// the error happened at the current position: be more specific
	text += ", found '" + p.tok.String() + "'"
	if p.tok.IsLiteral() {
		text += " " + p.lit
	}
	p.error(pos, text)
}
// expect checks that the current token is tok, reporting an error if it
// is not, and then advances to the next token either way. It returns the
// position of the token that was current on entry.
func (p *parser) expect(tok token.Token) token.Pos {
	start := p.pos
	if found := p.tok; found != tok {
		wanted := "'" + tok.String() + "'"
		p.errorExpected(start, wanted)
	}
	// always consume a token so the parser keeps making progress
	p.next()
	return start
}
// parseIdentifier consumes the current token, which is expected to be an
// identifier, and returns its literal text. If the token is not an
// identifier an error is reported; the literal of the offending token is
// returned regardless.
func (p *parser) parseIdentifier() string {
	ident := p.lit // capture the text before expect advances the scanner
	p.expect(token.IDENT)
	return ident
}
// parseTypeName parses either a plain identifier or a package-qualified
// name of the form pkg.Name. For a qualified name the package name is
// replaced by its declared import path (an error is reported if the
// package was never declared). The boolean result is true only when the
// name was a single, unqualified identifier.
func (p *parser) parseTypeName() (string, bool) {
	start := p.pos
	name := p.parseIdentifier()
	if p.tok != token.PERIOD {
		return name, true
	}

	// name is a package name: resolve it to its import path
	importPath, declared := p.packs[name]
	if declared {
		name = importPath
	} else {
		p.error(start, "package not declared: "+name)
	}

	p.next() // consume '.'
	return name + "." + p.parseIdentifier(), false
}
// parseRuleName parses a rule name and returns it. A rule name is a
// (possibly package-qualified) type name, the keyword "default", or "/".
// Package-qualified names are returned with the package name resolved.
// The second result is true iff the rule name consists of a single
// identifier only (and thus could also be a package name).
func (p *parser) parseRuleName() (string, bool) {
	switch p.tok {
	case token.IDENT:
		return p.parseTypeName()
	case token.DEFAULT:
		p.next()
		return "default", false
	case token.QUO:
		p.next()
		return "/", false
	}

	// anything else cannot start a rule name
	p.errorExpected(p.pos, "rule name")
	p.next() // make progress in any case
	return "", false
}
// parseString consumes a string token and returns its unquoted value.
// If the current token is not a string, an error is reported, the token
// is skipped, and the empty string is returned.
func (p *parser) parseString() string {
	if p.tok != token.STRING {
		p.expect(token.STRING) // reports the error and advances
		return ""
	}

	// The Unquote error is deliberately dropped: it can only fail if the
	// scanner found an illegal string in the first place, and in that
	// case the scanner has already reported the problem.
	s, _ := strconv.Unquote(p.lit)
	p.next()
	return s
}
func (p *parser) parseLiteral() literal {
s := []byte(p.parseString())
// A string literal may contain %-format specifiers. To simplify
// and speed up printing of the literal, split it into segments
// that start with "%" possibly followed by a last segment that
// starts with some other character.
var list vector.Vector
i0 := 0
for i := 0; i < len(s); i++ {
if s[i] == '%' && i+1 < len(s) {
// the next segment starts with a % format
if i0 < i {
// the current segment is not empty, split it off
list.Push(s[i0:i])
i0 = i
}
i++ // skip %; let loop skip over char after %
}
}
// the final segment may start with any character
// (it is empty iff the string is empty)
list.Push(s[i0:])
// convert list into a literal
lit := make(literal, list.Len())
for i := 0; i < list.Len(); i++ {
lit[i] = list.At(i).([]byte)
}
return lit
}
// parseField parses a field operand: "@", "*", or an identifier,
// optionally followed by ":" and a rule name. It returns nil, without
// consuming any token, if the current token cannot start a field.
func (p *parser) parseField() expr {
	var fname string
	switch {
	case p.tok == token.ILLEGAL && p.lit == "@":
		fname = "@"
		p.next()
	case p.tok == token.MUL:
		fname = "*"
		p.next()
	case p.tok == token.IDENT:
		fname = p.parseIdentifier()
	default:
		// not a field (this includes illegal characters other than '@')
		return nil
	}

	// optional ":" RuleName suffix
	ruleName := ""
	if p.tok == token.COLON {
		p.next()
		ruleName, _ = p.parseRuleName()
	}

	return &field{fname, ruleName}
}
// parseOperand parses a single operand of a sequence:
//
//	"literal"          string literal
//	( expr )           parenthesized expression
//	( expr >> expr )   group
//	[ expr ]           option
//	{ expr }           repetition
//	{ expr / expr }    repetition with a divider expression
//	field              see parseField
//
// The result is nil if the current token does not start an operand.
func (p *parser) parseOperand() (x expr) {
	switch p.tok {
	case token.STRING:
		return p.parseLiteral()

	case token.LPAREN:
		p.next()
		x = p.parseExpression()
		if p.tok == token.SHR {
			p.next()
			x = &group{x, p.parseExpression()}
		}
		p.expect(token.RPAREN)
		return x

	case token.LBRACK:
		p.next()
		x = &option{p.parseExpression()}
		p.expect(token.RBRACK)
		return x

	case token.LBRACE:
		p.next()
		body := p.parseExpression()
		var div expr // stays nil when no "/" divider is given
		if p.tok == token.QUO {
			p.next()
			div = p.parseExpression()
		}
		x = &repetition{body, div}
		p.expect(token.RBRACE)
		return x
	}

	return p.parseField() // may be nil
}
func (p *parser) parseSequence() expr {
var list vector.Vector
for x := p.parseOperand(); x != nil; x = p.parseOperand() {
list.Push(x)
}
// no need for a sequence if list.Len() < 2
switch list.Len() {
case 0:
return nil
case 1:
return list.At(0).(expr)
}
// convert list into a sequence
seq := make(sequence, list.Len())
for i := 0; i < list.Len(); i++ {
seq[i] = list.At(i).(expr)
}
return seq
}
func (p *parser) parseExpression() expr {
var list vector.Vector
for {
x := p.parseSequence()
if x != nil {
list.Push(x)
}
if p.tok != token.OR {
break
}
p.next()
}
// no need for an alternatives if list.Len() < 2
switch list.Len() {
case 0:
return nil
case 1:
return list.At(0).(expr)
}
// convert list into a alternatives
alt := make(alternatives, list.Len())
for i := 0; i < list.Len(); i++ {
alt[i] = list.At(i).(expr)
}
return alt
}
// parseFormat parses the whole source: a ";"-separated list of entries,
// each either a package declaration (Name "import/path") or a format
// rule (RuleName = Expression). Declarations are recorded in p.packs and
// rules in p.rules; duplicates and malformed entries are reported as
// errors. Parsing stops at the first entry that is not followed by ";",
// after which end of file is expected.
func (p *parser) parseFormat() {
	for p.tok != token.EOF {
		start := p.pos
		name, isIdent := p.parseRuleName()

		switch p.tok {
		case token.STRING:
			// package declaration
			importPath := p.parseString()
			_, declared := p.packs[name]
			switch {
			case !isIdent:
				p.error(start, "illegal package name: "+name)
			case declared:
				p.error(start, "package already declared: "+name)
			default:
				p.packs[name] = importPath
			}

		case token.ASSIGN:
			// format rule
			p.next()
			x := p.parseExpression()
			if _, declared := p.rules[name]; declared {
				p.error(start, "format rule already declared: "+name)
			} else {
				p.rules[name] = x
			}

		default:
			p.errorExpected(p.pos, "package declaration or format rule")
			p.next() // make progress in any case
		}

		// entries are separated by semicolons
		if p.tok != token.SEMICOLON {
			break
		}
		p.next()
	}
	p.expect(token.EOF)
}
// remap resolves the package part of a qualified name. If name contains
// a ".", the text before the first "." is looked up as a package name
// and replaced by its declared import path. If the package was never
// declared, an error without position information is recorded and name
// is returned unchanged. Names without a "." are returned as they are.
func remap(p *parser, name string) string {
	dot := strings.Index(name, ".")
	if dot < 0 {
		return name
	}

	// rest keeps the leading "."
	pkg, rest := name[:dot], name[dot:]
	if importPath, found := p.packs[pkg]; found {
		return importPath + rest
	}

	var noPos token.Position
	p.Error(noPos, "package not declared: "+pkg)
	return name
}
// Parse parses a set of format productions from source src, which is
// registered with fset under filename. Custom formatters may be provided
// via a map of formatter functions; a formatter name that is qualified
// with a declared package name has that package name replaced by its
// import path before it is added to the rules. A custom formatter whose
// name clashes with an already declared rule is reported as an error.
//
// If there are no errors, the result is a Format and the error is nil.
// Otherwise the format is nil and a non-empty ErrorList is returned.
//
func Parse(fset *token.FileSet, filename string, src []byte, fmap FormatterMap) (Format, os.Error) {
	// parse source
	var p parser
	p.init(fset, filename, src)
	p.parseFormat()

	// add custom formatters, if any
	for name, form := range fmap {
		name = remap(&p, name)
		if _, found := p.rules[name]; !found {
			p.rules[name] = &custom{name, form}
		} else {
			// there is no source position for a formatter supplied via fmap
			var invalidPos token.Position
			p.Error(invalidPos, "formatter already declared: "+name)
		}
	}

	// Honor the documented contract: never hand out a partially built
	// rule set together with an error.
	if err := p.GetError(scanner.NoMultiples); err != nil {
		return nil, err
	}
	return p.rules, nil
}