// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package ebnf

import (
	"go/scanner"
	"go/token"
	"os"
	"strconv"
)

type parser struct {
	fset *token.FileSet
	scanner.ErrorVector
	scanner scanner.Scanner
	pos     token.Pos   // token position
	tok     token.Token // one token look-ahead
	lit     string      // token literal
}

func (p *parser) next() {
	p.pos, p.tok, p.lit = p.scanner.Scan()
	if p.tok.IsKeyword() {
		// TODO Should keyword mapping always happen outside scanner?
		//      Or should there be a flag to scanner to enable keyword mapping?
		p.tok = token.IDENT
	}
}

func (p *parser) error(pos token.Pos, msg string) {
	p.Error(p.fset.Position(pos), msg)
}

func (p *parser) errorExpected(pos token.Pos, msg string) {
	msg = "expected " + msg
	if pos == p.pos {
		// the error happened at the current position;
		// make the error message more specific
		msg += ", found '" + p.tok.String() + "'"
		if p.tok.IsLiteral() {
			msg += " " + p.lit
		}
	}
	p.error(pos, msg)
}

// expect reports an error if the current token is not tok, advances to
// the next token in any case, and returns the position of the expected token.
func (p *parser) expect(tok token.Token) token.Pos {
	pos := p.pos
	if p.tok != tok {
		p.errorExpected(pos, "'"+tok.String()+"'")
	}
	p.next() // make progress in any case
	return pos
}

func (p *parser) parseIdentifier() *Name {
	pos := p.pos
	name := p.lit
	p.expect(token.IDENT)
	return &Name{pos, name}
}

func (p *parser) parseToken() *Token {
	pos := p.pos
	value := ""
	if p.tok == token.STRING {
		value, _ = strconv.Unquote(p.lit)
		// Unquote may fail with an error, but only if the scanner found
		// an illegal string in the first place. In this case the error
		// has already been reported.
		p.next()
	} else {
		p.expect(token.STRING)
	}
	return &Token{pos, value}
}

// parseTerm returns nil if no term was found.
func (p *parser) parseTerm() (x Expression) {
	pos := p.pos

	switch p.tok {
	case token.IDENT:
		x = p.parseIdentifier()

	case token.STRING:
		tok := p.parseToken()
		x = tok
		const ellipsis = "…" // U+2026, the horizontal ellipsis character
		if p.tok == token.ILLEGAL && p.lit == ellipsis {
			p.next()
			x = &Range{tok, p.parseToken()}
		}

	case token.LPAREN:
		p.next()
		x = &Group{pos, p.parseExpression()}
		p.expect(token.RPAREN)

	case token.LBRACK:
		p.next()
		x = &Option{pos, p.parseExpression()}
		p.expect(token.RBRACK)

	case token.LBRACE:
		p.next()
		x = &Repetition{pos, p.parseExpression()}
		p.expect(token.RBRACE)
	}

	return x
}

func (p *parser) parseSequence() Expression {
	var list Sequence
	for x := p.parseTerm(); x != nil; x = p.parseTerm() {
		list = append(list, x)
	}

	// no need for a Sequence node if len(list) < 2
	switch len(list) {
	case 0:
		p.errorExpected(p.pos, "term")
		return &Bad{p.pos, "term expected"}
	case 1:
		return list[0]
	}
	return list
}

func (p *parser) parseExpression() Expression {
	var list Alternative

	for {
		list = append(list, p.parseSequence())
		if p.tok != token.OR {
			break
		}
		p.next()
	}
	// len(list) > 0

	// no need for an Alternative node if len(list) < 2
	if len(list) == 1 {
		return list[0]
	}
	return list
}

func (p *parser) parseProduction() *Production {
	name := p.parseIdentifier()
	p.expect(token.ASSIGN)
	var expr Expression
	if p.tok != token.PERIOD {
		expr = p.parseExpression()
	}
	p.expect(token.PERIOD)
	return &Production{name, expr}
}

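// For reference, a sketch of the notation accepted by the parse functions
// above, inferred from the code itself rather than quoted from any
// package documentation:
//
//	Production = name "=" [ Expression ] "." .
//	Expression = Sequence { "|" Sequence } .
//	Sequence   = Term { Term } .
//	Term       = name | token [ "…" token ] | Group | Option | Repetition .
//	Group      = "(" Expression ")" .
//	Option     = "[" Expression "]" .
//	Repetition = "{" Expression "}" .
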
func (p *parser) parse(fset *token.FileSet, filename string, src []byte) Grammar {
	// initialize parser
	p.fset = fset
	p.ErrorVector.Reset()
	p.scanner.Init(fset.AddFile(filename, fset.Base(), len(src)), src, p, scanner.AllowIllegalChars)
	p.next() // initializes pos, tok, lit

	grammar := make(Grammar)
	for p.tok != token.EOF {
		prod := p.parseProduction()
		name := prod.Name.String
		if _, found := grammar[name]; !found {
			grammar[name] = prod
		} else {
			p.error(prod.Pos(), name+" declared already")
		}
	}

	return grammar
}

// Parse parses a set of EBNF productions from source src.
// It returns a set of productions. Errors are reported
// for incorrect syntax and if a production is declared
// more than once. Position information is recorded relative
// to the file set fset.
//
func Parse(fset *token.FileSet, filename string, src []byte) (Grammar, os.Error) {
	var p parser
	grammar := p.parse(fset, filename, src)
	return grammar, p.GetError(scanner.Sorted)
}
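
// A minimal usage sketch from a client's point of view. The grammar source
// and file name below are made up for illustration and are not taken from
// this package's tests:
//
//	fset := token.NewFileSet()
//	src := []byte(`Greeting = "hello" | "hi" .`)
//	grammar, err := ebnf.Parse(fset, "greeting.ebnf", src)
//	if err != nil {
//		// handle the syntax and duplicate-declaration errors reported by Parse
//	}
//	_ = grammar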