| // Copyright 2009 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| package datafmt |
| |
| import ( |
| "container/vector" |
| "go/scanner" |
| "go/token" |
| "os" |
| "strconv" |
| "strings" |
| ) |
| |
| // ---------------------------------------------------------------------------- |
| // Parsing |
| |
| type parser struct { |
| scanner.ErrorVector |
| scanner scanner.Scanner |
| file *token.File |
| pos token.Pos // token position |
| tok token.Token // one token look-ahead |
| lit string // token literal |
| |
| packs map[string]string // PackageName -> ImportPath |
| rules map[string]expr // RuleName -> Expression |
| } |
| |
| |
| func (p *parser) next() { |
| p.pos, p.tok, p.lit = p.scanner.Scan() |
| switch p.tok { |
| case token.CHAN, token.FUNC, token.INTERFACE, token.MAP, token.STRUCT: |
| // Go keywords for composite types are type names |
| // returned by reflect. Accept them as identifiers. |
| p.tok = token.IDENT // p.lit is already set correctly |
| } |
| } |
| |
| |
| func (p *parser) init(fset *token.FileSet, filename string, src []byte) { |
| p.ErrorVector.Reset() |
| p.file = fset.AddFile(filename, fset.Base(), len(src)) |
| p.scanner.Init(p.file, src, p, scanner.AllowIllegalChars) // return '@' as token.ILLEGAL w/o error message |
| p.next() // initializes pos, tok, lit |
| p.packs = make(map[string]string) |
| p.rules = make(map[string]expr) |
| } |
| |
| |
| func (p *parser) error(pos token.Pos, msg string) { |
| p.Error(p.file.Position(pos), msg) |
| } |
| |
| |
| func (p *parser) errorExpected(pos token.Pos, msg string) { |
| msg = "expected " + msg |
| if pos == p.pos { |
| // the error happened at the current position; |
| // make the error message more specific |
| msg += ", found '" + p.tok.String() + "'" |
| if p.tok.IsLiteral() { |
| msg += " " + p.lit |
| } |
| } |
| p.error(pos, msg) |
| } |
| |
| |
| func (p *parser) expect(tok token.Token) token.Pos { |
| pos := p.pos |
| if p.tok != tok { |
| p.errorExpected(pos, "'"+tok.String()+"'") |
| } |
| p.next() // make progress in any case |
| return pos |
| } |
| |
| |
| func (p *parser) parseIdentifier() string { |
| name := p.lit |
| p.expect(token.IDENT) |
| return name |
| } |
| |
| |
| func (p *parser) parseTypeName() (string, bool) { |
| pos := p.pos |
| name, isIdent := p.parseIdentifier(), true |
| if p.tok == token.PERIOD { |
| // got a package name, lookup package |
| if importPath, found := p.packs[name]; found { |
| name = importPath |
| } else { |
| p.error(pos, "package not declared: "+name) |
| } |
| p.next() |
| name, isIdent = name+"."+p.parseIdentifier(), false |
| } |
| return name, isIdent |
| } |
| |
| |
| // Parses a rule name and returns it. If the rule name is |
| // a package-qualified type name, the package name is resolved. |
| // The 2nd result value is true iff the rule name consists of a |
| // single identifier only (and thus could be a package name). |
| // |
| func (p *parser) parseRuleName() (string, bool) { |
| name, isIdent := "", false |
| switch p.tok { |
| case token.IDENT: |
| name, isIdent = p.parseTypeName() |
| case token.DEFAULT: |
| name = "default" |
| p.next() |
| case token.QUO: |
| name = "/" |
| p.next() |
| default: |
| p.errorExpected(p.pos, "rule name") |
| p.next() // make progress in any case |
| } |
| return name, isIdent |
| } |
| |
| |
| func (p *parser) parseString() string { |
| s := "" |
| if p.tok == token.STRING { |
| s, _ = strconv.Unquote(p.lit) |
| // Unquote may fail with an error, but only if the scanner found |
| // an illegal string in the first place. In this case the error |
| // has already been reported. |
| p.next() |
| return s |
| } else { |
| p.expect(token.STRING) |
| } |
| return s |
| } |
| |
| |
| func (p *parser) parseLiteral() literal { |
| s := []byte(p.parseString()) |
| |
| // A string literal may contain %-format specifiers. To simplify |
| // and speed up printing of the literal, split it into segments |
| // that start with "%" possibly followed by a last segment that |
| // starts with some other character. |
| var list vector.Vector |
| i0 := 0 |
| for i := 0; i < len(s); i++ { |
| if s[i] == '%' && i+1 < len(s) { |
| // the next segment starts with a % format |
| if i0 < i { |
| // the current segment is not empty, split it off |
| list.Push(s[i0:i]) |
| i0 = i |
| } |
| i++ // skip %; let loop skip over char after % |
| } |
| } |
| // the final segment may start with any character |
| // (it is empty iff the string is empty) |
| list.Push(s[i0:]) |
| |
| // convert list into a literal |
| lit := make(literal, list.Len()) |
| for i := 0; i < list.Len(); i++ { |
| lit[i] = list.At(i).([]byte) |
| } |
| |
| return lit |
| } |
| |
| |
| func (p *parser) parseField() expr { |
| var fname string |
| switch p.tok { |
| case token.ILLEGAL: |
| if p.lit != "@" { |
| return nil |
| } |
| fname = "@" |
| p.next() |
| case token.MUL: |
| fname = "*" |
| p.next() |
| case token.IDENT: |
| fname = p.parseIdentifier() |
| default: |
| return nil |
| } |
| |
| var ruleName string |
| if p.tok == token.COLON { |
| p.next() |
| ruleName, _ = p.parseRuleName() |
| } |
| |
| return &field{fname, ruleName} |
| } |
| |
| |
| func (p *parser) parseOperand() (x expr) { |
| switch p.tok { |
| case token.STRING: |
| x = p.parseLiteral() |
| |
| case token.LPAREN: |
| p.next() |
| x = p.parseExpression() |
| if p.tok == token.SHR { |
| p.next() |
| x = &group{x, p.parseExpression()} |
| } |
| p.expect(token.RPAREN) |
| |
| case token.LBRACK: |
| p.next() |
| x = &option{p.parseExpression()} |
| p.expect(token.RBRACK) |
| |
| case token.LBRACE: |
| p.next() |
| x = p.parseExpression() |
| var div expr |
| if p.tok == token.QUO { |
| p.next() |
| div = p.parseExpression() |
| } |
| x = &repetition{x, div} |
| p.expect(token.RBRACE) |
| |
| default: |
| x = p.parseField() // may be nil |
| } |
| |
| return x |
| } |
| |
| |
| func (p *parser) parseSequence() expr { |
| var list vector.Vector |
| |
| for x := p.parseOperand(); x != nil; x = p.parseOperand() { |
| list.Push(x) |
| } |
| |
| // no need for a sequence if list.Len() < 2 |
| switch list.Len() { |
| case 0: |
| return nil |
| case 1: |
| return list.At(0).(expr) |
| } |
| |
| // convert list into a sequence |
| seq := make(sequence, list.Len()) |
| for i := 0; i < list.Len(); i++ { |
| seq[i] = list.At(i).(expr) |
| } |
| return seq |
| } |
| |
| |
| func (p *parser) parseExpression() expr { |
| var list vector.Vector |
| |
| for { |
| x := p.parseSequence() |
| if x != nil { |
| list.Push(x) |
| } |
| if p.tok != token.OR { |
| break |
| } |
| p.next() |
| } |
| |
| // no need for an alternatives if list.Len() < 2 |
| switch list.Len() { |
| case 0: |
| return nil |
| case 1: |
| return list.At(0).(expr) |
| } |
| |
| // convert list into a alternatives |
| alt := make(alternatives, list.Len()) |
| for i := 0; i < list.Len(); i++ { |
| alt[i] = list.At(i).(expr) |
| } |
| return alt |
| } |
| |
| |
| func (p *parser) parseFormat() { |
| for p.tok != token.EOF { |
| pos := p.pos |
| |
| name, isIdent := p.parseRuleName() |
| switch p.tok { |
| case token.STRING: |
| // package declaration |
| importPath := p.parseString() |
| |
| // add package declaration |
| if !isIdent { |
| p.error(pos, "illegal package name: "+name) |
| } else if _, found := p.packs[name]; !found { |
| p.packs[name] = importPath |
| } else { |
| p.error(pos, "package already declared: "+name) |
| } |
| |
| case token.ASSIGN: |
| // format rule |
| p.next() |
| x := p.parseExpression() |
| |
| // add rule |
| if _, found := p.rules[name]; !found { |
| p.rules[name] = x |
| } else { |
| p.error(pos, "format rule already declared: "+name) |
| } |
| |
| default: |
| p.errorExpected(p.pos, "package declaration or format rule") |
| p.next() // make progress in any case |
| } |
| |
| if p.tok == token.SEMICOLON { |
| p.next() |
| } else { |
| break |
| } |
| } |
| p.expect(token.EOF) |
| } |
| |
| |
| func remap(p *parser, name string) string { |
| i := strings.Index(name, ".") |
| if i >= 0 { |
| packageName, suffix := name[0:i], name[i:] |
| // lookup package |
| if importPath, found := p.packs[packageName]; found { |
| name = importPath + suffix |
| } else { |
| var invalidPos token.Position |
| p.Error(invalidPos, "package not declared: "+packageName) |
| } |
| } |
| return name |
| } |
| |
| |
| // Parse parses a set of format productions from source src. Custom |
| // formatters may be provided via a map of formatter functions. If |
| // there are no errors, the result is a Format and the error is nil. |
| // Otherwise the format is nil and a non-empty ErrorList is returned. |
| // |
| func Parse(fset *token.FileSet, filename string, src []byte, fmap FormatterMap) (Format, os.Error) { |
| // parse source |
| var p parser |
| p.init(fset, filename, src) |
| p.parseFormat() |
| |
| // add custom formatters, if any |
| for name, form := range fmap { |
| name = remap(&p, name) |
| if _, found := p.rules[name]; !found { |
| p.rules[name] = &custom{name, form} |
| } else { |
| var invalidPos token.Position |
| p.Error(invalidPos, "formatter already declared: "+name) |
| } |
| } |
| |
| return p.rules, p.GetError(scanner.NoMultiples) |
| } |