| // Copyright 2009 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| package datafmt |
| |
| import ( |
| "container/vector"; |
| "go/scanner"; |
| "go/token"; |
| "os"; |
| "strconv"; |
| "strings"; |
| ) |
| |
| // ---------------------------------------------------------------------------- |
| // Parsing |
| |
| type parser struct { |
| scanner.ErrorVector; |
| scanner scanner.Scanner; |
| pos token.Position; // token position |
| tok token.Token; // one token look-ahead |
| lit []byte; // token literal |
| |
| packs map[string]string; // PackageName -> ImportPath |
| rules map[string]expr; // RuleName -> Expression |
| } |
| |
| |
| func (p *parser) next() { |
| p.pos, p.tok, p.lit = p.scanner.Scan(); |
| switch p.tok { |
| case token.CHAN, token.FUNC, token.INTERFACE, token.MAP, token.STRUCT: |
| // Go keywords for composite types are type names |
| // returned by reflect. Accept them as identifiers. |
| p.tok = token.IDENT // p.lit is already set correctly |
| } |
| } |
| |
| |
| func (p *parser) init(filename string, src []byte) { |
| p.ErrorVector.Init(); |
| p.scanner.Init(filename, src, p, scanner.AllowIllegalChars); // return '@' as token.ILLEGAL w/o error message |
| p.next(); // initializes pos, tok, lit |
| p.packs = make(map[string]string); |
| p.rules = make(map[string]expr); |
| } |
| |
| |
| func (p *parser) errorExpected(pos token.Position, msg string) { |
| msg = "expected " + msg; |
| if pos.Offset == p.pos.Offset { |
| // the error happened at the current position; |
| // make the error message more specific |
| msg += ", found '" + p.tok.String() + "'"; |
| if p.tok.IsLiteral() { |
| msg += " " + string(p.lit) |
| } |
| } |
| p.Error(pos, msg); |
| } |
| |
| |
| func (p *parser) expect(tok token.Token) token.Position { |
| pos := p.pos; |
| if p.tok != tok { |
| p.errorExpected(pos, "'"+tok.String()+"'") |
| } |
| p.next(); // make progress in any case |
| return pos; |
| } |
| |
| |
| func (p *parser) parseIdentifier() string { |
| name := string(p.lit); |
| p.expect(token.IDENT); |
| return name; |
| } |
| |
| |
| func (p *parser) parseTypeName() (string, bool) { |
| pos := p.pos; |
| name, isIdent := p.parseIdentifier(), true; |
| if p.tok == token.PERIOD { |
| // got a package name, lookup package |
| if importPath, found := p.packs[name]; found { |
| name = importPath |
| } else { |
| p.Error(pos, "package not declared: "+name) |
| } |
| p.next(); |
| name, isIdent = name+"."+p.parseIdentifier(), false; |
| } |
| return name, isIdent; |
| } |
| |
| |
| // Parses a rule name and returns it. If the rule name is |
| // a package-qualified type name, the package name is resolved. |
| // The 2nd result value is true iff the rule name consists of a |
| // single identifier only (and thus could be a package name). |
| // |
| func (p *parser) parseRuleName() (string, bool) { |
| name, isIdent := "", false; |
| switch p.tok { |
| case token.IDENT: |
| name, isIdent = p.parseTypeName() |
| case token.DEFAULT: |
| name = "default"; |
| p.next(); |
| case token.QUO: |
| name = "/"; |
| p.next(); |
| default: |
| p.errorExpected(p.pos, "rule name"); |
| p.next(); // make progress in any case |
| } |
| return name, isIdent; |
| } |
| |
| |
| func (p *parser) parseString() string { |
| s := ""; |
| if p.tok == token.STRING { |
| s, _ = strconv.Unquote(string(p.lit)); |
| // Unquote may fail with an error, but only if the scanner found |
| // an illegal string in the first place. In this case the error |
| // has already been reported. |
| p.next(); |
| return s; |
| } else { |
| p.expect(token.STRING) |
| } |
| return s; |
| } |
| |
| |
| func (p *parser) parseLiteral() literal { |
| s := strings.Bytes(p.parseString()); |
| |
| // A string literal may contain %-format specifiers. To simplify |
| // and speed up printing of the literal, split it into segments |
| // that start with "%" possibly followed by a last segment that |
| // starts with some other character. |
| var list vector.Vector; |
| list.Init(0); |
| i0 := 0; |
| for i := 0; i < len(s); i++ { |
| if s[i] == '%' && i+1 < len(s) { |
| // the next segment starts with a % format |
| if i0 < i { |
| // the current segment is not empty, split it off |
| list.Push(s[i0:i]); |
| i0 = i; |
| } |
| i++; // skip %; let loop skip over char after % |
| } |
| } |
| // the final segment may start with any character |
| // (it is empty iff the string is empty) |
| list.Push(s[i0:len(s)]); |
| |
| // convert list into a literal |
| lit := make(literal, list.Len()); |
| for i := 0; i < list.Len(); i++ { |
| lit[i] = list.At(i).([]byte) |
| } |
| |
| return lit; |
| } |
| |
| |
| func (p *parser) parseField() expr { |
| var fname string; |
| switch p.tok { |
| case token.ILLEGAL: |
| if string(p.lit) != "@" { |
| return nil |
| } |
| fname = "@"; |
| p.next(); |
| case token.MUL: |
| fname = "*"; |
| p.next(); |
| case token.IDENT: |
| fname = p.parseIdentifier() |
| default: |
| return nil |
| } |
| |
| var ruleName string; |
| if p.tok == token.COLON { |
| p.next(); |
| ruleName, _ = p.parseRuleName(); |
| } |
| |
| return &field{fname, ruleName}; |
| } |
| |
| |
| func (p *parser) parseOperand() (x expr) { |
| switch p.tok { |
| case token.STRING: |
| x = p.parseLiteral() |
| |
| case token.LPAREN: |
| p.next(); |
| x = p.parseExpression(); |
| if p.tok == token.SHR { |
| p.next(); |
| x = &group{x, p.parseExpression()}; |
| } |
| p.expect(token.RPAREN); |
| |
| case token.LBRACK: |
| p.next(); |
| x = &option{p.parseExpression()}; |
| p.expect(token.RBRACK); |
| |
| case token.LBRACE: |
| p.next(); |
| x = p.parseExpression(); |
| var div expr; |
| if p.tok == token.QUO { |
| p.next(); |
| div = p.parseExpression(); |
| } |
| x = &repetition{x, div}; |
| p.expect(token.RBRACE); |
| |
| default: |
| x = p.parseField() // may be nil |
| } |
| |
| return x; |
| } |
| |
| |
| func (p *parser) parseSequence() expr { |
| var list vector.Vector; |
| list.Init(0); |
| |
| for x := p.parseOperand(); x != nil; x = p.parseOperand() { |
| list.Push(x) |
| } |
| |
| // no need for a sequence if list.Len() < 2 |
| switch list.Len() { |
| case 0: |
| return nil |
| case 1: |
| return list.At(0).(expr) |
| } |
| |
| // convert list into a sequence |
| seq := make(sequence, list.Len()); |
| for i := 0; i < list.Len(); i++ { |
| seq[i] = list.At(i).(expr) |
| } |
| return seq; |
| } |
| |
| |
| func (p *parser) parseExpression() expr { |
| var list vector.Vector; |
| list.Init(0); |
| |
| for { |
| x := p.parseSequence(); |
| if x != nil { |
| list.Push(x) |
| } |
| if p.tok != token.OR { |
| break |
| } |
| p.next(); |
| } |
| |
| // no need for an alternatives if list.Len() < 2 |
| switch list.Len() { |
| case 0: |
| return nil |
| case 1: |
| return list.At(0).(expr) |
| } |
| |
| // convert list into a alternatives |
| alt := make(alternatives, list.Len()); |
| for i := 0; i < list.Len(); i++ { |
| alt[i] = list.At(i).(expr) |
| } |
| return alt; |
| } |
| |
| |
| func (p *parser) parseFormat() { |
| for p.tok != token.EOF { |
| pos := p.pos; |
| |
| name, isIdent := p.parseRuleName(); |
| switch p.tok { |
| case token.STRING: |
| // package declaration |
| importPath := p.parseString(); |
| |
| // add package declaration |
| if !isIdent { |
| p.Error(pos, "illegal package name: "+name) |
| } else if _, found := p.packs[name]; !found { |
| p.packs[name] = importPath |
| } else { |
| p.Error(pos, "package already declared: "+name) |
| } |
| |
| case token.ASSIGN: |
| // format rule |
| p.next(); |
| x := p.parseExpression(); |
| |
| // add rule |
| if _, found := p.rules[name]; !found { |
| p.rules[name] = x |
| } else { |
| p.Error(pos, "format rule already declared: "+name) |
| } |
| |
| default: |
| p.errorExpected(p.pos, "package declaration or format rule"); |
| p.next(); // make progress in any case |
| } |
| |
| if p.tok == token.SEMICOLON { |
| p.next() |
| } else { |
| break |
| } |
| } |
| p.expect(token.EOF); |
| } |
| |
| |
| func remap(p *parser, name string) string { |
| i := strings.Index(name, "."); |
| if i >= 0 { |
| packageName, suffix := name[0:i], name[i:len(name)]; |
| // lookup package |
| if importPath, found := p.packs[packageName]; found { |
| name = importPath + suffix |
| } else { |
| var invalidPos token.Position; |
| p.Error(invalidPos, "package not declared: "+packageName); |
| } |
| } |
| return name; |
| } |
| |
| |
| // Parse parses a set of format productions from source src. Custom |
| // formatters may be provided via a map of formatter functions. If |
| // there are no errors, the result is a Format and the error is nil. |
| // Otherwise the format is nil and a non-empty ErrorList is returned. |
| // |
| func Parse(filename string, src []byte, fmap FormatterMap) (Format, os.Error) { |
| // parse source |
| var p parser; |
| p.init(filename, src); |
| p.parseFormat(); |
| |
| // add custom formatters, if any |
| for name, form := range fmap { |
| name = remap(&p, name); |
| if _, found := p.rules[name]; !found { |
| p.rules[name] = &custom{name, form} |
| } else { |
| var invalidPos token.Position; |
| p.Error(invalidPos, "formatter already declared: "+name); |
| } |
| } |
| |
| return p.rules, p.GetError(scanner.NoMultiples); |
| } |