blob: 54a6493fb47948894128ef60d64c5e1f6d55853e [file] [log] [blame]
// Package parser implements a parser for HCL (HashiCorp Configuration
// Language)
package parser
import (
"errors"
"fmt"
"strings"
"github.com/hashicorp/hcl/hcl/ast"
"github.com/hashicorp/hcl/hcl/scanner"
"github.com/hashicorp/hcl/hcl/token"
)
type Parser struct {
sc *scanner.Scanner
// Last read token
tok token.Token
commaPrev token.Token
comments []*ast.CommentGroup
leadComment *ast.CommentGroup // last lead comment
lineComment *ast.CommentGroup // last line comment
enableTrace bool
indent int
n int // buffer size (max = 1)
}
func newParser(src []byte) *Parser {
return &Parser{
sc: scanner.New(src),
}
}
// Parse returns the fully parsed source and returns the abstract syntax tree.
func Parse(src []byte) (*ast.File, error) {
p := newParser(src)
return p.Parse()
}
var errEofToken = errors.New("EOF token found")
// Parse returns the fully parsed source and returns the abstract syntax tree.
func (p *Parser) Parse() (*ast.File, error) {
f := &ast.File{}
var err, scerr error
p.sc.Error = func(pos token.Pos, msg string) {
scerr = &PosError{Pos: pos, Err: errors.New(msg)}
}
f.Node, err = p.objectList(false)
if scerr != nil {
return nil, scerr
}
if err != nil {
return nil, err
}
f.Comments = p.comments
return f, nil
}
// objectList parses a list of items within an object (generally k/v pairs).
// The parameter" obj" tells this whether to we are within an object (braces:
// '{', '}') or just at the top level. If we're within an object, we end
// at an RBRACE.
func (p *Parser) objectList(obj bool) (*ast.ObjectList, error) {
defer un(trace(p, "ParseObjectList"))
node := &ast.ObjectList{}
for {
if obj {
tok := p.scan()
p.unscan()
if tok.Type == token.RBRACE {
break
}
}
n, err := p.objectItem()
if err == errEofToken {
break // we are finished
}
// we don't return a nil node, because might want to use already
// collected items.
if err != nil {
return node, err
}
node.Add(n)
// object lists can be optionally comma-delimited e.g. when a list of maps
// is being expressed, so a comma is allowed here - it's simply consumed
tok := p.scan()
if tok.Type != token.COMMA {
p.unscan()
}
}
return node, nil
}
func (p *Parser) consumeComment() (comment *ast.Comment, endline int) {
endline = p.tok.Pos.Line
// count the endline if it's multiline comment, ie starting with /*
if len(p.tok.Text) > 1 && p.tok.Text[1] == '*' {
// don't use range here - no need to decode Unicode code points
for i := 0; i < len(p.tok.Text); i++ {
if p.tok.Text[i] == '\n' {
endline++
}
}
}
comment = &ast.Comment{Start: p.tok.Pos, Text: p.tok.Text}
p.tok = p.sc.Scan()
return
}
func (p *Parser) consumeCommentGroup(n int) (comments *ast.CommentGroup, endline int) {
var list []*ast.Comment
endline = p.tok.Pos.Line
for p.tok.Type == token.COMMENT && p.tok.Pos.Line <= endline+n {
var comment *ast.Comment
comment, endline = p.consumeComment()
list = append(list, comment)
}
// add comment group to the comments list
comments = &ast.CommentGroup{List: list}
p.comments = append(p.comments, comments)
return
}
// objectItem parses a single object item
func (p *Parser) objectItem() (*ast.ObjectItem, error) {
defer un(trace(p, "ParseObjectItem"))
keys, err := p.objectKey()
if len(keys) > 0 && err == errEofToken {
// We ignore eof token here since it is an error if we didn't
// receive a value (but we did receive a key) for the item.
err = nil
}
if len(keys) > 0 && err != nil && p.tok.Type == token.RBRACE {
// This is a strange boolean statement, but what it means is:
// We have keys with no value, and we're likely in an object
// (since RBrace ends an object). For this, we set err to nil so
// we continue and get the error below of having the wrong value
// type.
err = nil
// Reset the token type so we don't think it completed fine. See
// objectType which uses p.tok.Type to check if we're done with
// the object.
p.tok.Type = token.EOF
}
if err != nil {
return nil, err
}
o := &ast.ObjectItem{
Keys: keys,
}
if p.leadComment != nil {
o.LeadComment = p.leadComment
p.leadComment = nil
}
switch p.tok.Type {
case token.ASSIGN:
o.Assign = p.tok.Pos
o.Val, err = p.object()
if err != nil {
return nil, err
}
case token.LBRACE:
o.Val, err = p.objectType()
if err != nil {
return nil, err
}
default:
keyStr := make([]string, 0, len(keys))
for _, k := range keys {
keyStr = append(keyStr, k.Token.Text)
}
return nil, fmt.Errorf(
"key '%s' expected start of object ('{') or assignment ('=')",
strings.Join(keyStr, " "))
}
// do a look-ahead for line comment
p.scan()
if len(keys) > 0 && o.Val.Pos().Line == keys[0].Pos().Line && p.lineComment != nil {
o.LineComment = p.lineComment
p.lineComment = nil
}
p.unscan()
return o, nil
}
// objectKey parses an object key and returns a ObjectKey AST
func (p *Parser) objectKey() ([]*ast.ObjectKey, error) {
keyCount := 0
keys := make([]*ast.ObjectKey, 0)
for {
tok := p.scan()
switch tok.Type {
case token.EOF:
// It is very important to also return the keys here as well as
// the error. This is because we need to be able to tell if we
// did parse keys prior to finding the EOF, or if we just found
// a bare EOF.
return keys, errEofToken
case token.ASSIGN:
// assignment or object only, but not nested objects. this is not
// allowed: `foo bar = {}`
if keyCount > 1 {
return nil, &PosError{
Pos: p.tok.Pos,
Err: fmt.Errorf("nested object expected: LBRACE got: %s", p.tok.Type),
}
}
if keyCount == 0 {
return nil, &PosError{
Pos: p.tok.Pos,
Err: errors.New("no object keys found!"),
}
}
return keys, nil
case token.LBRACE:
var err error
// If we have no keys, then it is a syntax error. i.e. {{}} is not
// allowed.
if len(keys) == 0 {
err = &PosError{
Pos: p.tok.Pos,
Err: fmt.Errorf("expected: IDENT | STRING got: %s", p.tok.Type),
}
}
// object
return keys, err
case token.IDENT, token.STRING:
keyCount++
keys = append(keys, &ast.ObjectKey{Token: p.tok})
case token.ILLEGAL:
fmt.Println("illegal")
default:
return keys, &PosError{
Pos: p.tok.Pos,
Err: fmt.Errorf("expected: IDENT | STRING | ASSIGN | LBRACE got: %s", p.tok.Type),
}
}
}
}
// object parses any type of object, such as number, bool, string, object or
// list.
func (p *Parser) object() (ast.Node, error) {
defer un(trace(p, "ParseType"))
tok := p.scan()
switch tok.Type {
case token.NUMBER, token.FLOAT, token.BOOL, token.STRING, token.HEREDOC:
return p.literalType()
case token.LBRACE:
return p.objectType()
case token.LBRACK:
return p.listType()
case token.COMMENT:
// implement comment
case token.EOF:
return nil, errEofToken
}
return nil, &PosError{
Pos: tok.Pos,
Err: fmt.Errorf("Unknown token: %+v", tok),
}
}
// objectType parses an object type and returns a ObjectType AST
func (p *Parser) objectType() (*ast.ObjectType, error) {
defer un(trace(p, "ParseObjectType"))
// we assume that the currently scanned token is a LBRACE
o := &ast.ObjectType{
Lbrace: p.tok.Pos,
}
l, err := p.objectList(true)
// if we hit RBRACE, we are good to go (means we parsed all Items), if it's
// not a RBRACE, it's an syntax error and we just return it.
if err != nil && p.tok.Type != token.RBRACE {
return nil, err
}
// No error, scan and expect the ending to be a brace
if tok := p.scan(); tok.Type != token.RBRACE {
return nil, fmt.Errorf("object expected closing RBRACE got: %s", tok.Type)
}
o.List = l
o.Rbrace = p.tok.Pos // advanced via parseObjectList
return o, nil
}
// listType parses a list type and returns a ListType AST
func (p *Parser) listType() (*ast.ListType, error) {
defer un(trace(p, "ParseListType"))
// we assume that the currently scanned token is a LBRACK
l := &ast.ListType{
Lbrack: p.tok.Pos,
}
needComma := false
for {
tok := p.scan()
if needComma {
switch tok.Type {
case token.COMMA, token.RBRACK:
default:
return nil, &PosError{
Pos: tok.Pos,
Err: fmt.Errorf(
"error parsing list, expected comma or list end, got: %s",
tok.Type),
}
}
}
switch tok.Type {
case token.NUMBER, token.FLOAT, token.STRING, token.HEREDOC:
node, err := p.literalType()
if err != nil {
return nil, err
}
// If there is a lead comment, apply it
if p.leadComment != nil {
node.LeadComment = p.leadComment
p.leadComment = nil
}
l.Add(node)
needComma = true
case token.COMMA:
// get next list item or we are at the end
// do a look-ahead for line comment
p.scan()
if p.lineComment != nil && len(l.List) > 0 {
lit, ok := l.List[len(l.List)-1].(*ast.LiteralType)
if ok {
lit.LineComment = p.lineComment
l.List[len(l.List)-1] = lit
p.lineComment = nil
}
}
p.unscan()
needComma = false
continue
case token.LBRACE:
// Looks like a nested object, so parse it out
node, err := p.objectType()
if err != nil {
return nil, &PosError{
Pos: tok.Pos,
Err: fmt.Errorf(
"error while trying to parse object within list: %s", err),
}
}
l.Add(node)
needComma = true
case token.BOOL:
// TODO(arslan) should we support? not supported by HCL yet
case token.LBRACK:
// TODO(arslan) should we support nested lists? Even though it's
// written in README of HCL, it's not a part of the grammar
// (not defined in parse.y)
case token.RBRACK:
// finished
l.Rbrack = p.tok.Pos
return l, nil
default:
return nil, &PosError{
Pos: tok.Pos,
Err: fmt.Errorf("unexpected token while parsing list: %s", tok.Type),
}
}
}
}
// literalType parses a literal type and returns a LiteralType AST
func (p *Parser) literalType() (*ast.LiteralType, error) {
defer un(trace(p, "ParseLiteral"))
return &ast.LiteralType{
Token: p.tok,
}, nil
}
// scan returns the next token from the underlying scanner. If a token has
// been unscanned then read that instead. In the process, it collects any
// comment groups encountered, and remembers the last lead and line comments.
func (p *Parser) scan() token.Token {
// If we have a token on the buffer, then return it.
if p.n != 0 {
p.n = 0
return p.tok
}
// Otherwise read the next token from the scanner and Save it to the buffer
// in case we unscan later.
prev := p.tok
p.tok = p.sc.Scan()
if p.tok.Type == token.COMMENT {
var comment *ast.CommentGroup
var endline int
// fmt.Printf("p.tok.Pos.Line = %+v prev: %d endline %d \n",
// p.tok.Pos.Line, prev.Pos.Line, endline)
if p.tok.Pos.Line == prev.Pos.Line {
// The comment is on same line as the previous token; it
// cannot be a lead comment but may be a line comment.
comment, endline = p.consumeCommentGroup(0)
if p.tok.Pos.Line != endline {
// The next token is on a different line, thus
// the last comment group is a line comment.
p.lineComment = comment
}
}
// consume successor comments, if any
endline = -1
for p.tok.Type == token.COMMENT {
comment, endline = p.consumeCommentGroup(1)
}
if endline+1 == p.tok.Pos.Line && p.tok.Type != token.RBRACE {
switch p.tok.Type {
case token.RBRACE, token.RBRACK:
// Do not count for these cases
default:
// The next token is following on the line immediately after the
// comment group, thus the last comment group is a lead comment.
p.leadComment = comment
}
}
}
return p.tok
}
// unscan pushes the previously read token back onto the buffer.
func (p *Parser) unscan() {
p.n = 1
}
// ----------------------------------------------------------------------------
// Parsing support
func (p *Parser) printTrace(a ...interface{}) {
if !p.enableTrace {
return
}
const dots = ". . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . "
const n = len(dots)
fmt.Printf("%5d:%3d: ", p.tok.Pos.Line, p.tok.Pos.Column)
i := 2 * p.indent
for i > n {
fmt.Print(dots)
i -= n
}
// i <= n
fmt.Print(dots[0:i])
fmt.Println(a...)
}
func trace(p *Parser, msg string) *Parser {
p.printTrace(msg, "(")
p.indent++
return p
}
// Usage pattern: defer un(trace(p, "..."))
func un(p *Parser) {
p.indent--
p.printTrace(")")
}