vendor/github.com/hashicorp/hcl/hcl/scanner/scanner.go - gddo - Git at Google

 // Package scanner implements a scanner for HCL (HashiCorp Configuration
 // Language) source text.
 package scanner

 import (
 	"bytes"
 	"fmt"
 	"os"
 	"regexp"
 	"unicode"
 	"unicode/utf8"

 	"github.com/hashicorp/hcl/hcl/token"
 )

 // eof represents a marker rune for the end of the reader. Its value is
 // rune(0), so a literal NUL character read from the source compares equal to
 // eof as well.
 const eof = rune(0)

 // Scanner defines a lexical scanner. It is created with New and yields one
 // token per call to Scan.
 type Scanner struct {
 	buf *bytes.Buffer // Source buffer for advancing and scanning
 	src []byte        // Source buffer for immutable access

 	// Source Position
 	srcPos  token.Pos // current position
 	prevPos token.Pos // position saved by next() before advancing; restored by unread()

 	lastCharLen int // length of last character in bytes
 	lastLineLen int // length of last line in characters (for correct column reporting)

 	tokStart int // token text start position
 	tokEnd   int // token text end  position

 	// Error is called for each error encountered. If no Error
 	// function is set, the error is reported to os.Stderr.
 	Error func(pos token.Pos, msg string)

 	// ErrorCount is incremented by one for each error encountered.
 	ErrorCount int

 	// tokPos is the start position of most recently scanned token; set by
 	// Scan. The Filename field is always left untouched by the Scanner.  If
 	// an error is reported (via Error) and Position is invalid, the scanner is
 	// not inside a token.
 	tokPos token.Pos
 }

 // New returns a Scanner positioned at the start of src. The slice is kept
 // both as the read buffer and for direct access to token text.
 func New(src []byte) *Scanner {
 	// Reads go through a *bytes.Buffer (an io.Reader compatible type) even
 	// though the whole source is already in memory; that keeps a later move
 	// to streaming input straightforward.
 	sc := &Scanner{
 		buf: bytes.NewBuffer(src),
 		src: src,
 	}

 	// line numbers are 1-based
 	sc.srcPos.Line = 1
 	return sc
 }

 // next reads the next rune from the buffered reader and advances the source
 // position. It returns eof (rune(0)) if the read fails (for example on
 // io.EOF). An invalid UTF-8 byte is reported through s.err and returned as
 // utf8.RuneError.
 func (s *Scanner) next() rune {
 	ch, size, err := s.buf.ReadRune()
 	if err != nil {
 		// advance for error reporting
 		s.srcPos.Column++
 		s.srcPos.Offset += size
 		s.lastCharLen = size
 		return eof
 	}

 	// Remember the position before every successfully read rune, including
 	// an invalid byte. unread() restores this value, so it has to stay in
 	// step with what the buffer's UnreadRune puts back.
 	s.prevPos = s.srcPos

 	s.srcPos.Column++
 	s.lastCharLen = size
 	s.srcPos.Offset += size

 	if ch == utf8.RuneError && size == 1 {
 		s.err("illegal UTF-8 encoding")
 		return ch
 	}

 	if ch == '\n' {
 		s.srcPos.Line++
 		s.lastLineLen = s.srcPos.Column
 		s.srcPos.Column = 0
 	}

 	// debug
 	// fmt.Printf("ch: %q, offset:column: %d:%d\n", ch, s.srcPos.Offset, s.srcPos.Column)
 	return ch
 }

 // unread pushes the most recently read rune back onto the buffer and rewinds
 // the source position to the one saved before that read. It panics when the
 // buffer refuses to unread.
 func (s *Scanner) unread() {
 	err := s.buf.UnreadRune()
 	if err != nil {
 		// this is user fault, we should catch it
 		panic(err)
 	}

 	// put back last position
 	s.srcPos = s.prevPos
 }

 // peek reports the rune that the next read would return, leaving the source
 // position untouched. It returns eof when no rune can be read.
 func (s *Scanner) peek() rune {
 	r, _, err := s.buf.ReadRune()
 	if err == nil {
 		// give the rune straight back to the buffer
 		s.buf.UnreadRune()
 		return r
 	}
 	return eof
 }

 // Scan scans the next token and returns the token.
 //
 // Leading whitespace is skipped, the token is classified by its first rune,
 // and the returned token carries its type, its start position and its text
 // (the bytes of src from the token start up to the current offset). At the
 // end of the input the token type is token.EOF. A rune that starts no known
 // token is reported through s.err and returned with the zero token.Type.
 func (s *Scanner) Scan() token.Token {
 	ch := s.next()

 	// skip white space
 	for isWhitespace(ch) {
 		ch = s.next()
 	}

 	var tok token.Type

 	// token text markings
 	s.tokStart = s.srcPos.Offset - s.lastCharLen

 	// token position, initial next() is moving the offset by one(size of rune
 	// actually), though we are interested with the starting point
 	s.tokPos.Offset = s.srcPos.Offset - s.lastCharLen
 	if s.srcPos.Column > 0 {
 		// common case: last character was not a '\n'
 		s.tokPos.Line = s.srcPos.Line
 		s.tokPos.Column = s.srcPos.Column
 	} else {
 		// last character was a '\n'
 		// (we cannot be at the beginning of the source
 		// since we have called next() at least once)
 		s.tokPos.Line = s.srcPos.Line - 1
 		s.tokPos.Column = s.lastLineLen
 	}

 	// Dispatch on the first rune; the scanXxx helpers consume the rest of
 	// the token.
 	switch {
 	case isLetter(ch):
 		tok = token.IDENT
 		lit := s.scanIdentifier()
 		// the two boolean literals are identifiers with their own type
 		if lit == "true" || lit == "false" {
 			tok = token.BOOL
 		}
 	case isDecimal(ch):
 		tok = s.scanNumber(ch)
 	default:
 		switch ch {
 		case eof:
 			tok = token.EOF
 		case '"':
 			tok = token.STRING
 			s.scanString()
 		case '#', '/':
 			tok = token.COMMENT
 			s.scanComment(ch)
 		case '.':
 			// a period directly followed by a digit starts a float
 			tok = token.PERIOD
 			ch = s.peek()
 			if isDecimal(ch) {
 				tok = token.FLOAT
 				ch = s.scanMantissa(ch)
 				ch = s.scanExponent(ch)
 			}
 		case '<':
 			tok = token.HEREDOC
 			s.scanHeredoc()
 		case '[':
 			tok = token.LBRACK
 		case ']':
 			tok = token.RBRACK
 		case '{':
 			tok = token.LBRACE
 		case '}':
 			tok = token.RBRACE
 		case ',':
 			tok = token.COMMA
 		case '=':
 			tok = token.ASSIGN
 		case '+':
 			tok = token.ADD
 		case '-':
 			// a minus directly followed by a digit is part of a number
 			if isDecimal(s.peek()) {
 				ch := s.next()
 				tok = s.scanNumber(ch)
 			} else {
 				tok = token.SUB
 			}
 		default:
 			s.err("illegal char")
 		}
 	}

 	// finish token ending
 	s.tokEnd = s.srcPos.Offset

 	// create token literal
 	var tokenText string
 	if s.tokStart >= 0 {
 		tokenText = string(s.src[s.tokStart:s.tokEnd])
 	}
 	s.tokStart = s.tokEnd // ensure idempotency of tokenText() call

 	return token.Token{
 		Type: tok,
 		Pos:  s.tokPos,
 		Text: tokenText,
 	}
 }

 // scanComment consumes a comment whose first rune ch has already been read.
 // '#' and "//" comments run up to (not including) the next newline; "/*"
 // comments run through the closing "*/". A '/' followed by anything else, or
 // a block comment that reaches the end of input, is reported via s.err.
 func (s *Scanner) scanComment(ch rune) {
 	lineComment := ch == '#'

 	if ch == '/' {
 		switch s.peek() {
 		case '*':
 			// consume the '*' and fetch the first rune of the comment
 			// body, so an unterminated comment is detected in the loop
 			s.next()
 			ch = s.next()
 		case '/':
 			lineComment = true
 		default:
 			s.err("expected '/' for comment")
 			return
 		}
 	}

 	if lineComment {
 		for ch = s.next(); ch != '\n' && ch >= 0 && ch != eof; ch = s.next() {
 		}
 		// leave the terminating rune for the next token
 		if ch != eof && ch >= 0 {
 			s.unread()
 		}
 		return
 	}

 	// block comment: read until a '*' is directly followed by '/'
 	for {
 		if ch < 0 || ch == eof {
 			s.err("comment not terminated")
 			return
 		}

 		prev := ch
 		ch = s.next()
 		if prev == '*' && ch == '/' {
 			return
 		}
 	}
 }

 // scanNumber scans a HCL number definition starting with the given rune.
 // ch is the first digit, which the caller has already consumed. The result is
 // token.FLOAT when an exponent or a fraction is found and token.NUMBER
 // otherwise; malformed hexadecimal and octal literals are reported via s.err.
 func (s *Scanner) scanNumber(ch rune) token.Type {
 	if ch == '0' {
 		// check for hexadecimal, octal or float
 		ch = s.next()
 		if ch == 'x' || ch == 'X' {
 			// hexadecimal
 			ch = s.next()
 			found := false
 			for isHexadecimal(ch) {
 				ch = s.next()
 				found = true
 			}

 			// "0x" with no hex digit after it
 			if !found {
 				s.err("illegal hexadecimal number")
 			}

 			// put back the first rune that is not part of the literal
 			if ch != eof {
 				s.unread()
 			}

 			return token.NUMBER
 		}

 		// now it's either something like: 0421(octal) or 0.1231(float)
 		illegalOctal := false
 		for isDecimal(ch) {
 			ch = s.next()
 			if ch == '8' || ch == '9' {
 				// this is just a possibility. For example 0159 is illegal, but
 				// 0159.23 is valid. So we mark a possible illegal octal. If
 				// the next character is not a period, we'll print the error.
 				illegalOctal = true
 			}
 		}

 		if ch == 'e' || ch == 'E' {
 			ch = s.scanExponent(ch)
 			return token.FLOAT
 		}

 		if ch == '.' {
 			ch = s.scanFraction(ch)

 			if ch == 'e' || ch == 'E' {
 				ch = s.next()
 				ch = s.scanExponent(ch)
 			}
 			return token.FLOAT
 		}

 		// neither exponent nor fraction followed, so an 8 or 9 seen above
 		// makes this an invalid octal literal
 		if illegalOctal {
 			s.err("illegal octal number")
 		}

 		if ch != eof {
 			s.unread()
 		}
 		return token.NUMBER
 	}

 	// decimal literal that does not start with '0'
 	s.scanMantissa(ch)
 	ch = s.next() // seek forward
 	if ch == 'e' || ch == 'E' {
 		ch = s.scanExponent(ch)
 		return token.FLOAT
 	}

 	if ch == '.' {
 		ch = s.scanFraction(ch)
 		if ch == 'e' || ch == 'E' {
 			ch = s.next()
 			ch = s.scanExponent(ch)
 		}
 		return token.FLOAT
 	}

 	// plain integer: put back the rune that ended it
 	if ch != eof {
 		s.unread()
 	}
 	return token.NUMBER
 }

 // scanMantissa consumes a run of decimal digits beginning with ch and returns
 // the first rune that is not a decimal digit. When at least one digit was
 // consumed and the input has not ended, that rune is put back so the caller
 // can decide whether a fraction or an exponent follows.
 func (s *Scanner) scanMantissa(ch rune) rune {
 	if !isDecimal(ch) {
 		// nothing to scan, nothing to put back
 		return ch
 	}

 	for isDecimal(ch) {
 		ch = s.next()
 	}

 	if ch != eof {
 		s.unread()
 	}
 	return ch
 }

 // scanFraction scans the digits that follow a '.' rune. For any other rune
 // it does nothing and returns ch unchanged.
 func (s *Scanner) scanFraction(ch rune) rune {
 	if ch != '.' {
 		return ch
 	}

 	// peek first: scanMantissa only advances when a digit follows
 	return s.scanMantissa(s.peek())
 }

 // scanExponent scans the remainder of an exponent. When ch is 'e' or 'E' it
 // reads the following rune, skips one optional sign and scans the digits with
 // scanMantissa; for any other rune it returns ch unchanged.
 func (s *Scanner) scanExponent(ch rune) rune {
 	if ch != 'e' && ch != 'E' {
 		return ch
 	}

 	ch = s.next()
 	switch ch {
 	case '-', '+':
 		ch = s.next()
 	}
 	return s.scanMantissa(ch)
 }

 // scanHeredoc scans a heredoc string. The first '<' has already been consumed
 // by the caller. The heredoc has the form
 //
 //	<<ANCHOR        or        <<-ANCHOR
 //	...body...                ...body...
 //	ANCHOR                        ANCHOR
 //
 // and ends at the first line that consists only of optional whitespace
 // followed by the anchor. Malformed or unterminated heredocs are reported
 // via s.err.
 func (s *Scanner) scanHeredoc() {
 	// Scan the second '<' in example: '<<EOF'
 	if s.next() != '<' {
 		s.err("heredoc expected second '<', didn't see it")
 		return
 	}

 	// Get the original offset so we can read just the heredoc ident
 	offs := s.srcPos.Offset

 	// Scan the identifier
 	ch := s.next()

 	// Indented heredoc syntax
 	if ch == '-' {
 		ch = s.next()
 	}

 	for isLetter(ch) || isDigit(ch) {
 		ch = s.next()
 	}

 	// If we reached an EOF then that is not good
 	if ch == eof {
 		s.err("heredoc not terminated")
 		return
 	}

 	// Ignore the '\r' in Windows line endings
 	if ch == '\r' {
 		if s.peek() == '\n' {
 			ch = s.next()
 		}
 	}

 	// If we didn't reach a newline then that is also not good
 	if ch != '\n' {
 		s.err("invalid characters in heredoc anchor")
 		return
 	}

 	// Read the identifier. A lone '-' (as in "<<-\n") carries no anchor
 	// text either and must be rejected: the pattern built from it would
 	// match every non-empty line.
 	identBytes := s.src[offs : s.srcPos.Offset-s.lastCharLen]
 	if len(identBytes) == 0 || (len(identBytes) == 1 && identBytes[0] == '-') {
 		s.err("zero-length heredoc anchor")
 		return
 	}

 	// anchor is the identifier without the '-' of the indented syntax
 	anchor := identBytes
 	if anchor[0] == '-' {
 		anchor = anchor[1:]
 	}

 	// Anchored at both ends so that only a line made of optional whitespace
 	// plus the anchor terminates the heredoc, not any line that merely
 	// ends with the anchor text.
 	identRegexp := regexp.MustCompile(fmt.Sprintf(`^[[:space:]]*%s\z`, anchor))

 	// Read the actual string value
 	lineStart := s.srcPos.Offset
 	for {
 		c := s.next()

 		// Special newline handling.
 		if c == '\n' {
 			// Math is fast, so we first compare the byte counts to see if we have a chance
 			// of seeing the same identifier - if the length is less than the number of bytes
 			// in the anchor, this cannot be a valid terminator. The comparison uses
 			// the anchor without the '-' marker, which never appears on the closing line.
 			lineEnd := s.srcPos.Offset - s.lastCharLen
 			if lineEnd-lineStart >= len(anchor) && identRegexp.Match(s.src[lineStart:lineEnd]) {
 				break
 			}

 			// Not an anchor match, record the start of a new line
 			lineStart = s.srcPos.Offset
 		}

 		if c == eof {
 			s.err("heredoc not terminated")
 			return
 		}
 	}
 }

 // scanString scans a quoted string whose opening '"' has already been
 // consumed. Quotes inside a ${ ... } interpolation do not end the string.
 // Reaching the end of input first is reported via s.err.
 func (s *Scanner) scanString() {
 	depth := 0 // brace nesting depth inside a ${ ... } interpolation
 	for {
 		ch := s.next()

 		switch {
 		case ch < 0 || ch == eof:
 			s.err("literal not terminated")
 			return
 		case ch == '"' && depth == 0:
 			// closing quote outside of any interpolation
 			return
 		}

 		switch ch {
 		case '$':
 			// entering ${ ... }: quotes are ignored until it closes
 			if depth == 0 && s.peek() == '{' {
 				depth++
 				s.next()
 			}
 		case '{':
 			if depth > 0 {
 				depth++
 			}
 		case '}':
 			if depth > 0 {
 				depth--
 			}
 		case '\\':
 			s.scanEscape()
 		}
 	}
 }

 // scanEscape scans an escape sequence; the backslash itself has already been
 // consumed. It returns the last rune it looked at. An unknown escape
 // character is reported via s.err.
 func (s *Scanner) scanEscape() rune {
 	// http://en.cppreference.com/w/cpp/language/escape
 	ch := s.next() // the rune right after the backslash

 	var hexDigits int
 	switch ch {
 	case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', '"':
 		// single-character escape, nothing more to read
 		return ch
 	case '0', '1', '2', '3', '4', '5', '6', '7':
 		// octal notation: up to three digits, starting with ch itself
 		return s.scanDigits(ch, 8, 3)
 	case 'x':
 		// hexadecimal notation
 		hexDigits = 2
 	case 'u':
 		// universal character name, short form
 		hexDigits = 4
 	case 'U':
 		// universal character name, long form
 		hexDigits = 8
 	default:
 		s.err("illegal char escape")
 		return ch
 	}

 	return s.scanDigits(s.next(), 16, hexDigits)
 }

 // scanDigits scans up to n digits of the given base, where ch is the first
 // candidate digit and has already been consumed. For example the octal escape
 // \123 results in scanDigits('1', 8, 3). Fewer than n digits is reported as
 // "illegal char escape". The rune that follows the digits is put back, and
 // the last rune read is returned.
 func (s *Scanner) scanDigits(ch rune, base, n int) rune {
 	start := n
 	for n > 0 && digitVal(ch) < base {
 		ch = s.next()
 		if ch == eof {
 			// If we see an EOF, we halt any more scanning of digits
 			// immediately.
 			break
 		}

 		n--
 	}
 	if n > 0 {
 		s.err("illegal char escape")
 	}

 	// Put the last non digit char back, but only if we read anything at
 	// all. After hitting the end of input there is nothing to put back:
 	// the buffer rejects UnreadRune after a failed read and unread() would
 	// panic.
 	if n != start && ch != eof {
 		s.unread()
 	}

 	return ch
 }

 // scanIdentifier scans an identifier whose first rune has already been
 // consumed and returns its text. Letters, digits, '-' and '.' continue the
 // identifier.
 func (s *Scanner) scanIdentifier() string {
 	start := s.srcPos.Offset - s.lastCharLen

 	for {
 		ch := s.next()
 		if isLetter(ch) || isDigit(ch) || ch == '-' || ch == '.' {
 			continue
 		}

 		// first rune past the identifier: hand it back unless input ended
 		if ch != eof {
 			s.unread()
 		}
 		break
 	}

 	return string(s.src[start:s.srcPos.Offset])
 }

 // recentPosition returns the position used for error reporting: the offset of
 // the most recently read character together with the current line and column.
 // Directly after a newline it reports the end of the previous line instead.
 func (s *Scanner) recentPosition() (pos token.Pos) {
 	pos.Offset = s.srcPos.Offset - s.lastCharLen

 	if s.srcPos.Column > 0 {
 		// common case: last character was not a '\n'
 		pos.Line, pos.Column = s.srcPos.Line, s.srcPos.Column
 		return pos
 	}

 	if s.lastLineLen > 0 {
 		// last character was a '\n'
 		// (we cannot be at the beginning of the source
 		// since we have called next() at least once)
 		pos.Line, pos.Column = s.srcPos.Line-1, s.lastLineLen
 		return pos
 	}

 	// at the beginning of the source
 	pos.Line, pos.Column = 1, 1
 	return pos
 }

 // err counts a scanning error and reports it together with the most recent
 // position: to s.Error when that callback is set, otherwise to os.Stderr.
 func (s *Scanner) err(msg string) {
 	s.ErrorCount++
 	pos := s.recentPosition()

 	if s.Error == nil {
 		fmt.Fprintf(os.Stderr, "%s: %s\n", pos, msg)
 		return
 	}

 	s.Error(pos, msg)
 }

 // isLetter returns true if the given rune is an ASCII letter, an underscore
 // or a non-ASCII Unicode letter.
 func isLetter(ch rune) bool {
 	switch {
 	case ch >= 'a' && ch <= 'z', ch >= 'A' && ch <= 'Z', ch == '_':
 		return true
 	case ch >= 0x80:
 		return unicode.IsLetter(ch)
 	}
 	return false
 }

 // isDigit returns true if the given rune is an ASCII digit or a non-ASCII
 // Unicode decimal digit.
 func isDigit(ch rune) bool {
 	if ch < 0x80 {
 		return ch >= '0' && ch <= '9'
 	}
 	return unicode.IsDigit(ch)
 }

 // isDecimal returns true if the given rune is one of the ASCII digits '0'
 // through '9'.
 func isDecimal(ch rune) bool {
 	if ch < '0' || ch > '9' {
 		return false
 	}
 	return true
 }

 // isHexadecimal returns true if the given rune is a hexadecimal digit:
 // '0'-'9', 'a'-'f' or 'A'-'F'.
 func isHexadecimal(ch rune) bool {
 	switch {
 	case ch >= '0' && ch <= '9':
 		return true
 	case ch >= 'a' && ch <= 'f':
 		return true
 	case ch >= 'A' && ch <= 'F':
 		return true
 	}
 	return false
 }

 // isWhitespace returns true if the rune is a space, tab, newline or carriage
 // return.
 func isWhitespace(ch rune) bool {
 	switch ch {
 	case ' ', '\t', '\n', '\r':
 		return true
 	}
 	return false
 }

 // digitVal returns the integer value of a given octal, decimal or hexadecimal
 // digit rune. Any other rune yields 16, which is larger than every legal
 // digit value.
 func digitVal(ch rune) int {
 	if ch >= '0' && ch <= '9' {
 		return int(ch - '0')
 	}
 	if ch >= 'a' && ch <= 'f' {
 		return 10 + int(ch-'a')
 	}
 	if ch >= 'A' && ch <= 'F' {
 		return 10 + int(ch-'A')
 	}
 	return 16
 }
	// Package scanner implements a scanner for HCL (HashiCorp Configuration
	// Language) source text.
	package scanner

	import (
	"bytes"
	"fmt"
	"os"
	"regexp"
	"unicode"
	"unicode/utf8"

	"github.com/hashicorp/hcl/hcl/token"
	)

	// eof represents a marker rune for the end of the reader. Its value is
	// rune(0), so a literal NUL character read from the source compares equal to
	// eof as well.
	const eof = rune(0)

	// Scanner defines a lexical scanner. It is created with New and yields one
	// token per call to Scan.
	type Scanner struct {
	buf *bytes.Buffer // Source buffer for advancing and scanning
	src []byte // Source buffer for immutable access

	// Source Position
	srcPos token.Pos // current position
	prevPos token.Pos // position saved by next() before advancing; restored by unread()

	lastCharLen int // length of last character in bytes
	lastLineLen int // length of last line in characters (for correct column reporting)

	tokStart int // token text start position
	tokEnd int // token text end position

	// Error is called for each error encountered. If no Error
	// function is set, the error is reported to os.Stderr.
	Error func(pos token.Pos, msg string)

	// ErrorCount is incremented by one for each error encountered.
	ErrorCount int

	// tokPos is the start position of most recently scanned token; set by
	// Scan. The Filename field is always left untouched by the Scanner. If
	// an error is reported (via Error) and Position is invalid, the scanner is
	// not inside a token.
	tokPos token.Pos
	}

	// New returns a Scanner positioned at the start of src. The slice is kept
	// both as the read buffer and for direct access to token text.
	func New(src []byte) *Scanner {
		// Reads go through a *bytes.Buffer (an io.Reader compatible type) even
		// though the whole source is already in memory; that keeps a later move
		// to streaming input straightforward.
		sc := &Scanner{
			buf: bytes.NewBuffer(src),
			src: src,
		}

		// line numbers are 1-based
		sc.srcPos.Line = 1
		return sc
	}

	// next reads the next rune from the buffered reader and advances the source
	// position. It returns eof (rune(0)) if the read fails (for example on
	// io.EOF). An invalid UTF-8 byte is reported through s.err and returned as
	// utf8.RuneError.
	func (s *Scanner) next() rune {
		ch, size, err := s.buf.ReadRune()
		if err != nil {
			// advance for error reporting
			s.srcPos.Column++
			s.srcPos.Offset += size
			s.lastCharLen = size
			return eof
		}

		// Remember the position before every successfully read rune, including
		// an invalid byte. unread() restores this value, so it has to stay in
		// step with what the buffer's UnreadRune puts back.
		s.prevPos = s.srcPos

		s.srcPos.Column++
		s.lastCharLen = size
		s.srcPos.Offset += size

		if ch == utf8.RuneError && size == 1 {
			s.err("illegal UTF-8 encoding")
			return ch
		}

		if ch == '\n' {
			s.srcPos.Line++
			s.lastLineLen = s.srcPos.Column
			s.srcPos.Column = 0
		}

		// debug
		// fmt.Printf("ch: %q, offset:column: %d:%d\n", ch, s.srcPos.Offset, s.srcPos.Column)
		return ch
	}

	// unread pushes the most recently read rune back onto the buffer and rewinds
	// the source position to the one saved before that read. It panics when the
	// buffer refuses to unread.
	func (s *Scanner) unread() {
		err := s.buf.UnreadRune()
		if err != nil {
			// this is user fault, we should catch it
			panic(err)
		}

		// put back last position
		s.srcPos = s.prevPos
	}

	// peek reports the rune that the next read would return, leaving the source
	// position untouched. It returns eof when no rune can be read.
	func (s *Scanner) peek() rune {
		r, _, err := s.buf.ReadRune()
		if err == nil {
			// give the rune straight back to the buffer
			s.buf.UnreadRune()
			return r
		}
		return eof
	}

	// Scan scans the next token and returns the token.
	func (s *Scanner) Scan() token.Token {
	ch := s.next()

	// skip white space
	for isWhitespace(ch) {
	ch = s.next()
	}

	var tok token.Type

	// token text markings
	s.tokStart = s.srcPos.Offset - s.lastCharLen

	// token position, initial next() is moving the offset by one(size of rune
	// actually), though we are interested with the starting point
	s.tokPos.Offset = s.srcPos.Offset - s.lastCharLen
	if s.srcPos.Column > 0 {
	// common case: last character was not a '\n'
	s.tokPos.Line = s.srcPos.Line
	s.tokPos.Column = s.srcPos.Column
	} else {
	// last character was a '\n'
	// (we cannot be at the beginning of the source
	// since we have called next() at least once)
	s.tokPos.Line = s.srcPos.Line - 1
	s.tokPos.Column = s.lastLineLen
	}

	switch {
	case isLetter(ch):
	tok = token.IDENT
	lit := s.scanIdentifier()
	if lit == "true" \|\| lit == "false" {
	tok = token.BOOL
	}
	case isDecimal(ch):
	tok = s.scanNumber(ch)
	default:
	switch ch {
	case eof:
	tok = token.EOF
	case '"':
	tok = token.STRING
	s.scanString()
	case '#', '/':
	tok = token.COMMENT
	s.scanComment(ch)
	case '.':
	tok = token.PERIOD
	ch = s.peek()
	if isDecimal(ch) {
	tok = token.FLOAT
	ch = s.scanMantissa(ch)
	ch = s.scanExponent(ch)
	}
	case '<':
	tok = token.HEREDOC
	s.scanHeredoc()
	case '[':
	tok = token.LBRACK
	case ']':
	tok = token.RBRACK
	case '{':
	tok = token.LBRACE
	case '}':
	tok = token.RBRACE
	case ',':
	tok = token.COMMA
	case '=':
	tok = token.ASSIGN
	case '+':
	tok = token.ADD
	case '-':
	if isDecimal(s.peek()) {
	ch := s.next()
	tok = s.scanNumber(ch)
	} else {
	tok = token.SUB
	}
	default:
	s.err("illegal char")
	}
	}

	// finish token ending
	s.tokEnd = s.srcPos.Offset

	// create token literal
	var tokenText string
	if s.tokStart >= 0 {
	tokenText = string(s.src[s.tokStart:s.tokEnd])
	}
	s.tokStart = s.tokEnd // ensure idempotency of tokenText() call

	return token.Token{
	Type: tok,
	Pos: s.tokPos,
	Text: tokenText,
	}
	}

	func (s *Scanner) scanComment(ch rune) {
	// single line comments
	if ch == '#' \|\| (ch == '/' && s.peek() != '*') {
	if ch == '/' && s.peek() != '/' {
	s.err("expected '/' for comment")
	return
	}

	ch = s.next()
	for ch != '\n' && ch >= 0 && ch != eof {
	ch = s.next()
	}
	if ch != eof && ch >= 0 {
	s.unread()
	}
	return
	}

	// be sure we get the character after /* This allows us to find comment's
	// that are not erminated
	if ch == '/' {
	s.next()
	ch = s.next() // read character after "/*"
	}

	// look for /* - style comments
	for {
	if ch < 0 \|\| ch == eof {
	s.err("comment not terminated")
	break
	}

	ch0 := ch
	ch = s.next()
	if ch0 == '*' && ch == '/' {
	break
	}
	}
	}

	// scanNumber scans a HCL number definition starting with the given rune
	func (s *Scanner) scanNumber(ch rune) token.Type {
	if ch == '0' {
	// check for hexadecimal, octal or float
	ch = s.next()
	if ch == 'x' \|\| ch == 'X' {
	// hexadecimal
	ch = s.next()
	found := false
	for isHexadecimal(ch) {
	ch = s.next()
	found = true
	}

	if !found {
	s.err("illegal hexadecimal number")
	}

	if ch != eof {
	s.unread()
	}

	return token.NUMBER
	}

	// now it's either something like: 0421(octal) or 0.1231(float)
	illegalOctal := false
	for isDecimal(ch) {
	ch = s.next()
	if ch == '8' \|\| ch == '9' {
	// this is just a possibility. For example 0159 is illegal, but
	// 0159.23 is valid. So we mark a possible illegal octal. If
	// the next character is not a period, we'll print the error.
	illegalOctal = true
	}
	}

	if ch == 'e' \|\| ch == 'E' {
	ch = s.scanExponent(ch)
	return token.FLOAT
	}

	if ch == '.' {
	ch = s.scanFraction(ch)

	if ch == 'e' \|\| ch == 'E' {
	ch = s.next()
	ch = s.scanExponent(ch)
	}
	return token.FLOAT
	}

	if illegalOctal {
	s.err("illegal octal number")
	}

	if ch != eof {
	s.unread()
	}
	return token.NUMBER
	}

	s.scanMantissa(ch)
	ch = s.next() // seek forward
	if ch == 'e' \|\| ch == 'E' {
	ch = s.scanExponent(ch)
	return token.FLOAT
	}

	if ch == '.' {
	ch = s.scanFraction(ch)
	if ch == 'e' \|\| ch == 'E' {
	ch = s.next()
	ch = s.scanExponent(ch)
	}
	return token.FLOAT
	}

	if ch != eof {
	s.unread()
	}
	return token.NUMBER
	}

	// scanMantissa consumes a run of decimal digits beginning with ch and returns
	// the first rune that is not a decimal digit. When at least one digit was
	// consumed and the input has not ended, that rune is put back so the caller
	// can decide whether a fraction or an exponent follows.
	func (s *Scanner) scanMantissa(ch rune) rune {
		if !isDecimal(ch) {
			// nothing to scan, nothing to put back
			return ch
		}

		for isDecimal(ch) {
			ch = s.next()
		}

		if ch != eof {
			s.unread()
		}
		return ch
	}

	// scanFraction scans the digits that follow a '.' rune. For any other rune
	// it does nothing and returns ch unchanged.
	func (s *Scanner) scanFraction(ch rune) rune {
		if ch != '.' {
			return ch
		}

		// peek first: scanMantissa only advances when a digit follows
		return s.scanMantissa(s.peek())
	}

	// scanExponent scans the remaining parts of an exponent after the 'e' or 'E'
	// rune.
	func (s *Scanner) scanExponent(ch rune) rune {
	if ch == 'e' \|\| ch == 'E' {
	ch = s.next()
	if ch == '-' \|\| ch == '+' {
	ch = s.next()
	}
	ch = s.scanMantissa(ch)
	}
	return ch
	}

	// scanHeredoc scans a heredoc string
	func (s *Scanner) scanHeredoc() {
	// Scan the second '<' in example: '<<EOF'
	if s.next() != '<' {
	s.err("heredoc expected second '<', didn't see it")
	return
	}

	// Get the original offset so we can read just the heredoc ident
	offs := s.srcPos.Offset

	// Scan the identifier
	ch := s.next()

	// Indented heredoc syntax
	if ch == '-' {
	ch = s.next()
	}

	for isLetter(ch) \|\| isDigit(ch) {
	ch = s.next()
	}

	// If we reached an EOF then that is not good
	if ch == eof {
	s.err("heredoc not terminated")
	return
	}

	// Ignore the '\r' in Windows line endings
	if ch == '\r' {
	if s.peek() == '\n' {
	ch = s.next()
	}
	}

	// If we didn't reach a newline then that is also not good
	if ch != '\n' {
	s.err("invalid characters in heredoc anchor")
	return
	}

	// Read the identifier
	identBytes := s.src[offs : s.srcPos.Offset-s.lastCharLen]
	if len(identBytes) == 0 {
	s.err("zero-length heredoc anchor")
	return
	}

	var identRegexp *regexp.Regexp
	if identBytes[0] == '-' {
	identRegexp = regexp.MustCompile(fmt.Sprintf(`[[:space:]]*%s\z`, identBytes[1:]))
	} else {
	identRegexp = regexp.MustCompile(fmt.Sprintf(`[[:space:]]*%s\z`, identBytes))
	}

	// Read the actual string value
	lineStart := s.srcPos.Offset
	for {
	ch := s.next()

	// Special newline handling.
	if ch == '\n' {
	// Math is fast, so we first compare the byte counts to see if we have a chance
	// of seeing the same identifier - if the length is less than the number of bytes
	// in the identifier, this cannot be a valid terminator.
	lineBytesLen := s.srcPos.Offset - s.lastCharLen - lineStart
	if lineBytesLen >= len(identBytes) && identRegexp.Match(s.src[lineStart:s.srcPos.Offset-s.lastCharLen]) {
	break
	}

	// Not an anchor match, record the start of a new line
	lineStart = s.srcPos.Offset
	}

	if ch == eof {
	s.err("heredoc not terminated")
	return
	}
	}

	return
	}

	// scanString scans a quoted string
	func (s *Scanner) scanString() {
	braces := 0
	for {
	// '"' opening already consumed
	// read character after quote
	ch := s.next()

	if ch < 0 \|\| ch == eof {
	s.err("literal not terminated")
	return
	}

	if ch == '"' && braces == 0 {
	break
	}

	// If we're going into a ${} then we can ignore quotes for awhile
	if braces == 0 && ch == '$' && s.peek() == '{' {
	braces++
	s.next()
	} else if braces > 0 && ch == '{' {
	braces++
	}
	if braces > 0 && ch == '}' {
	braces--
	}

	if ch == '\\' {
	s.scanEscape()
	}
	}

	return
	}

	// scanEscape scans an escape sequence; the backslash itself has already been
	// consumed. It returns the last rune it looked at. An unknown escape
	// character is reported via s.err.
	func (s *Scanner) scanEscape() rune {
		// http://en.cppreference.com/w/cpp/language/escape
		ch := s.next() // the rune right after the backslash

		var hexDigits int
		switch ch {
		case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', '"':
			// single-character escape, nothing more to read
			return ch
		case '0', '1', '2', '3', '4', '5', '6', '7':
			// octal notation: up to three digits, starting with ch itself
			return s.scanDigits(ch, 8, 3)
		case 'x':
			// hexadecimal notation
			hexDigits = 2
		case 'u':
			// universal character name, short form
			hexDigits = 4
		case 'U':
			// universal character name, long form
			hexDigits = 8
		default:
			s.err("illegal char escape")
			return ch
		}

		return s.scanDigits(s.next(), 16, hexDigits)
	}

	// scanDigits scans up to n digits of the given base, where ch is the first
	// candidate digit and has already been consumed. For example the octal escape
	// \123 results in scanDigits('1', 8, 3). Fewer than n digits is reported as
	// "illegal char escape". The rune that follows the digits is put back, and
	// the last rune read is returned.
	func (s *Scanner) scanDigits(ch rune, base, n int) rune {
		start := n
		for n > 0 && digitVal(ch) < base {
			ch = s.next()
			if ch == eof {
				// If we see an EOF, we halt any more scanning of digits
				// immediately.
				break
			}

			n--
		}
		if n > 0 {
			s.err("illegal char escape")
		}

		// Put the last non digit char back, but only if we read anything at
		// all. After hitting the end of input there is nothing to put back:
		// the buffer rejects UnreadRune after a failed read and unread() would
		// panic.
		if n != start && ch != eof {
			s.unread()
		}

		return ch
	}

	// scanIdentifier scans an identifier and returns the literal string
	func (s *Scanner) scanIdentifier() string {
	offs := s.srcPos.Offset - s.lastCharLen
	ch := s.next()
	for isLetter(ch) \|\| isDigit(ch) \|\| ch == '-' \|\| ch == '.' {
	ch = s.next()
	}

	if ch != eof {
	s.unread() // we got identifier, put back latest char
	}

	return string(s.src[offs:s.srcPos.Offset])
	}

	// recentPosition returns the position used for error reporting: the offset of
	// the most recently read character together with the current line and column.
	// Directly after a newline it reports the end of the previous line instead.
	func (s *Scanner) recentPosition() (pos token.Pos) {
		pos.Offset = s.srcPos.Offset - s.lastCharLen

		if s.srcPos.Column > 0 {
			// common case: last character was not a '\n'
			pos.Line, pos.Column = s.srcPos.Line, s.srcPos.Column
			return pos
		}

		if s.lastLineLen > 0 {
			// last character was a '\n'
			// (we cannot be at the beginning of the source
			// since we have called next() at least once)
			pos.Line, pos.Column = s.srcPos.Line-1, s.lastLineLen
			return pos
		}

		// at the beginning of the source
		pos.Line, pos.Column = 1, 1
		return pos
	}

	// err counts a scanning error and reports it together with the most recent
	// position: to s.Error when that callback is set, otherwise to os.Stderr.
	func (s *Scanner) err(msg string) {
		s.ErrorCount++
		pos := s.recentPosition()

		if s.Error == nil {
			fmt.Fprintf(os.Stderr, "%s: %s\n", pos, msg)
			return
		}

		s.Error(pos, msg)
	}

	// isLetter returns true if the given rune is an ASCII letter, an underscore
	// or a non-ASCII Unicode letter.
	func isLetter(ch rune) bool {
		return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_' || ch >= 0x80 && unicode.IsLetter(ch)
	}

	// isDigit returns true if the given rune is an ASCII digit or a non-ASCII
	// Unicode decimal digit.
	func isDigit(ch rune) bool {
		return '0' <= ch && ch <= '9' || ch >= 0x80 && unicode.IsDigit(ch)
	}

	// isDecimal returns true if the given rune is one of the ASCII digits '0'
	// through '9'.
	func isDecimal(ch rune) bool {
		if ch < '0' || ch > '9' {
			return false
		}
		return true
	}

	// isHexadecimal returns true if the given rune is a hexadecimal digit:
	// '0'-'9', 'a'-'f' or 'A'-'F'.
	func isHexadecimal(ch rune) bool {
		return '0' <= ch && ch <= '9' || 'a' <= ch && ch <= 'f' || 'A' <= ch && ch <= 'F'
	}

	// isWhitespace returns true if the rune is a space, tab, newline or carriage
	// return.
	func isWhitespace(ch rune) bool {
		return ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r'
	}

	// digitVal returns the integer value of a given octal, decimal or hexadecimal
	// digit rune. Any other rune yields 16, which is larger than every legal
	// digit value.
	func digitVal(ch rune) int {
		if ch >= '0' && ch <= '9' {
			return int(ch - '0')
		}
		if ch >= 'a' && ch <= 'f' {
			return 10 + int(ch-'a')
		}
		if ch >= 'A' && ch <= 'F' {
			return 10 + int(ch-'A')
		}
		return 16
	}