| // Copyright 2017 Frank Schroeder. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| // |
| // Parts of the lexer are from the template/text/parser package |
| // For these parts the following applies: |
| // |
| // Copyright 2011 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file of the go 1.2 |
| // distribution. |
| |
| package properties |
| |
| import ( |
| "fmt" |
| "strconv" |
| "strings" |
| "unicode/utf8" |
| ) |
| |
| // item represents a token or text string returned from the scanner. |
| type item struct { |
| typ itemType // The type of this item. |
| pos int // The starting position, in bytes, of this item in the input string. |
| val string // The value of this item. |
| } |
| |
| func (i item) String() string { |
| switch { |
| case i.typ == itemEOF: |
| return "EOF" |
| case i.typ == itemError: |
| return i.val |
| case len(i.val) > 10: |
| return fmt.Sprintf("%.10q...", i.val) |
| } |
| return fmt.Sprintf("%q", i.val) |
| } |
| |
| // itemType identifies the type of lex items. |
| type itemType int |
| |
| const ( |
| itemError itemType = iota // error occurred; value is text of error |
| itemEOF |
| itemKey // a key |
| itemValue // a value |
| itemComment // a comment |
| ) |
| |
| // defines a constant for EOF |
| const eof = -1 |
| |
| // permitted whitespace characters space, FF and TAB |
| const whitespace = " \f\t" |
| |
| // stateFn represents the state of the scanner as a function that returns the next state. |
| type stateFn func(*lexer) stateFn |
| |
| // lexer holds the state of the scanner. |
| type lexer struct { |
| input string // the string being scanned |
| state stateFn // the next lexing function to enter |
| pos int // current position in the input |
| start int // start position of this item |
| width int // width of last rune read from input |
| lastPos int // position of most recent item returned by nextItem |
| runes []rune // scanned runes for this item |
| items chan item // channel of scanned items |
| } |
| |
| // next returns the next rune in the input. |
| func (l *lexer) next() rune { |
| if l.pos >= len(l.input) { |
| l.width = 0 |
| return eof |
| } |
| r, w := utf8.DecodeRuneInString(l.input[l.pos:]) |
| l.width = w |
| l.pos += l.width |
| return r |
| } |
| |
| // peek returns but does not consume the next rune in the input. |
| func (l *lexer) peek() rune { |
| r := l.next() |
| l.backup() |
| return r |
| } |
| |
| // backup steps back one rune. Can only be called once per call of next. |
| func (l *lexer) backup() { |
| l.pos -= l.width |
| } |
| |
| // emit passes an item back to the client. |
| func (l *lexer) emit(t itemType) { |
| i := item{t, l.start, string(l.runes)} |
| l.items <- i |
| l.start = l.pos |
| l.runes = l.runes[:0] |
| } |
| |
| // ignore skips over the pending input before this point. |
| func (l *lexer) ignore() { |
| l.start = l.pos |
| } |
| |
| // appends the rune to the current value |
| func (l *lexer) appendRune(r rune) { |
| l.runes = append(l.runes, r) |
| } |
| |
| // accept consumes the next rune if it's from the valid set. |
| func (l *lexer) accept(valid string) bool { |
| if strings.ContainsRune(valid, l.next()) { |
| return true |
| } |
| l.backup() |
| return false |
| } |
| |
| // acceptRun consumes a run of runes from the valid set. |
| func (l *lexer) acceptRun(valid string) { |
| for strings.ContainsRune(valid, l.next()) { |
| } |
| l.backup() |
| } |
| |
| // acceptRunUntil consumes a run of runes up to a terminator. |
| func (l *lexer) acceptRunUntil(term rune) { |
| for term != l.next() { |
| } |
| l.backup() |
| } |
| |
| // hasText returns true if the current parsed text is not empty. |
| func (l *lexer) isNotEmpty() bool { |
| return l.pos > l.start |
| } |
| |
| // lineNumber reports which line we're on, based on the position of |
| // the previous item returned by nextItem. Doing it this way |
| // means we don't have to worry about peek double counting. |
| func (l *lexer) lineNumber() int { |
| return 1 + strings.Count(l.input[:l.lastPos], "\n") |
| } |
| |
| // errorf returns an error token and terminates the scan by passing |
| // back a nil pointer that will be the next state, terminating l.nextItem. |
| func (l *lexer) errorf(format string, args ...interface{}) stateFn { |
| l.items <- item{itemError, l.start, fmt.Sprintf(format, args...)} |
| return nil |
| } |
| |
| // nextItem returns the next item from the input. |
| func (l *lexer) nextItem() item { |
| i := <-l.items |
| l.lastPos = i.pos |
| return i |
| } |
| |
| // lex creates a new scanner for the input string. |
| func lex(input string) *lexer { |
| l := &lexer{ |
| input: input, |
| items: make(chan item), |
| runes: make([]rune, 0, 32), |
| } |
| go l.run() |
| return l |
| } |
| |
| // run runs the state machine for the lexer. |
| func (l *lexer) run() { |
| for l.state = lexBeforeKey(l); l.state != nil; { |
| l.state = l.state(l) |
| } |
| } |
| |
| // state functions |
| |
| // lexBeforeKey scans until a key begins. |
| func lexBeforeKey(l *lexer) stateFn { |
| switch r := l.next(); { |
| case isEOF(r): |
| l.emit(itemEOF) |
| return nil |
| |
| case isEOL(r): |
| l.ignore() |
| return lexBeforeKey |
| |
| case isComment(r): |
| return lexComment |
| |
| case isWhitespace(r): |
| l.acceptRun(whitespace) |
| l.ignore() |
| return lexKey |
| |
| default: |
| l.backup() |
| return lexKey |
| } |
| } |
| |
| // lexComment scans a comment line. The comment character has already been scanned. |
| func lexComment(l *lexer) stateFn { |
| l.acceptRun(whitespace) |
| l.ignore() |
| for { |
| switch r := l.next(); { |
| case isEOF(r): |
| l.ignore() |
| l.emit(itemEOF) |
| return nil |
| case isEOL(r): |
| l.emit(itemComment) |
| return lexBeforeKey |
| default: |
| l.appendRune(r) |
| } |
| } |
| } |
| |
| // lexKey scans the key up to a delimiter |
| func lexKey(l *lexer) stateFn { |
| var r rune |
| |
| Loop: |
| for { |
| switch r = l.next(); { |
| |
| case isEscape(r): |
| err := l.scanEscapeSequence() |
| if err != nil { |
| return l.errorf(err.Error()) |
| } |
| |
| case isEndOfKey(r): |
| l.backup() |
| break Loop |
| |
| case isEOF(r): |
| break Loop |
| |
| default: |
| l.appendRune(r) |
| } |
| } |
| |
| if len(l.runes) > 0 { |
| l.emit(itemKey) |
| } |
| |
| if isEOF(r) { |
| l.emit(itemEOF) |
| return nil |
| } |
| |
| return lexBeforeValue |
| } |
| |
| // lexBeforeValue scans the delimiter between key and value. |
| // Leading and trailing whitespace is ignored. |
| // We expect to be just after the key. |
| func lexBeforeValue(l *lexer) stateFn { |
| l.acceptRun(whitespace) |
| l.accept(":=") |
| l.acceptRun(whitespace) |
| l.ignore() |
| return lexValue |
| } |
| |
| // lexValue scans text until the end of the line. We expect to be just after the delimiter. |
| func lexValue(l *lexer) stateFn { |
| for { |
| switch r := l.next(); { |
| case isEscape(r): |
| if isEOL(l.peek()) { |
| l.next() |
| l.acceptRun(whitespace) |
| } else { |
| err := l.scanEscapeSequence() |
| if err != nil { |
| return l.errorf(err.Error()) |
| } |
| } |
| |
| case isEOL(r): |
| l.emit(itemValue) |
| l.ignore() |
| return lexBeforeKey |
| |
| case isEOF(r): |
| l.emit(itemValue) |
| l.emit(itemEOF) |
| return nil |
| |
| default: |
| l.appendRune(r) |
| } |
| } |
| } |
| |
| // scanEscapeSequence scans either one of the escaped characters |
| // or a unicode literal. We expect to be after the escape character. |
| func (l *lexer) scanEscapeSequence() error { |
| switch r := l.next(); { |
| |
| case isEscapedCharacter(r): |
| l.appendRune(decodeEscapedCharacter(r)) |
| return nil |
| |
| case atUnicodeLiteral(r): |
| return l.scanUnicodeLiteral() |
| |
| case isEOF(r): |
| return fmt.Errorf("premature EOF") |
| |
| // silently drop the escape character and append the rune as is |
| default: |
| l.appendRune(r) |
| return nil |
| } |
| } |
| |
| // scans a unicode literal in the form \uXXXX. We expect to be after the \u. |
| func (l *lexer) scanUnicodeLiteral() error { |
| // scan the digits |
| d := make([]rune, 4) |
| for i := 0; i < 4; i++ { |
| d[i] = l.next() |
| if d[i] == eof || !strings.ContainsRune("0123456789abcdefABCDEF", d[i]) { |
| return fmt.Errorf("invalid unicode literal") |
| } |
| } |
| |
| // decode the digits into a rune |
| r, err := strconv.ParseInt(string(d), 16, 0) |
| if err != nil { |
| return err |
| } |
| |
| l.appendRune(rune(r)) |
| return nil |
| } |
| |
| // decodeEscapedCharacter returns the unescaped rune. We expect to be after the escape character. |
| func decodeEscapedCharacter(r rune) rune { |
| switch r { |
| case 'f': |
| return '\f' |
| case 'n': |
| return '\n' |
| case 'r': |
| return '\r' |
| case 't': |
| return '\t' |
| default: |
| return r |
| } |
| } |
| |
| // atUnicodeLiteral reports whether we are at a unicode literal. |
| // The escape character has already been consumed. |
| func atUnicodeLiteral(r rune) bool { |
| return r == 'u' |
| } |
| |
| // isComment reports whether we are at the start of a comment. |
| func isComment(r rune) bool { |
| return r == '#' || r == '!' |
| } |
| |
| // isEndOfKey reports whether the rune terminates the current key. |
| func isEndOfKey(r rune) bool { |
| return strings.ContainsRune(" \f\t\r\n:=", r) |
| } |
| |
| // isEOF reports whether we are at EOF. |
| func isEOF(r rune) bool { |
| return r == eof |
| } |
| |
| // isEOL reports whether we are at a new line character. |
| func isEOL(r rune) bool { |
| return r == '\n' || r == '\r' |
| } |
| |
| // isEscape reports whether the rune is the escape character which |
| // prefixes unicode literals and other escaped characters. |
| func isEscape(r rune) bool { |
| return r == '\\' |
| } |
| |
| // isEscapedCharacter reports whether we are at one of the characters that need escaping. |
| // The escape character has already been consumed. |
| func isEscapedCharacter(r rune) bool { |
| return strings.ContainsRune(" :=fnrt", r) |
| } |
| |
| // isWhitespace reports whether the rune is a whitespace character. |
| func isWhitespace(r rune) bool { |
| return strings.ContainsRune(whitespace, r) |
| } |