| // Copyright 2015 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| package lex |
| |
| import ( |
| "io" |
| "os" |
| "strings" |
| "text/scanner" |
| "unicode" |
| ) |
| |
| // A Tokenizer is a simple wrapping of text/scanner.Scanner, configured |
| // for our purposes and made a TokenReader. It forms the lowest level, |
| // turning text from readers into tokens. |
| type Tokenizer struct { |
| tok ScanToken |
| s *scanner.Scanner |
| line int |
| fileName string |
| file *os.File // If non-nil, file descriptor to close. |
| } |
| |
| func NewTokenizer(name string, r io.Reader, file *os.File) *Tokenizer { |
| var s scanner.Scanner |
| s.Init(r) |
| // Newline is like a semicolon; other space characters are fine. |
| s.Whitespace = 1<<'\t' | 1<<'\r' | 1<<' ' |
| // Don't skip comments: we need to count newlines. |
| s.Mode = scanner.ScanChars | |
| scanner.ScanFloats | |
| scanner.ScanIdents | |
| scanner.ScanInts | |
| scanner.ScanStrings | |
| scanner.ScanComments |
| s.Position.Filename = name |
| s.IsIdentRune = isIdentRune |
| if file != nil { |
| linkCtxt.LineHist.Push(histLine, name) |
| } |
| return &Tokenizer{ |
| s: &s, |
| line: 1, |
| fileName: name, |
| file: file, |
| } |
| } |
| |
// isIdentRune reports whether ch may appear at index i of an identifier.
// Besides letters and underscore, center dot (·) and division slash (∕)
// are accepted so that they can work as identifier characters.
func isIdentRune(ch rune, i int) bool {
	switch {
	case unicode.IsLetter(ch):
		return true
	case ch == '_':
		// Underscore; traditional.
		return true
	case ch == '\u00B7':
		// U+00B7 '·' middle dot: represents the period in runtime.exit.
		return true
	case ch == '\u2215':
		// U+2215 '∕' division slash: represents the slash in runtime/debug.setGCPercent.
		return true
	case unicode.IsDigit(ch):
		// Digits are OK only after the first character.
		return i > 0
	}
	return false
}
| |
| func (t *Tokenizer) Text() string { |
| switch t.tok { |
| case LSH: |
| return "<<" |
| case RSH: |
| return ">>" |
| case ARR: |
| return "->" |
| case ROT: |
| return "@>" |
| } |
| return t.s.TokenText() |
| } |
| |
| func (t *Tokenizer) File() string { |
| return t.fileName |
| } |
| |
| func (t *Tokenizer) Line() int { |
| return t.line |
| } |
| |
| func (t *Tokenizer) Col() int { |
| return t.s.Pos().Column |
| } |
| |
| func (t *Tokenizer) SetPos(line int, file string) { |
| t.line = line |
| t.fileName = file |
| } |
| |
| func (t *Tokenizer) Next() ScanToken { |
| s := t.s |
| for { |
| t.tok = ScanToken(s.Scan()) |
| if t.tok != scanner.Comment { |
| break |
| } |
| length := strings.Count(s.TokenText(), "\n") |
| t.line += length |
| histLine += length |
| // TODO: If we ever have //go: comments in assembly, will need to keep them here. |
| // For now, just discard all comments. |
| } |
| switch t.tok { |
| case '\n': |
| if t.file != nil { |
| histLine++ |
| } |
| t.line++ |
| case '-': |
| if s.Peek() == '>' { |
| s.Next() |
| t.tok = ARR |
| return ARR |
| } |
| case '@': |
| if s.Peek() == '>' { |
| s.Next() |
| t.tok = ROT |
| return ROT |
| } |
| case '<': |
| if s.Peek() == '<' { |
| s.Next() |
| t.tok = LSH |
| return LSH |
| } |
| case '>': |
| if s.Peek() == '>' { |
| s.Next() |
| t.tok = RSH |
| return RSH |
| } |
| } |
| return t.tok |
| } |
| |
| func (t *Tokenizer) Close() { |
| if t.file != nil { |
| t.file.Close() |
| // It's an open file, so pop the line history. |
| linkCtxt.LineHist.Pop(histLine) |
| } |
| } |