// src/cmd/compile/internal/syntax/source.go - go - Git at Google

 // Copyright 2016 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

 // This file implements source, a buffered rune reader
 // specialized for scanning Go code: Reading
 // ASCII characters, maintaining current (line, col)
 // position information, and recording of the most
 // recently read source segment are highly optimized.
 // This file is self-contained (go tool compile source.go
 // compiles) and thus could be made into its own package.

 package syntax

 import (
 	"io"
 	"unicode/utf8"
 )

 // The source buffer is accessed using three indices b (begin),
 // r (read), and e (end):
 //
 // - If b >= 0, it points to the beginning of a segment of most
 //   recently read characters (typically a Go literal).
 //
 // - r points to the byte immediately following the most recently
 //   read character ch, which starts at r-chw.
 //
 // - e points to the byte immediately following the last byte that
 //   was read into the buffer.
 //
 // The buffer content is terminated at buf[e] with the sentinel
 // character utf8.RuneSelf. This makes it possible to test for
 // the common case of ASCII characters with a single 'if' (see
 // nextch method).
 //
 //                +------ content in use -------+
 //                v                             v
 // buf [...read...|...segment...|ch|...unread...|s|...free...]
 //                ^             ^  ^            ^
 //                |             |  |            |
 //                b         r-chw  r            e
 //
 // Invariant: -1 <= b < r <= e < len(buf) && buf[e] == sentinel

 // source holds the state of the buffered reader: the input and error
 // handler it was initialized with, the buffer with its indices, and the
 // most recently read character together with its position.
 type source struct {
 	in   io.Reader                        // input to read from
 	errh func(line, col uint, msg string) // called for each reported error

 	buf   []byte // source buffer
 	ioerr error  // pending I/O error, or nil

 	// buffer indices (see comment above)
 	b int // start of the active segment, or -1 if there is none
 	r int // index of the byte following ch
 	e int // index of the byte following the buffered data

 	// source position of ch (0-based)
 	line uint
 	col  uint

 	ch  rune // most recently read character
 	chw int  // width of ch
 }

 // sentinel is the byte value stored at buf[e] to terminate the buffer
 // content. Because it equals utf8.RuneSelf, a single comparison against
 // it tells an ASCII byte apart from both a multi-byte sequence and the
 // end of the buffered data.
 const sentinel = utf8.RuneSelf

 // init prepares s for reading from in and reporting errors through errh.
 // A buffer left over from an earlier use is kept; all indices, the
 // position, and the pending I/O error are reset.
 func (s *source) init(in io.Reader, errh func(line, col uint, msg string)) {
 	s.in, s.errh = in, errh

 	// allocate a buffer only on first use
 	if s.buf == nil {
 		s.buf = make([]byte, nextSize(0))
 	}

 	// empty buffer: the sentinel sits right at the read position
 	s.buf[0] = sentinel
 	s.b = -1
 	s.r = 0
 	s.e = 0

 	s.ioerr = nil
 	s.line = 0
 	s.col = 0

 	// zero-width blank: the first nextch call leaves line and col at 0
 	s.ch, s.chw = ' ', 0
 }

 // Line and column numbers are kept 0-based internally; these are the
 // numbers reported for the first line and the first column.
 const (
 	linebase = 1
 	colbase  = 1
 )

 // pos reports where s.ch is located in the source, as a line and column
 // counted from linebase and colbase respectively.
 func (s *source) pos() (line, col uint) {
 	line = s.line + linebase
 	col = s.col + colbase
 	return line, col
 }

 // error passes msg to the error handler, together with the current
 // source position as returned by s.pos().
 func (s *source) error(msg string) {
 	atLine, atCol := s.pos()
 	s.errh(atLine, atCol, msg)
 }

 // start begins a new active source segment whose first character is s.ch.
 func (s *source) start() {
 	s.b = s.r - s.chw
 }

 // stop ends the active source segment.
 func (s *source) stop() {
 	s.b = -1
 }

 // segment returns the bytes of the active segment, from its start up to
 // but excluding s.ch. It may be called as long as stop has not been
 // called. The result is a slice of s.buf, not a copy.
 func (s *source) segment() []byte {
 	end := s.r - s.chw // index of the first byte of s.ch
 	return s.buf[s.b:end]
 }

 // rewind moves the read position back to the start of the active
 // segment and re-reads the character found there into s.ch. The segment
 // must not contain any newlines, since only the column is adjusted and
 // the line number would otherwise be wrong. At present rewind is needed
 // only for the source sequence ".."; calling it without an active
 // segment panics.
 func (s *source) rewind() {
 	segStart := s.b
 	// rewind is called rarely, so checking the precondition is affordable
 	if segStart < 0 {
 		panic("no active segment")
 	}
 	// move the column back by the bytes between segment start and read
 	// position; the nextch call below adds the width of s.ch again
 	s.col -= uint(s.r - segStart)
 	s.r = segStart
 	s.nextch()
 }

 // nextch advances to the next character: it stores the character in s.ch
 // and its width in bytes in s.chw, and moves s.r past it. Before reading,
 // s.line and s.col are advanced past the previous character, so that
 // afterwards they describe the position of the new s.ch.
 //
 // NUL bytes, invalid UTF-8 encodings, and byte order marks are skipped.
 // An error is reported for each of them, except for a BOM that is read
 // while line and column are still zero. At the end of the input s.ch is
 // set to -1 and s.chw to 0; a pending I/O error other than io.EOF is
 // reported at that point and then cleared.
 func (s *source) nextch() {
 redo:
 	// account for the previous character
 	s.col += uint(s.chw)
 	if s.ch == '\n' {
 		s.line++
 		s.col = 0
 	}

 	// fast common case: at least one ASCII character
 	// (buf[e] holds the sentinel, so this read is in range even when
 	// the buffer is exhausted)
 	if s.ch = rune(s.buf[s.r]); s.ch < sentinel {
 		s.r++
 		s.chw = 1
 		if s.ch == 0 {
 			s.error("invalid NUL character")
 			goto redo
 		}
 		return
 	}

 	// slower general case: add more bytes to buffer if we don't have a full rune
 	for s.e-s.r < utf8.UTFMax && !utf8.FullRune(s.buf[s.r:s.e]) && s.ioerr == nil {
 		s.fill()
 	}

 	// EOF
 	// (the loop above only stops with r == e once s.ioerr is set)
 	if s.r == s.e {
 		if s.ioerr != io.EOF {
 			// ensure we never start with a '/' (e.g., rooted path) in the error message
 			s.error("I/O error: " + s.ioerr.Error())
 			s.ioerr = nil
 		}
 		s.ch = -1
 		s.chw = 0
 		return
 	}

 	s.ch, s.chw = utf8.DecodeRune(s.buf[s.r:s.e])
 	s.r += s.chw

 	// A NUL that is the first byte after a refill never reaches the fast
 	// path above. Reject it here as well, so that NUL handling does not
 	// depend on where the buffer boundaries happen to fall.
 	if s.ch == 0 {
 		s.error("invalid NUL character")
 		goto redo
 	}

 	// a width of 1 distinguishes an encoding error from a correctly
 	// encoded U+FFFD
 	if s.ch == utf8.RuneError && s.chw == 1 {
 		s.error("invalid UTF-8 encoding")
 		goto redo
 	}

 	// BOM's are only allowed as the first character in a file
 	const BOM = 0xfeff
 	if s.ch == BOM {
 		if s.line > 0 || s.col > 0 {
 			s.error("invalid BOM in the middle of the file")
 		}
 		goto redo
 	}
 }

 // fill makes room in s.buf and reads more source bytes into it. On return
 // either at least one more byte is buffered or s.ioerr is non-nil. The
 // unread bytes, and the active segment if there is one, are preserved;
 // s.b, s.r and s.e are adjusted to their new location.
 func (s *source) fill() {
 	// Everything from keep onward must survive: the active segment if
 	// there is one, otherwise just the unread bytes.
 	keep := s.r
 	if s.b >= 0 {
 		keep = s.b
 		s.b = 0 // the segment will start the buffer once content is moved
 	}
 	live := s.buf[keep:s.e]

 	switch {
 	case 2*len(live) > len(s.buf):
 		// preserved content fills more than half the buffer: grow it
 		s.buf = make([]byte, nextSize(len(s.buf)))
 		copy(s.buf, live)
 	case keep > 0:
 		// enough room: just move the content to the front
 		copy(s.buf, live)
 	}
 	s.r -= keep
 	s.e -= keep

 	// read more data, giving up after a limited number of empty reads
 	const maxAttempts = 10
 	for attempt := 0; attempt < maxAttempts; attempt++ {
 		// the last byte of the buffer is reserved for the sentinel
 		n, err := s.in.Read(s.buf[s.e : len(s.buf)-1])
 		s.ioerr = err
 		if n < 0 {
 			panic("negative read") // incorrect underlying io.Reader implementation
 		}
 		if n == 0 && err == nil {
 			continue // nothing read and no error: try again
 		}
 		s.e += n
 		s.buf[s.e] = sentinel
 		return
 	}

 	// no progress after repeated attempts
 	s.buf[s.e] = sentinel
 	s.ioerr = io.ErrNoProgress
 }

 // nextSize computes the size to grow a buffer of the given size to: the
 // minimum size for small buffers, twice the size up to a limit, and the
 // size plus that limit beyond it.
 func nextSize(size int) int {
 	const (
 		minSize     = 4 << 10 // 4K: minimum buffer size
 		doubleLimit = 1 << 20 // 1M: maximum buffer size which is still doubled
 	)
 	switch {
 	case size < minSize:
 		return minSize
 	case size <= doubleLimit:
 		return size * 2
 	default:
 		return size + doubleLimit
 	}
 }
	// Copyright 2016 The Go Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style
	// license that can be found in the LICENSE file.

	// This file implements source, a buffered rune reader
	// specialized for scanning Go code: Reading
	// ASCII characters, maintaining current (line, col)
	// position information, and recording of the most
	// recently read source segment are highly optimized.
	// This file is self-contained (go tool compile source.go
	// compiles) and thus could be made into its own package.

	package syntax

	import (
	"io"
	"unicode/utf8"
	)

	// The source buffer is accessed using three indices b (begin),
	// r (read), and e (end):
	//
	// - If b >= 0, it points to the beginning of a segment of most
	// recently read characters (typically a Go literal).
	//
	// - r points to the byte immediately following the most recently
	// read character ch, which starts at r-chw.
	//
	// - e points to the byte immediately following the last byte that
	// was read into the buffer.
	//
	// The buffer content is terminated at buf[e] with the sentinel
	// character utf8.RuneSelf. This makes it possible to test for
	// the common case of ASCII characters with a single 'if' (see
	// nextch method).
	//
	//                +------ content in use -------+
	//                v                             v
	// buf [...read...|...segment...|ch|...unread...|s|...free...]
	//                ^             ^  ^            ^
	//                |             |  |            |
	//                b         r-chw  r            e
	//
	// Invariant: -1 <= b < r <= e < len(buf) && buf[e] == sentinel

	// source holds the state of the buffered reader: the input and error
	// handler it was initialized with, the buffer with its indices, and the
	// most recently read character together with its position.
	type source struct {
		in   io.Reader                        // input to read from
		errh func(line, col uint, msg string) // called for each reported error

		buf       []byte // source buffer
		ioerr     error  // pending I/O error, or nil
		b, r, e   int    // buffer indices (see comment above)
		line, col uint   // source position of ch (0-based)
		ch        rune   // most recently read character
		chw       int    // width of ch
	}

	// sentinel is the byte value stored at buf[e] to terminate the buffer
	// content. Because it equals utf8.RuneSelf, a single comparison against
	// it tells an ASCII byte apart from both a multi-byte sequence and the
	// end of the buffered data.
	const sentinel = utf8.RuneSelf

	// init prepares s for reading from in and reporting errors through errh.
	// A buffer left over from an earlier use is kept; all indices, the
	// position, and the pending I/O error are reset.
	func (s *source) init(in io.Reader, errh func(line, col uint, msg string)) {
		s.in = in
		s.errh = errh

		if s.buf == nil {
			s.buf = make([]byte, nextSize(0))
		}
		s.buf[0] = sentinel
		s.ioerr = nil
		s.b, s.r, s.e = -1, 0, 0
		s.line, s.col = 0, 0
		// zero-width blank: the first nextch call leaves line and col at 0
		s.ch = ' '
		s.chw = 0
	}

	// starting points for line and column numbers
	const linebase = 1
	const colbase = 1

	// pos returns the (line, col) source position of s.ch.
	func (s *source) pos() (line, col uint) {
		return linebase + s.line, colbase + s.col
	}

	// error reports the error msg at source position s.pos().
	func (s *source) error(msg string) {
		line, col := s.pos()
		s.errh(line, col, msg)
	}

	// start starts a new active source segment (including s.ch).
	// As long as stop has not been called, the active segment's
	// bytes (excluding s.ch) may be retrieved by calling segment.
	// The slice returned by segment is a slice of s.buf, not a copy.
	func (s *source) start()          { s.b = s.r - s.chw }
	func (s *source) stop()           { s.b = -1 }
	func (s *source) segment() []byte { return s.buf[s.b : s.r-s.chw] }

	// rewind rewinds the scanner's read position and character s.ch
	// to the start of the currently active segment, which must not
	// contain any newlines (otherwise position information will be
	// incorrect). Currently, rewind is only needed for handling the
	// source sequence ".."; it must not be called outside an active
	// segment.
	func (s *source) rewind() {
		// ok to verify precondition - rewind is rarely called
		if s.b < 0 {
			panic("no active segment")
		}
		// move the column back by the bytes between segment start and read
		// position; the nextch call below adds the width of s.ch again
		s.col -= uint(s.r - s.b)
		s.r = s.b
		s.nextch()
	}

	// nextch advances to the next character: it stores the character in s.ch
	// and its width in bytes in s.chw, and moves s.r past it. Before reading,
	// s.line and s.col are advanced past the previous character, so that
	// afterwards they describe the position of the new s.ch.
	//
	// NUL bytes, invalid UTF-8 encodings, and byte order marks are skipped.
	// An error is reported for each of them, except for a BOM that is read
	// while line and column are still zero. At the end of the input s.ch is
	// set to -1 and s.chw to 0; a pending I/O error other than io.EOF is
	// reported at that point and then cleared.
	func (s *source) nextch() {
	redo:
		// account for the previous character
		s.col += uint(s.chw)
		if s.ch == '\n' {
			s.line++
			s.col = 0
		}

		// fast common case: at least one ASCII character
		// (buf[e] holds the sentinel, so this read is in range even when
		// the buffer is exhausted)
		if s.ch = rune(s.buf[s.r]); s.ch < sentinel {
			s.r++
			s.chw = 1
			if s.ch == 0 {
				s.error("invalid NUL character")
				goto redo
			}
			return
		}

		// slower general case: add more bytes to buffer if we don't have a full rune
		for s.e-s.r < utf8.UTFMax && !utf8.FullRune(s.buf[s.r:s.e]) && s.ioerr == nil {
			s.fill()
		}

		// EOF
		// (the loop above only stops with r == e once s.ioerr is set)
		if s.r == s.e {
			if s.ioerr != io.EOF {
				// ensure we never start with a '/' (e.g., rooted path) in the error message
				s.error("I/O error: " + s.ioerr.Error())
				s.ioerr = nil
			}
			s.ch = -1
			s.chw = 0
			return
		}

		s.ch, s.chw = utf8.DecodeRune(s.buf[s.r:s.e])
		s.r += s.chw

		// A NUL that is the first byte after a refill never reaches the fast
		// path above. Reject it here as well, so that NUL handling does not
		// depend on where the buffer boundaries happen to fall.
		if s.ch == 0 {
			s.error("invalid NUL character")
			goto redo
		}

		// a width of 1 distinguishes an encoding error from a correctly
		// encoded U+FFFD
		if s.ch == utf8.RuneError && s.chw == 1 {
			s.error("invalid UTF-8 encoding")
			goto redo
		}

		// BOM's are only allowed as the first character in a file
		const BOM = 0xfeff
		if s.ch == BOM {
			if s.line > 0 || s.col > 0 {
				s.error("invalid BOM in the middle of the file")
			}
			goto redo
		}
	}

	// fill reads more source bytes into s.buf.
	// It returns with at least one more byte in the buffer, or with s.ioerr != nil.
	// The unread bytes, and the active segment if there is one, are preserved;
	// s.b, s.r and s.e are adjusted to their new location.
	func (s *source) fill() {
		// determine content to preserve
		b := s.r
		if s.b >= 0 {
			b = s.b
			s.b = 0 // after buffer has grown or content has been moved down
		}
		content := s.buf[b:s.e]

		// grow buffer or move content down
		if len(content)*2 > len(s.buf) {
			s.buf = make([]byte, nextSize(len(s.buf)))
			copy(s.buf, content)
		} else if b > 0 {
			copy(s.buf, content)
		}
		s.r -= b
		s.e -= b

		// read more data: try a limited number of times
		for i := 0; i < 10; i++ {
			var n int
			n, s.ioerr = s.in.Read(s.buf[s.e : len(s.buf)-1]) // -1 to leave space for sentinel
			if n < 0 {
				panic("negative read") // incorrect underlying io.Reader implementation
			}
			if n > 0 || s.ioerr != nil {
				s.e += n
				s.buf[s.e] = sentinel
				return
			}
			// n == 0
		}

		// no progress after repeated attempts
		s.buf[s.e] = sentinel
		s.ioerr = io.ErrNoProgress
	}

	// nextSize returns the next bigger size for a buffer of a given size.
	func nextSize(size int) int {
		const minSize = 4 << 10     // 4K: minimum buffer size
		const doubleLimit = 1 << 20 // 1M: maximum buffer size which is still doubled
		if size < minSize {
			return minSize
		}
		if size <= doubleLimit {
			return size << 1
		}
		return size + doubleLimit
	}