blob: e8fe9a08b2a93bb03bfbcb966c6789a60a35bd91 [file] [log] [blame]
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package build
import (
"bufio"
"bytes"
"errors"
"fmt"
"golang.org/x/website/internal/backport/go/ast"
"golang.org/x/website/internal/backport/go/parser"
"golang.org/x/website/internal/backport/go/token"
"io"
"strconv"
"strings"
"unicode"
"unicode/utf8"
)
type importReader struct {
b *bufio.Reader
buf []byte
peek byte
err error
eof bool
nerr int
pos token.Position
}
var bom = []byte{0xef, 0xbb, 0xbf}
func newImportReader(name string, r io.Reader) *importReader {
b := bufio.NewReader(r)
// Remove leading UTF-8 BOM.
// Per https://golang.org/ref/spec#Source_code_representation:
// a compiler may ignore a UTF-8-encoded byte order mark (U+FEFF)
// if it is the first Unicode code point in the source text.
if leadingBytes, err := b.Peek(3); err == nil && bytes.Equal(leadingBytes, bom) {
b.Discard(3)
}
return &importReader{
b: b,
pos: token.Position{
Filename: name,
Line: 1,
Column: 1,
},
}
}
func isIdent(c byte) bool {
return 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' || '0' <= c && c <= '9' || c == '_' || c >= utf8.RuneSelf
}
var (
errSyntax = errors.New("syntax error")
errNUL = errors.New("unexpected NUL in input")
)
// syntaxError records a syntax error, but only if an I/O error has not already been recorded.
func (r *importReader) syntaxError() {
if r.err == nil {
r.err = errSyntax
}
}
// readByte reads the next byte from the input, saves it in buf, and returns it.
// If an error occurs, readByte records the error in r.err and returns 0.
func (r *importReader) readByte() byte {
c, err := r.b.ReadByte()
if err == nil {
r.buf = append(r.buf, c)
if c == 0 {
err = errNUL
}
}
if err != nil {
if err == io.EOF {
r.eof = true
} else if r.err == nil {
r.err = err
}
c = 0
}
return c
}
// readByteNoBuf is like readByte but doesn't buffer the byte.
// It exhausts r.buf before reading from r.b.
func (r *importReader) readByteNoBuf() byte {
var c byte
var err error
if len(r.buf) > 0 {
c = r.buf[0]
r.buf = r.buf[1:]
} else {
c, err = r.b.ReadByte()
if err == nil && c == 0 {
err = errNUL
}
}
if err != nil {
if err == io.EOF {
r.eof = true
} else if r.err == nil {
r.err = err
}
return 0
}
r.pos.Offset++
if c == '\n' {
r.pos.Line++
r.pos.Column = 1
} else {
r.pos.Column++
}
return c
}
// peekByte returns the next byte from the input reader but does not advance beyond it.
// If skipSpace is set, peekByte skips leading spaces and comments.
func (r *importReader) peekByte(skipSpace bool) byte {
if r.err != nil {
if r.nerr++; r.nerr > 10000 {
panic("go/build: import reader looping")
}
return 0
}
// Use r.peek as first input byte.
// Don't just return r.peek here: it might have been left by peekByte(false)
// and this might be peekByte(true).
c := r.peek
if c == 0 {
c = r.readByte()
}
for r.err == nil && !r.eof {
if skipSpace {
// For the purposes of this reader, semicolons are never necessary to
// understand the input and are treated as spaces.
switch c {
case ' ', '\f', '\t', '\r', '\n', ';':
c = r.readByte()
continue
case '/':
c = r.readByte()
if c == '/' {
for c != '\n' && r.err == nil && !r.eof {
c = r.readByte()
}
} else if c == '*' {
var c1 byte
for (c != '*' || c1 != '/') && r.err == nil {
if r.eof {
r.syntaxError()
}
c, c1 = c1, r.readByte()
}
} else {
r.syntaxError()
}
c = r.readByte()
continue
}
}
break
}
r.peek = c
return r.peek
}
// nextByte is like peekByte but advances beyond the returned byte.
func (r *importReader) nextByte(skipSpace bool) byte {
c := r.peekByte(skipSpace)
r.peek = 0
return c
}
var goEmbed = []byte("go:embed")
// findEmbed advances the input reader to the next //go:embed comment.
// It reports whether it found a comment.
// (Otherwise it found an error or EOF.)
func (r *importReader) findEmbed(first bool) bool {
// The import block scan stopped after a non-space character,
// so the reader is not at the start of a line on the first call.
// After that, each //go:embed extraction leaves the reader
// at the end of a line.
startLine := !first
var c byte
for r.err == nil && !r.eof {
c = r.readByteNoBuf()
Reswitch:
switch c {
default:
startLine = false
case '\n':
startLine = true
case ' ', '\t':
// leave startLine alone
case '"':
startLine = false
for r.err == nil {
if r.eof {
r.syntaxError()
}
c = r.readByteNoBuf()
if c == '\\' {
r.readByteNoBuf()
if r.err != nil {
r.syntaxError()
return false
}
continue
}
if c == '"' {
c = r.readByteNoBuf()
goto Reswitch
}
}
goto Reswitch
case '`':
startLine = false
for r.err == nil {
if r.eof {
r.syntaxError()
}
c = r.readByteNoBuf()
if c == '`' {
c = r.readByteNoBuf()
goto Reswitch
}
}
case '\'':
startLine = false
for r.err == nil {
if r.eof {
r.syntaxError()
}
c = r.readByteNoBuf()
if c == '\\' {
r.readByteNoBuf()
if r.err != nil {
r.syntaxError()
return false
}
continue
}
if c == '\'' {
c = r.readByteNoBuf()
goto Reswitch
}
}
case '/':
c = r.readByteNoBuf()
switch c {
default:
startLine = false
goto Reswitch
case '*':
var c1 byte
for (c != '*' || c1 != '/') && r.err == nil {
if r.eof {
r.syntaxError()
}
c, c1 = c1, r.readByteNoBuf()
}
startLine = false
case '/':
if startLine {
// Try to read this as a //go:embed comment.
for i := range goEmbed {
c = r.readByteNoBuf()
if c != goEmbed[i] {
goto SkipSlashSlash
}
}
c = r.readByteNoBuf()
if c == ' ' || c == '\t' {
// Found one!
return true
}
}
SkipSlashSlash:
for c != '\n' && r.err == nil && !r.eof {
c = r.readByteNoBuf()
}
startLine = true
}
}
}
return false
}
// readKeyword reads the given keyword from the input.
// If the keyword is not present, readKeyword records a syntax error.
func (r *importReader) readKeyword(kw string) {
r.peekByte(true)
for i := 0; i < len(kw); i++ {
if r.nextByte(false) != kw[i] {
r.syntaxError()
return
}
}
if isIdent(r.peekByte(false)) {
r.syntaxError()
}
}
// readIdent reads an identifier from the input.
// If an identifier is not present, readIdent records a syntax error.
func (r *importReader) readIdent() {
c := r.peekByte(true)
if !isIdent(c) {
r.syntaxError()
return
}
for isIdent(r.peekByte(false)) {
r.peek = 0
}
}
// readString reads a quoted string literal from the input.
// If an identifier is not present, readString records a syntax error.
func (r *importReader) readString() {
switch r.nextByte(true) {
case '`':
for r.err == nil {
if r.nextByte(false) == '`' {
break
}
if r.eof {
r.syntaxError()
}
}
case '"':
for r.err == nil {
c := r.nextByte(false)
if c == '"' {
break
}
if r.eof || c == '\n' {
r.syntaxError()
}
if c == '\\' {
r.nextByte(false)
}
}
default:
r.syntaxError()
}
}
// readImport reads an import clause - optional identifier followed by quoted string -
// from the input.
func (r *importReader) readImport() {
c := r.peekByte(true)
if c == '.' {
r.peek = 0
} else if isIdent(c) {
r.readIdent()
}
r.readString()
}
// readComments is like io.ReadAll, except that it only reads the leading
// block of comments in the file.
func readComments(f io.Reader) ([]byte, error) {
r := newImportReader("", f)
r.peekByte(true)
if r.err == nil && !r.eof {
// Didn't reach EOF, so must have found a non-space byte. Remove it.
r.buf = r.buf[:len(r.buf)-1]
}
return r.buf, r.err
}
// readGoInfo expects a Go file as input and reads the file up to and including the import section.
// It records what it learned in *info.
// If info.fset is non-nil, readGoInfo parses the file and sets info.parsed, info.parseErr,
// info.imports, info.embeds, and info.embedErr.
//
// It only returns an error if there are problems reading the file,
// not for syntax errors in the file itself.
func readGoInfo(f io.Reader, info *fileInfo) error {
r := newImportReader(info.name, f)
r.readKeyword("package")
r.readIdent()
for r.peekByte(true) == 'i' {
r.readKeyword("import")
if r.peekByte(true) == '(' {
r.nextByte(false)
for r.peekByte(true) != ')' && r.err == nil {
r.readImport()
}
r.nextByte(false)
} else {
r.readImport()
}
}
info.header = r.buf
// If we stopped successfully before EOF, we read a byte that told us we were done.
// Return all but that last byte, which would cause a syntax error if we let it through.
if r.err == nil && !r.eof {
info.header = r.buf[:len(r.buf)-1]
}
// If we stopped for a syntax error, consume the whole file so that
// we are sure we don't change the errors that go/parser returns.
if r.err == errSyntax {
r.err = nil
for r.err == nil && !r.eof {
r.readByte()
}
info.header = r.buf
}
if r.err != nil {
return r.err
}
if info.fset == nil {
return nil
}
// Parse file header & record imports.
info.parsed, info.parseErr = parser.ParseFile(info.fset, info.name, info.header, parser.ImportsOnly|parser.ParseComments)
if info.parseErr != nil {
return nil
}
hasEmbed := false
for _, decl := range info.parsed.Decls {
d, ok := decl.(*ast.GenDecl)
if !ok {
continue
}
for _, dspec := range d.Specs {
spec, ok := dspec.(*ast.ImportSpec)
if !ok {
continue
}
quoted := spec.Path.Value
path, err := strconv.Unquote(quoted)
if err != nil {
return fmt.Errorf("parser returned invalid quoted string: <%s>", quoted)
}
if path == "embed" {
hasEmbed = true
}
doc := spec.Doc
if doc == nil && len(d.Specs) == 1 {
doc = d.Doc
}
info.imports = append(info.imports, fileImport{path, spec.Pos(), doc})
}
}
// If the file imports "embed",
// we have to look for //go:embed comments
// in the remainder of the file.
// The compiler will enforce the mapping of comments to
// declared variables. We just need to know the patterns.
// If there were //go:embed comments earlier in the file
// (near the package statement or imports), the compiler
// will reject them. They can be (and have already been) ignored.
if hasEmbed {
var line []byte
for first := true; r.findEmbed(first); first = false {
line = line[:0]
pos := r.pos
for {
c := r.readByteNoBuf()
if c == '\n' || r.err != nil || r.eof {
break
}
line = append(line, c)
}
// Add args if line is well-formed.
// Ignore badly-formed lines - the compiler will report them when it finds them,
// and we can pretend they are not there to help go list succeed with what it knows.
embs, err := parseGoEmbed(string(line), pos)
if err == nil {
info.embeds = append(info.embeds, embs...)
}
}
}
return nil
}
// parseGoEmbed parses the text following "//go:embed" to extract the glob patterns.
// It accepts unquoted space-separated patterns as well as double-quoted and back-quoted Go strings.
// This is based on a similar function in cmd/compile/internal/gc/noder.go;
// this version calculates position information as well.
func parseGoEmbed(args string, pos token.Position) ([]fileEmbed, error) {
trimBytes := func(n int) {
pos.Offset += n
pos.Column += utf8.RuneCountInString(args[:n])
args = args[n:]
}
trimSpace := func() {
trim := strings.TrimLeftFunc(args, unicode.IsSpace)
trimBytes(len(args) - len(trim))
}
var list []fileEmbed
for trimSpace(); args != ""; trimSpace() {
var path string
pathPos := pos
Switch:
switch args[0] {
default:
i := len(args)
for j, c := range args {
if unicode.IsSpace(c) {
i = j
break
}
}
path = args[:i]
trimBytes(i)
case '`':
var ok bool
path, _, ok = stringsCut(args[1:], "`")
if !ok {
return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args)
}
trimBytes(1 + len(path) + 1)
case '"':
i := 1
for ; i < len(args); i++ {
if args[i] == '\\' {
i++
continue
}
if args[i] == '"' {
q, err := strconv.Unquote(args[:i+1])
if err != nil {
return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args[:i+1])
}
path = q
trimBytes(i + 1)
break Switch
}
}
if i >= len(args) {
return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args)
}
}
if args != "" {
r, _ := utf8.DecodeRuneInString(args)
if !unicode.IsSpace(r) {
return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args)
}
}
list = append(list, fileEmbed{path, pathPos})
}
return list, nil
}