internal/lsp/semantic: improve semantic token processing There is insufficient type information to compute semantic tokens in packages that don't compile. Particularly affected are test files and files being actively edited in new packages. Further, existing code could panic on poorly formed imports; this has been fixed. Computing semantic tokens for identifiers having neither use or definition information has been improved. (Each of the many cases in the new function unkIdent() occurs in existing code or test files.) Change-Id: Id1b5db0622b17076de1ed23a950a20cd03c3750a Reviewed-on: https://go-review.googlesource.com/c/tools/+/333869 Run-TryBot: Peter Weinberger <pjw@google.com> Trust: Peter Weinberger <pjw@google.com> gopls-CI: kokoro <noreply+kokoro@google.com> TryBot-Result: Go Bot <gobot@golang.org> Reviewed-by: Robert Findley <rfindley@google.com>

commit: 0cf4e2708ac840da8674eb3947b660a931bd2c1f [log] [tgz]
author: pjw <pjw@google.com> Sun Jul 11 12:15:33 2021 -0700
committer: Peter Weinberger <pjw@google.com> Wed Jul 14 22:42:09 2021 +0000
tree: 1dbb18870989e4021c50159ede942e8a3a8a13d3
parent: 5061c412c34515188603c0e5d37172f158048682 [diff]
diff --git a/internal/lsp/semantic.go b/internal/lsp/semantic.go
index c0ed972..073336b 100644
--- a/internal/lsp/semantic.go
+++ b/internal/lsp/semantic.go

@@ -11,7 +11,7 @@
 	"go/ast"
 	"go/token"
 	"go/types"
-	"log"
+	"path/filepath"
 	"sort"
 	"strings"
 	"time"
@@ -93,7 +93,7 @@
 	if err != nil {
 		return nil, err
 	}
-	// don't return errors on pgf.ParseErr. Do what we can.
+	// ignore pgf.ParseErr. Do what we can.
 	if rng == nil && len(pgf.Src) > maxFullFileSize {
 		err := fmt.Errorf("semantic tokens: file %s too large for full (%d>%d)",
 			fh.URI().Filename(), len(pgf.Src), maxFullFileSize)
@@ -122,6 +122,7 @@
 
 func (e *encoded) semantics() {
 	f := e.pgf.File
+	// may not be in range, but harmless
 	e.token(f.Package, len("package"), tokKeyword, nil)
 	e.token(f.Name.NamePos, len(f.Name.Name), tokNamespace, nil)
 	inspect := func(n ast.Node) bool {
@@ -166,8 +167,11 @@
 )
 
 func (e *encoded) token(start token.Pos, leng int, typ tokenType, mods []string) {
-	if start == 0 {
-		e.unexpected("token at token.NoPos")
+
+	if !start.IsValid() {
+		// This is not worth reporting
+		//e.unexpected("token at token.NoPos")
+		return
 	}
 	if start >= e.end || start+token.Pos(leng) <= e.start {
 		return
@@ -186,10 +190,7 @@
 		return
 	}
 	if lspRange.End.Line != lspRange.Start.Line {
-		// abrupt end of file, without \n. TODO(pjw): fix?
-		pos := e.fset.PositionFor(start, false)
-		msg := fmt.Sprintf("token at %s:%d.%d overflows", pos.Filename, pos.Line, pos.Column)
-		event.Log(e.ctx, msg)
+		// this happens if users are typing at the end of the file, but report nothing
 		return
 	}
 	// token is all on one line
@@ -236,12 +237,26 @@
 	if len(e.stack) > 0 {
 		loc := e.stack[len(e.stack)-1].Pos()
 		add := e.pgf.Tok.PositionFor(loc, false)
-		msg = append(msg, fmt.Sprintf("(line:%d,col:%d)", add.Line, add.Column))
+		nm := filepath.Base(add.Filename)
+		msg = append(msg, fmt.Sprintf("(%s:%d,col:%d)", nm, add.Line, add.Column))
 	}
 	msg = append(msg, "]")
 	return strings.Join(msg, " ")
 }
 
+// find the line in the source
+func (e *encoded) srcLine(x ast.Node) string {
+	file := e.pgf.Tok
+	line := file.Line(x.Pos())
+	start := file.Offset(file.LineStart(line))
+	end := start
+	for ; end < len(e.pgf.Src) && e.pgf.Src[end] != '\n'; end++ {
+
+	}
+	ans := e.pgf.Src[start:end]
+	return string(ans)
+}
+
 func (e *encoded) inspector(n ast.Node) bool {
 	pop := func() {
 		e.stack = e.stack[:len(e.stack)-1]
@@ -381,12 +396,12 @@
 	case *ast.UnaryExpr:
 		e.token(x.OpPos, len(x.Op.String()), tokOperator, nil)
 	case *ast.ValueSpec:
-	// things we only see with parsing or type errors, so we ignore them
+	// things only seen with parsing or type errors, so ignore them
 	case *ast.BadDecl, *ast.BadExpr, *ast.BadStmt:
 		return true
 	// not going to see these
 	case *ast.File, *ast.Package:
-		log.Printf("implement %T %s", x, e.pgf.Tok.PositionFor(x.Pos(), false))
+		e.unexpected(fmt.Sprintf("implement %T %s", x, e.pgf.Tok.PositionFor(x.Pos(), false)))
 	// other things we knowingly ignore
 	case *ast.Comment, *ast.CommentGroup:
 		pop()
@@ -409,7 +424,8 @@
 	use := e.ti.Uses[x]
 	switch y := use.(type) {
 	case nil:
-		e.token(x.NamePos, len(x.Name), tokVariable, []string{"definition"})
+		e.unkIdent(x)
+		return
 	case *types.Builtin:
 		e.token(x.NamePos, len(x.Name), tokFunction, []string{"defaultLibrary"})
 	case *types.Const:
@@ -462,6 +478,134 @@
 	}
 }
 
+// both e.ti.Defs and e.ti.Uses are nil. use the parse stack
+// a lot of these only happen when the package doesn't compile
+func (e *encoded) unkIdent(x *ast.Ident) {
+	tok := func(tok tokenType, mod []string) {
+		e.token(x.Pos(), len(x.Name), tok, mod)
+	}
+	def := []string{"definition"}
+	n := len(e.stack) - 2 // parent of Ident
+	if n < 0 {
+		e.unexpected("no stack?")
+		return
+	}
+	switch nd := e.stack[n].(type) {
+	case *ast.BinaryExpr, *ast.UnaryExpr, *ast.ParenExpr, *ast.StarExpr,
+		*ast.IncDecStmt, *ast.SliceExpr, *ast.ExprStmt, *ast.IndexExpr,
+		*ast.ReturnStmt,
+		*ast.IfStmt,       /* condition */
+		*ast.KeyValueExpr: // either key or value
+		tok(tokVariable, nil)
+	case *ast.Ellipsis:
+		tok(tokType, nil)
+	case *ast.CaseClause:
+		if n-2 >= 0 {
+			if _, ok := e.stack[n-2].(*ast.TypeSwitchStmt); ok {
+				tok(tokType, nil)
+				return
+			}
+		}
+		tok(tokVariable, nil)
+	case *ast.ArrayType:
+		if x == nd.Len {
+			tok(tokVariable, nil)
+		} else {
+			tok(tokType, nil)
+		}
+	case *ast.MapType:
+		tok(tokType, nil)
+	case *ast.CallExpr:
+		if x == nd.Fun {
+			tok(tokFunction, nil)
+			return
+		}
+		tok(tokVariable, nil)
+	case *ast.TypeAssertExpr:
+		if x == nd.X {
+			tok(tokVariable, nil)
+		} else if x == nd.Type {
+			tok(tokType, nil)
+		}
+	case *ast.ValueSpec:
+		for _, p := range nd.Names {
+			if p == x {
+				tok(tokVariable, def)
+				return
+			}
+		}
+		for _, p := range nd.Values {
+			if p == x {
+				tok(tokVariable, nil)
+				return
+			}
+		}
+		tok(tokType, nil)
+	case *ast.SelectorExpr: // e.ti.Selections[nd] is nil, so no help
+		if n-1 >= 0 {
+			if ce, ok := e.stack[n-1].(*ast.CallExpr); ok {
+				// ... CallExpr SelectorExpr Ident (_.x())
+				if ce.Fun == nd && nd.Sel == x {
+					tok(tokFunction, nil)
+					return
+				}
+			}
+		}
+		tok(tokVariable, nil)
+	case *ast.AssignStmt:
+		for _, p := range nd.Lhs {
+			// x := ..., or x = ...
+			if p == x {
+				if nd.Tok != token.DEFINE {
+					def = nil
+				}
+				tok(tokVariable, def)
+				return
+			}
+		}
+		// RHS, = x
+		tok(tokVariable, nil)
+	case *ast.TypeSpec: // it's a type if it is either the Name or the Type
+		if x == nd.Type {
+			def = nil
+		}
+		tok(tokType, def)
+	case *ast.Field:
+		// ident could be type in a field, or a method in an interface type, or a variable
+		if x == nd.Type {
+			tok(tokType, nil)
+			return
+		}
+		if n-2 >= 0 {
+			_, okit := e.stack[n-2].(*ast.InterfaceType)
+			_, okfl := e.stack[n-1].(*ast.FieldList)
+			if okit && okfl {
+				tok(tokMember, def)
+				return
+			}
+		}
+		tok(tokVariable, nil)
+	case *ast.LabeledStmt, *ast.BranchStmt:
+		// nothing to report
+	case *ast.CompositeLit:
+		if nd.Type == x {
+			tok(tokType, nil)
+			return
+		}
+		tok(tokVariable, nil)
+	case *ast.RangeStmt:
+		if nd.Tok != token.DEFINE {
+			def = nil
+		}
+		tok(tokVariable, def)
+	case *ast.FuncDecl:
+		tok(tokFunction, def)
+	default:
+		msg := fmt.Sprintf("%T undexpected: %s %s%q", nd, x.Name, e.strStack(), e.srcLine(x))
+		e.unexpected(msg)
+	}
+}
+
 func isDeprecated(n *ast.CommentGroup) bool {
 	if n == nil {
 		return false
@@ -642,16 +786,17 @@
 		}
 		// and fall through for _
 	}
-	if d.Path.Value == "" {
+	val := d.Path.Value
+	if len(val) < 2 || val[0] != '"' || val[len(val)-1] != '"' {
+		// avoid panics on imports without a properly quoted string
 		return
 	}
-	nm := d.Path.Value[1 : len(d.Path.Value)-1] // trailing "
-	v := strings.LastIndex(nm, "/")
-	if v != -1 {
-		nm = nm[v+1:]
-	}
+	nm := val[1 : len(val)-1] // remove surrounding "s
+	nm = filepath.Base(nm)
+	// in import "lib/math", 'math' is the package name
 	start := d.Path.End() - token.Pos(1+len(nm))
 	e.token(start, len(nm), tokNamespace, nil)
+	// There may be more cases, as import strings are implementation defined.
 }
 
 // log unexpected state

diff --git a/internal/lsp/testdata/semantic/a.go b/internal/lsp/testdata/semantic/a.go
index 756c56e..54d6c8a 100644
--- a/internal/lsp/testdata/semantic/a.go
+++ b/internal/lsp/testdata/semantic/a.go

@@ -55,6 +55,8 @@
 	w := b[4:]
 	j := len(x)
 	j--
+	q := []interface{}{j, 23i, &y}
+	g(q...)
 	return true
 }
 
@@ -74,5 +76,6 @@
 	if !ok {
 		switch x := vv[0].(type) {
 		}
+		goto Never
 	}
 }

diff --git a/internal/lsp/testdata/semantic/a.go.golden b/internal/lsp/testdata/semantic/a.go.golden
index 512a83e..4bf70e5 100644
--- a/internal/lsp/testdata/semantic/a.go.golden
+++ b/internal/lsp/testdata/semantic/a.go.golden

@@ -39,7 +39,7 @@
 /*⇒4,keyword,[]*/func (/*⇒1,variable,[]*/a /*⇒1,operator,[]*/*/*⇒1,type,[]*/A) /*⇒1,member,[definition]*/f() /*⇒4,type,[defaultLibrary]*/bool {
 	/*⇒3,keyword,[]*/var /*⇒1,variable,[definition]*/z /*⇒6,type,[defaultLibrary]*/string
 	/*⇒1,variable,[definition]*/x /*⇒2,operator,[]*/:= /*⇒5,string,[]*/"foo"
-	/*⇒1,variable,[]*/a(/*⇒1,variable,[definition]*/x)
+	/*⇒1,variable,[]*/a(/*⇒1,variable,[]*/x)
 	/*⇒1,variable,[definition]*/y /*⇒2,operator,[]*/:= /*⇒5,string,[]*/"bar" /*⇒1,operator,[]*/+ /*⇒1,variable,[]*/x
 	/*⇒6,keyword,[]*/switch /*⇒1,variable,[]*/z {
 	/*⇒4,keyword,[]*/case /*⇒4,string,[]*/"xx":
@@ -52,18 +52,20 @@
 	/*⇒3,keyword,[]*/for /*⇒1,variable,[definition]*/k, /*⇒1,variable,[definition]*/v := /*⇒5,keyword,[]*/range /*⇒1,variable,[]*/m {
 		/*⇒6,keyword,[]*/return (/*⇒1,operator,[]*/!/*⇒1,variable,[]*/k) /*⇒2,operator,[]*/&& /*⇒1,variable,[]*/v[/*⇒1,number,[]*/0] /*⇒2,operator,[]*/== /*⇒3,variable,[readonly defaultLibrary]*/nil
 	}
-	/*⇒2,variable,[]*/c2 /*⇒2,operator,[]*/<- /*⇒1,type,[]*/A./*⇒1,variable,[definition]*/X
+	/*⇒2,variable,[]*/c2 /*⇒2,operator,[]*/<- /*⇒1,type,[]*/A./*⇒1,variable,[]*/X
 	/*⇒1,variable,[definition]*/w /*⇒2,operator,[]*/:= /*⇒1,variable,[]*/b[/*⇒1,number,[]*/4:]
 	/*⇒1,variable,[definition]*/j /*⇒2,operator,[]*/:= /*⇒3,function,[defaultLibrary]*/len(/*⇒1,variable,[]*/x)
 	/*⇒1,variable,[]*/j/*⇒2,operator,[]*/--
+	/*⇒1,variable,[definition]*/q /*⇒2,operator,[]*/:= []/*⇒9,keyword,[]*/interface{}{/*⇒1,variable,[]*/j, /*⇒3,number,[]*/23i, /*⇒1,operator,[]*/&/*⇒1,variable,[]*/y}
+	/*⇒1,function,[]*/g(/*⇒1,variable,[]*/q/*⇒3,operator,[]*/...)
 	/*⇒6,keyword,[]*/return /*⇒4,variable,[readonly]*/true
 }
 
 /*⇒4,keyword,[]*/func /*⇒1,function,[definition]*/g(/*⇒2,parameter,[definition]*/vv /*⇒3,operator,[]*/.../*⇒9,keyword,[]*/interface{}) {
 	/*⇒2,variable,[definition]*/ff /*⇒2,operator,[]*/:= /*⇒4,keyword,[]*/func() {}
 	/*⇒5,keyword,[]*/defer /*⇒2,variable,[]*/ff()
-	/*⇒2,keyword,[]*/go /*⇒3,namespace,[]*/utf./*⇒9,variable,[definition]*/RuneCount(/*⇒2,string,[]*/"")
-	/*⇒2,keyword,[]*/go /*⇒4,namespace,[]*/utf8./*⇒9,function,[]*/RuneCount(/*⇒2,variable,[]*/vv.(/*⇒6,variable,[definition]*/string))
+	/*⇒2,keyword,[]*/go /*⇒3,namespace,[]*/utf./*⇒9,function,[]*/RuneCount(/*⇒2,string,[]*/"")
+	/*⇒2,keyword,[]*/go /*⇒4,namespace,[]*/utf8./*⇒9,function,[]*/RuneCount(/*⇒2,variable,[]*/vv.(/*⇒6,type,[]*/string))
 	/*⇒2,keyword,[]*/if /*⇒4,variable,[readonly]*/true {
 	} /*⇒4,keyword,[]*/else {
 	}
@@ -75,6 +77,7 @@
 	/*⇒2,keyword,[]*/if /*⇒1,operator,[]*/!/*⇒2,variable,[]*/ok {
 		/*⇒6,keyword,[]*/switch /*⇒1,variable,[definition]*/x /*⇒2,operator,[]*/:= /*⇒2,variable,[]*/vv[/*⇒1,number,[]*/0].(/*⇒4,keyword,[]*/type) {
 		}
+		/*⇒4,keyword,[]*/goto Never
 	}
 }
commit	0cf4e2708ac840da8674eb3947b660a931bd2c1f	[log] [tgz]
author	pjw <pjw@google.com>	Sun Jul 11 12:15:33 2021 -0700
committer	Peter Weinberger <pjw@google.com>	Wed Jul 14 22:42:09 2021 +0000
tree	1dbb18870989e4021c50159ede942e8a3a8a13d3
parent	5061c412c34515188603c0e5d37172f158048682 [diff]