internal/lsp/semantic: improve semantic token processing

There is insufficient type information to compute semantic tokens in
packages that don't compile. Particularly affected are test files
and files being actively edited in new packages.
Further, existing code could
panic on poorly formed imports; this has been fixed.
Computing semantic tokens for identifiers
having neither use or definition information has been improved.
(Each of the many cases in the new function unkIdent() occurs
in existing code or test files.)

Change-Id: Id1b5db0622b17076de1ed23a950a20cd03c3750a
Reviewed-on: https://go-review.googlesource.com/c/tools/+/333869
Run-TryBot: Peter Weinberger <pjw@google.com>
Trust: Peter Weinberger <pjw@google.com>
gopls-CI: kokoro <noreply+kokoro@google.com>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Robert Findley <rfindley@google.com>
diff --git a/internal/lsp/semantic.go b/internal/lsp/semantic.go
index c0ed972..073336b 100644
--- a/internal/lsp/semantic.go
+++ b/internal/lsp/semantic.go
@@ -11,7 +11,7 @@
 	"go/ast"
 	"go/token"
 	"go/types"
-	"log"
+	"path/filepath"
 	"sort"
 	"strings"
 	"time"
@@ -93,7 +93,7 @@
 	if err != nil {
 		return nil, err
 	}
-	// don't return errors on pgf.ParseErr. Do what we can.
+	// ignore pgf.ParseErr. Do what we can.
 	if rng == nil && len(pgf.Src) > maxFullFileSize {
 		err := fmt.Errorf("semantic tokens: file %s too large for full (%d>%d)",
 			fh.URI().Filename(), len(pgf.Src), maxFullFileSize)
@@ -122,6 +122,7 @@
 
 func (e *encoded) semantics() {
 	f := e.pgf.File
+	// may not be in range, but harmless
 	e.token(f.Package, len("package"), tokKeyword, nil)
 	e.token(f.Name.NamePos, len(f.Name.Name), tokNamespace, nil)
 	inspect := func(n ast.Node) bool {
@@ -166,8 +167,11 @@
 )
 
 func (e *encoded) token(start token.Pos, leng int, typ tokenType, mods []string) {
-	if start == 0 {
-		e.unexpected("token at token.NoPos")
+
+	if !start.IsValid() {
+		// This is not worth reporting
+		//e.unexpected("token at token.NoPos")
+		return
 	}
 	if start >= e.end || start+token.Pos(leng) <= e.start {
 		return
@@ -186,10 +190,7 @@
 		return
 	}
 	if lspRange.End.Line != lspRange.Start.Line {
-		// abrupt end of file, without \n. TODO(pjw): fix?
-		pos := e.fset.PositionFor(start, false)
-		msg := fmt.Sprintf("token at %s:%d.%d overflows", pos.Filename, pos.Line, pos.Column)
-		event.Log(e.ctx, msg)
+		// this happens if users are typing at the end of the file, but report nothing
 		return
 	}
 	// token is all on one line
@@ -236,12 +237,26 @@
 	if len(e.stack) > 0 {
 		loc := e.stack[len(e.stack)-1].Pos()
 		add := e.pgf.Tok.PositionFor(loc, false)
-		msg = append(msg, fmt.Sprintf("(line:%d,col:%d)", add.Line, add.Column))
+		nm := filepath.Base(add.Filename)
+		msg = append(msg, fmt.Sprintf("(%s:%d,col:%d)", nm, add.Line, add.Column))
 	}
 	msg = append(msg, "]")
 	return strings.Join(msg, " ")
 }
 
+// find the line in the source
+func (e *encoded) srcLine(x ast.Node) string {
+	file := e.pgf.Tok
+	line := file.Line(x.Pos())
+	start := file.Offset(file.LineStart(line))
+	end := start
+	for ; end < len(e.pgf.Src) && e.pgf.Src[end] != '\n'; end++ {
+
+	}
+	ans := e.pgf.Src[start:end]
+	return string(ans)
+}
+
 func (e *encoded) inspector(n ast.Node) bool {
 	pop := func() {
 		e.stack = e.stack[:len(e.stack)-1]
@@ -381,12 +396,12 @@
 	case *ast.UnaryExpr:
 		e.token(x.OpPos, len(x.Op.String()), tokOperator, nil)
 	case *ast.ValueSpec:
-	// things we only see with parsing or type errors, so we ignore them
+	// things only seen with parsing or type errors, so ignore them
 	case *ast.BadDecl, *ast.BadExpr, *ast.BadStmt:
 		return true
 	// not going to see these
 	case *ast.File, *ast.Package:
-		log.Printf("implement %T %s", x, e.pgf.Tok.PositionFor(x.Pos(), false))
+		e.unexpected(fmt.Sprintf("implement %T %s", x, e.pgf.Tok.PositionFor(x.Pos(), false)))
 	// other things we knowingly ignore
 	case *ast.Comment, *ast.CommentGroup:
 		pop()
@@ -409,7 +424,8 @@
 	use := e.ti.Uses[x]
 	switch y := use.(type) {
 	case nil:
-		e.token(x.NamePos, len(x.Name), tokVariable, []string{"definition"})
+		e.unkIdent(x)
+		return
 	case *types.Builtin:
 		e.token(x.NamePos, len(x.Name), tokFunction, []string{"defaultLibrary"})
 	case *types.Const:
@@ -462,6 +478,134 @@
 	}
 }
 
+// both e.ti.Defs and e.ti.Uses are nil. use the parse stack
+// a lot of these only happen when the package doesn't compile
+func (e *encoded) unkIdent(x *ast.Ident) {
+	tok := func(tok tokenType, mod []string) {
+		e.token(x.Pos(), len(x.Name), tok, mod)
+	}
+	def := []string{"definition"}
+	n := len(e.stack) - 2 // parent of Ident
+	if n < 0 {
+		e.unexpected("no stack?")
+		return
+	}
+	switch nd := e.stack[n].(type) {
+	case *ast.BinaryExpr, *ast.UnaryExpr, *ast.ParenExpr, *ast.StarExpr,
+		*ast.IncDecStmt, *ast.SliceExpr, *ast.ExprStmt, *ast.IndexExpr,
+		*ast.ReturnStmt,
+		*ast.IfStmt,       /* condition */
+		*ast.KeyValueExpr: // either key or value
+		tok(tokVariable, nil)
+	case *ast.Ellipsis:
+		tok(tokType, nil)
+	case *ast.CaseClause:
+		if n-2 >= 0 {
+			if _, ok := e.stack[n-2].(*ast.TypeSwitchStmt); ok {
+				tok(tokType, nil)
+				return
+			}
+		}
+		tok(tokVariable, nil)
+	case *ast.ArrayType:
+		if x == nd.Len {
+			tok(tokVariable, nil)
+		} else {
+			tok(tokType, nil)
+		}
+	case *ast.MapType:
+		tok(tokType, nil)
+	case *ast.CallExpr:
+		if x == nd.Fun {
+			tok(tokFunction, nil)
+			return
+		}
+		tok(tokVariable, nil)
+	case *ast.TypeAssertExpr:
+		if x == nd.X {
+			tok(tokVariable, nil)
+		} else if x == nd.Type {
+			tok(tokType, nil)
+		}
+	case *ast.ValueSpec:
+		for _, p := range nd.Names {
+			if p == x {
+				tok(tokVariable, def)
+				return
+			}
+		}
+		for _, p := range nd.Values {
+			if p == x {
+				tok(tokVariable, nil)
+				return
+			}
+		}
+		tok(tokType, nil)
+	case *ast.SelectorExpr: // e.ti.Selections[nd] is nil, so no help
+		if n-1 >= 0 {
+			if ce, ok := e.stack[n-1].(*ast.CallExpr); ok {
+				// ... CallExpr SelectorExpr Ident (_.x())
+				if ce.Fun == nd && nd.Sel == x {
+					tok(tokFunction, nil)
+					return
+				}
+			}
+		}
+		tok(tokVariable, nil)
+	case *ast.AssignStmt:
+		for _, p := range nd.Lhs {
+			// x := ..., or x = ...
+			if p == x {
+				if nd.Tok != token.DEFINE {
+					def = nil
+				}
+				tok(tokVariable, def)
+				return
+			}
+		}
+		// RHS, = x
+		tok(tokVariable, nil)
+	case *ast.TypeSpec: // it's a type if it is either the Name or the Type
+		if x == nd.Type {
+			def = nil
+		}
+		tok(tokType, def)
+	case *ast.Field:
+		// ident could be type in a field, or a method in an interface type, or a variable
+		if x == nd.Type {
+			tok(tokType, nil)
+			return
+		}
+		if n-2 >= 0 {
+			_, okit := e.stack[n-2].(*ast.InterfaceType)
+			_, okfl := e.stack[n-1].(*ast.FieldList)
+			if okit && okfl {
+				tok(tokMember, def)
+				return
+			}
+		}
+		tok(tokVariable, nil)
+	case *ast.LabeledStmt, *ast.BranchStmt:
+		// nothing to report
+	case *ast.CompositeLit:
+		if nd.Type == x {
+			tok(tokType, nil)
+			return
+		}
+		tok(tokVariable, nil)
+	case *ast.RangeStmt:
+		if nd.Tok != token.DEFINE {
+			def = nil
+		}
+		tok(tokVariable, def)
+	case *ast.FuncDecl:
+		tok(tokFunction, def)
+	default:
+		msg := fmt.Sprintf("%T undexpected: %s %s%q", nd, x.Name, e.strStack(), e.srcLine(x))
+		e.unexpected(msg)
+	}
+}
+
 func isDeprecated(n *ast.CommentGroup) bool {
 	if n == nil {
 		return false
@@ -642,16 +786,17 @@
 		}
 		// and fall through for _
 	}
-	if d.Path.Value == "" {
+	val := d.Path.Value
+	if len(val) < 2 || val[0] != '"' || val[len(val)-1] != '"' {
+		// avoid panics on imports without a properly quoted string
 		return
 	}
-	nm := d.Path.Value[1 : len(d.Path.Value)-1] // trailing "
-	v := strings.LastIndex(nm, "/")
-	if v != -1 {
-		nm = nm[v+1:]
-	}
+	nm := val[1 : len(val)-1] // remove surrounding "s
+	nm = filepath.Base(nm)
+	// in import "lib/math", 'math' is the package name
 	start := d.Path.End() - token.Pos(1+len(nm))
 	e.token(start, len(nm), tokNamespace, nil)
+	// There may be more cases, as import strings are implementation defined.
 }
 
 // log unexpected state
diff --git a/internal/lsp/testdata/semantic/a.go b/internal/lsp/testdata/semantic/a.go
index 756c56e..54d6c8a 100644
--- a/internal/lsp/testdata/semantic/a.go
+++ b/internal/lsp/testdata/semantic/a.go
@@ -55,6 +55,8 @@
 	w := b[4:]
 	j := len(x)
 	j--
+	q := []interface{}{j, 23i, &y}
+	g(q...)
 	return true
 }
 
@@ -74,5 +76,6 @@
 	if !ok {
 		switch x := vv[0].(type) {
 		}
+		goto Never
 	}
 }
diff --git a/internal/lsp/testdata/semantic/a.go.golden b/internal/lsp/testdata/semantic/a.go.golden
index 512a83e..4bf70e5 100644
--- a/internal/lsp/testdata/semantic/a.go.golden
+++ b/internal/lsp/testdata/semantic/a.go.golden
@@ -39,7 +39,7 @@
 /*⇒4,keyword,[]*/func (/*⇒1,variable,[]*/a /*⇒1,operator,[]*/*/*⇒1,type,[]*/A) /*⇒1,member,[definition]*/f() /*⇒4,type,[defaultLibrary]*/bool {
 	/*⇒3,keyword,[]*/var /*⇒1,variable,[definition]*/z /*⇒6,type,[defaultLibrary]*/string
 	/*⇒1,variable,[definition]*/x /*⇒2,operator,[]*/:= /*⇒5,string,[]*/"foo"
-	/*⇒1,variable,[]*/a(/*⇒1,variable,[definition]*/x)
+	/*⇒1,variable,[]*/a(/*⇒1,variable,[]*/x)
 	/*⇒1,variable,[definition]*/y /*⇒2,operator,[]*/:= /*⇒5,string,[]*/"bar" /*⇒1,operator,[]*/+ /*⇒1,variable,[]*/x
 	/*⇒6,keyword,[]*/switch /*⇒1,variable,[]*/z {
 	/*⇒4,keyword,[]*/case /*⇒4,string,[]*/"xx":
@@ -52,18 +52,20 @@
 	/*⇒3,keyword,[]*/for /*⇒1,variable,[definition]*/k, /*⇒1,variable,[definition]*/v := /*⇒5,keyword,[]*/range /*⇒1,variable,[]*/m {
 		/*⇒6,keyword,[]*/return (/*⇒1,operator,[]*/!/*⇒1,variable,[]*/k) /*⇒2,operator,[]*/&& /*⇒1,variable,[]*/v[/*⇒1,number,[]*/0] /*⇒2,operator,[]*/== /*⇒3,variable,[readonly defaultLibrary]*/nil
 	}
-	/*⇒2,variable,[]*/c2 /*⇒2,operator,[]*/<- /*⇒1,type,[]*/A./*⇒1,variable,[definition]*/X
+	/*⇒2,variable,[]*/c2 /*⇒2,operator,[]*/<- /*⇒1,type,[]*/A./*⇒1,variable,[]*/X
 	/*⇒1,variable,[definition]*/w /*⇒2,operator,[]*/:= /*⇒1,variable,[]*/b[/*⇒1,number,[]*/4:]
 	/*⇒1,variable,[definition]*/j /*⇒2,operator,[]*/:= /*⇒3,function,[defaultLibrary]*/len(/*⇒1,variable,[]*/x)
 	/*⇒1,variable,[]*/j/*⇒2,operator,[]*/--
+	/*⇒1,variable,[definition]*/q /*⇒2,operator,[]*/:= []/*⇒9,keyword,[]*/interface{}{/*⇒1,variable,[]*/j, /*⇒3,number,[]*/23i, /*⇒1,operator,[]*/&/*⇒1,variable,[]*/y}
+	/*⇒1,function,[]*/g(/*⇒1,variable,[]*/q/*⇒3,operator,[]*/...)
 	/*⇒6,keyword,[]*/return /*⇒4,variable,[readonly]*/true
 }
 
 /*⇒4,keyword,[]*/func /*⇒1,function,[definition]*/g(/*⇒2,parameter,[definition]*/vv /*⇒3,operator,[]*/.../*⇒9,keyword,[]*/interface{}) {
 	/*⇒2,variable,[definition]*/ff /*⇒2,operator,[]*/:= /*⇒4,keyword,[]*/func() {}
 	/*⇒5,keyword,[]*/defer /*⇒2,variable,[]*/ff()
-	/*⇒2,keyword,[]*/go /*⇒3,namespace,[]*/utf./*⇒9,variable,[definition]*/RuneCount(/*⇒2,string,[]*/"")
-	/*⇒2,keyword,[]*/go /*⇒4,namespace,[]*/utf8./*⇒9,function,[]*/RuneCount(/*⇒2,variable,[]*/vv.(/*⇒6,variable,[definition]*/string))
+	/*⇒2,keyword,[]*/go /*⇒3,namespace,[]*/utf./*⇒9,function,[]*/RuneCount(/*⇒2,string,[]*/"")
+	/*⇒2,keyword,[]*/go /*⇒4,namespace,[]*/utf8./*⇒9,function,[]*/RuneCount(/*⇒2,variable,[]*/vv.(/*⇒6,type,[]*/string))
 	/*⇒2,keyword,[]*/if /*⇒4,variable,[readonly]*/true {
 	} /*⇒4,keyword,[]*/else {
 	}
@@ -75,6 +77,7 @@
 	/*⇒2,keyword,[]*/if /*⇒1,operator,[]*/!/*⇒2,variable,[]*/ok {
 		/*⇒6,keyword,[]*/switch /*⇒1,variable,[definition]*/x /*⇒2,operator,[]*/:= /*⇒2,variable,[]*/vv[/*⇒1,number,[]*/0].(/*⇒4,keyword,[]*/type) {
 		}
+		/*⇒4,keyword,[]*/goto Never
 	}
 }