internal/lsp/semantic.go: generate semantic tokens
This Cl adds all the support for producing semantic tokens.
vscode 1.49.1 (the current version) does not ask for semantic tokens;
vscode-insiders does.
There are no categories for statement labels, nor for constant
values of boolean types. (The former doesn't exist in Typescript, and
boolean isn't a type in Typescript.)
The CL after this one will provide for extensive testing.
Change-Id: I589f51d510b0e423c947f4d95f685e6ba696506d
Reviewed-on: https://go-review.googlesource.com/c/tools/+/258477
Run-TryBot: Peter Weinberger <pjw@google.com>
gopls-CI: kokoro <noreply+kokoro@google.com>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Robert Findley <rfindley@google.com>
Trust: Peter Weinberger <pjw@google.com>
diff --git a/internal/lsp/semantic.go b/internal/lsp/semantic.go
index 7d3b343..d3129f4 100644
--- a/internal/lsp/semantic.go
+++ b/internal/lsp/semantic.go
@@ -5,30 +5,25 @@
package lsp
import (
+ "bytes"
"context"
"fmt"
"go/ast"
+ "go/token"
"go/types"
"log"
+ "runtime"
"sort"
"strings"
"time"
- "golang.org/x/tools/internal/event"
"golang.org/x/tools/internal/lsp/protocol"
"golang.org/x/tools/internal/lsp/source"
errors "golang.org/x/xerrors"
)
func (s *Server) semanticTokensFull(ctx context.Context, p *protocol.SemanticTokensParams) (*protocol.SemanticTokens, error) {
- now := time.Now()
ret, err := s.computeSemanticTokens(ctx, p.TextDocument, nil)
- if ret != nil && err == nil {
- event.Log(ctx, fmt.Sprintf("Full(%v): %d items for %s in %s",
- s.session.Options().SemanticTokens, len(ret.Data)/5, p.TextDocument.URI.SpanURI().Filename(), time.Since(now)))
- } else {
- event.Error(ctx, fmt.Sprintf("Full failed for %s in %s", p.TextDocument.URI.SpanURI().Filename(), time.Since(now)), err)
- }
return ret, err
}
@@ -37,15 +32,7 @@
}
func (s *Server) semanticTokensRange(ctx context.Context, p *protocol.SemanticTokensRangeParams) (*protocol.SemanticTokens, error) {
- now := time.Now()
ret, err := s.computeSemanticTokens(ctx, p.TextDocument, &p.Range)
- if ret != nil && err == nil {
- event.Log(ctx, fmt.Sprintf("Range(%v): %d items for %s %s in %s",
- s.session.Options().SemanticTokens, len(ret.Data)/5, p.TextDocument.URI.SpanURI().Filename(),
- p.Range, time.Since(now)))
- } else {
- event.Error(ctx, "semanticTokensRange failed", err)
- }
return ret, err
}
@@ -63,10 +50,7 @@
if !ok {
return nil, err
}
- vv, err := s.session.ViewOf(td.URI.SpanURI())
- if err != nil {
- return nil, err
- }
+ vv := snapshot.View()
if !vv.Options().SemanticTokens {
// return an error, so if the option changes
// the client won't remember the wrong answer
@@ -81,10 +65,14 @@
if err != nil {
return nil, err
}
+ if pgf.ParseErr != nil {
+ return nil, pgf.ParseErr
+ }
e := &encoded{
- pgf: pgf,
- rng: rng,
- ti: info,
+ pgf: pgf,
+ rng: rng,
+ ti: info,
+ fset: snapshot.FileSet(),
}
if err := e.init(); err != nil {
return nil, err
@@ -103,36 +91,90 @@
func (e *encoded) semantics() {
f := e.pgf.File
+ e.token(f.Package, len("package"), tokKeyword, nil)
+ e.token(f.Name.NamePos, len(f.Name.Name), tokNamespace, nil)
inspect := func(n ast.Node) bool {
return e.inspector(n)
}
for _, d := range f.Decls {
// only look at the decls that overlap the range
start, end := d.Pos(), d.End()
- if int(end) <= e.start || int(start) >= e.end {
+ if end <= e.start || start >= e.end {
continue
}
ast.Inspect(d, inspect)
}
}
+type tokenType string
+
+const (
+ tokNamespace tokenType = "namespace"
+ tokType tokenType = "type"
+ tokInterface tokenType = "interface"
+ tokParameter tokenType = "parameter"
+ tokVariable tokenType = "variable"
+ tokMember tokenType = "member"
+ tokFunction tokenType = "function"
+ tokKeyword tokenType = "keyword"
+ tokComment tokenType = "comment"
+ tokString tokenType = "string"
+ tokNumber tokenType = "number"
+ tokOperator tokenType = "operator"
+)
+
+var lastPosition token.Position
+
+func (e *encoded) token(start token.Pos, leng int, typ tokenType, mods []string) {
+ if start == 0 {
+ // Temporary, pending comprehensive tests
+ log.Printf("SAW token.NoPos")
+ for i := 0; i < 6; i++ {
+ _, f, l, ok := runtime.Caller(i)
+ if !ok {
+ break
+ }
+ log.Printf("%d: %s:%d", i, f, l)
+ }
+ }
+ if start >= e.end || start+token.Pos(leng) <= e.start {
+ return
+ }
+ // want a line and column from start (in LSP coordinates)
+ rng := source.NewMappedRange(e.fset, e.pgf.Mapper, start, start+token.Pos(leng))
+ lspRange, err := rng.Range()
+ if err != nil {
+ log.Printf("failed to convert to range %v", err)
+ }
+ // token is all on one line
+ length := lspRange.End.Character - lspRange.Start.Character
+ e.add(lspRange.Start.Line, lspRange.Start.Character, length, typ, mods)
+}
+
+func (e *encoded) add(line, start float64, len float64, tok tokenType, mod []string) {
+ x := semItem{line, start, len, tok, mod}
+ e.items = append(e.items, x)
+}
+
// semItem represents a token found walking the parse tree
type semItem struct {
- line, start, len int
- typeStr string
- mods []string
+ line, start float64
+ len float64
+ typeStr tokenType
+ mods []string
}
type encoded struct {
// the generated data
items []semItem
- pgf *source.ParsedGoFile
- rng *protocol.Range
- ti *types.Info
+ pgf *source.ParsedGoFile
+ rng *protocol.Range
+ ti *types.Info
+ fset *token.FileSet
// allowed starting and ending token.Pos, set by init
// used to avoid looking at declarations not in range
- start, end int
+ start, end token.Pos
// path from the root of the parse tree, used for debugging
stack []ast.Node
}
@@ -153,7 +195,6 @@
}
func (e *encoded) inspector(n ast.Node) bool {
- // this will be filled in, in the next CL
pop := func() {
e.stack = e.stack[:len(e.stack)-1]
}
@@ -165,53 +206,128 @@
switch x := n.(type) {
case *ast.ArrayType:
case *ast.AssignStmt:
+ e.token(x.TokPos, len(x.Tok.String()), tokOperator, nil)
case *ast.BasicLit:
+ // if it extends across a line, skip it
+ // better would be to mark each line as string TODO(pjw)
+ if strings.Contains(x.Value, "\n") {
+ break
+ }
+ ln := len(x.Value)
+ what := tokNumber
+ if x.Kind == token.STRING {
+ what = tokString
+ if _, ok := e.stack[len(e.stack)-2].(*ast.Field); ok {
+ // struct tags (this is probably pointless, as the
+ // TextMate grammar will treat all the other comments the same)
+ what = tokComment
+ }
+ }
+ e.token(x.Pos(), ln, what, nil)
case *ast.BinaryExpr:
+ e.token(x.OpPos, len(x.Op.String()), tokOperator, nil)
case *ast.BlockStmt:
case *ast.BranchStmt:
+ e.token(x.TokPos, len(x.Tok.String()), tokKeyword, nil)
+ // There's no semantic encoding for labels
case *ast.CallExpr:
+ if x.Ellipsis != token.NoPos {
+ e.token(x.Ellipsis, len("..."), tokOperator, nil)
+ }
case *ast.CaseClause:
+ iam := "case"
+ if x.List == nil {
+ iam = "default"
+ }
+ e.token(x.Case, len(iam), tokKeyword, nil)
case *ast.ChanType:
+ // chan | chan <- | <- chan
+ if x.Arrow == token.NoPos || x.Arrow != x.Begin {
+ e.token(x.Begin, len("chan"), tokKeyword, nil)
+ break
+ }
+ pos := e.findKeyword("chan", x.Begin+2, x.Value.Pos())
+ e.token(pos, len("chan"), tokKeyword, nil)
case *ast.CommClause:
+ iam := len("case")
+ if x.Comm == nil {
+ iam = len("default")
+ }
+ e.token(x.Case, iam, tokKeyword, nil)
case *ast.CompositeLit:
case *ast.DeclStmt:
case *ast.DeferStmt:
+ e.token(x.Defer, len("defer"), tokKeyword, nil)
case *ast.Ellipsis:
+ e.token(x.Ellipsis, len("..."), tokOperator, nil)
case *ast.EmptyStmt:
case *ast.ExprStmt:
case *ast.Field:
case *ast.FieldList:
case *ast.ForStmt:
+ e.token(x.For, len("for"), tokKeyword, nil)
case *ast.FuncDecl:
case *ast.FuncLit:
case *ast.FuncType:
+ if x.Func != token.NoPos {
+ e.token(x.Func, len("func"), tokKeyword, nil)
+ }
case *ast.GenDecl:
+ e.token(x.TokPos, len(x.Tok.String()), tokKeyword, nil)
case *ast.GoStmt:
+ e.token(x.Go, len("go"), tokKeyword, nil)
case *ast.Ident:
+ e.ident(x)
case *ast.IfStmt:
+ e.token(x.If, len("if"), tokKeyword, nil)
+ if x.Else != nil {
+ // x.Body.End() or x.Body.End()+1, not that it matters
+ pos := e.findKeyword("else", x.Body.End(), x.Else.Pos())
+ e.token(pos, len("else"), tokKeyword, nil)
+ }
case *ast.ImportSpec:
+ e.importSpec(x)
pop()
return false
case *ast.IncDecStmt:
+ e.token(x.TokPos, len(x.Tok.String()), tokOperator, nil)
case *ast.IndexExpr:
case *ast.InterfaceType:
+ e.token(x.Interface, len("interface"), tokKeyword, nil)
case *ast.KeyValueExpr:
case *ast.LabeledStmt:
case *ast.MapType:
+ e.token(x.Map, len("map"), tokKeyword, nil)
case *ast.ParenExpr:
case *ast.RangeStmt:
+ e.token(x.For, len("for"), tokKeyword, nil)
+ // x.TokPos == token.NoPos should mean a syntax error
+ pos := e.findKeyword("range", x.TokPos, x.X.Pos())
+ e.token(pos, len("range"), tokKeyword, nil)
case *ast.ReturnStmt:
+ e.token(x.Return, len("return"), tokKeyword, nil)
case *ast.SelectStmt:
+ e.token(x.Select, len("select"), tokKeyword, nil)
case *ast.SelectorExpr:
case *ast.SendStmt:
+ e.token(x.Arrow, len("<-"), tokOperator, nil)
case *ast.SliceExpr:
case *ast.StarExpr:
+ e.token(x.Star, len("*"), tokOperator, nil)
case *ast.StructType:
+ e.token(x.Struct, len("struct"), tokKeyword, nil)
case *ast.SwitchStmt:
+ e.token(x.Switch, len("switch"), tokKeyword, nil)
case *ast.TypeAssertExpr:
+ if x.Type == nil {
+ pos := e.findKeyword("type", x.Lparen, x.Rparen)
+ e.token(pos, len("type"), tokKeyword, nil)
+ }
case *ast.TypeSpec:
case *ast.TypeSwitchStmt:
+ e.token(x.Switch, len("switch"), tokKeyword, nil)
case *ast.UnaryExpr:
+ e.token(x.OpPos, len(x.Op.String()), tokOperator, nil)
case *ast.ValueSpec:
// things we won't see
case *ast.BadDecl, *ast.BadExpr, *ast.BadStmt,
@@ -226,10 +342,142 @@
}
return true
}
+func (e *encoded) ident(x *ast.Ident) {
+ def := e.ti.Defs[x]
+ if def != nil {
+ what, mods := e.definitionFor(x)
+ if what != "" {
+ e.token(x.Pos(), len(x.String()), what, mods)
+ }
+ return
+ }
+ use := e.ti.Uses[x]
+ switch y := use.(type) {
+ case nil:
+ e.token(x.NamePos, len(x.Name), tokVariable, []string{"definition"})
+ case *types.Builtin:
+ e.token(x.NamePos, len(x.Name), tokFunction, []string{"defaultLibrary"})
+ case *types.Const:
+ mods := []string{"readonly"}
+ tt := y.Type()
+ if ttx, ok := tt.(*types.Basic); ok {
+ switch bi := ttx.Info(); {
+ case bi&types.IsNumeric != 0:
+ me := tokVariable
+ if x.String() == "iota" {
+ me = tokKeyword
+ mods = []string{}
+ }
+ e.token(x.Pos(), len(x.String()), me, mods)
+ case bi&types.IsString != 0:
+ e.token(x.Pos(), len(x.String()), tokString, mods)
+ case bi&types.IsBoolean != 0:
+ e.token(x.Pos(), len(x.Name), tokKeyword, nil)
+ case bi == 0:
+ // nothing to say
+ default:
+ // replace with panic after extensive testing
+ log.Printf("unexpected %x at %s", bi, e.pgf.Tok.PositionFor(x.Pos(), false))
+ }
+ break
+ }
+ if ttx, ok := tt.(*types.Named); ok {
+ if x.String() == "iota" {
+ log.Printf("ttx:%T", ttx)
+ }
+ if _, ok := ttx.Underlying().(*types.Basic); ok {
+ e.token(x.Pos(), len("nil"), tokVariable, mods)
+ break
+ }
+ // can this happen?
+ log.Printf("unexpectd %q/%T", x.String(), tt)
+ e.token(x.Pos(), len(x.String()), tokVariable, nil)
+ break
+ }
+ // can this happen? Don't think so
+ log.Printf("%s %T %#v", x.String(), tt, tt)
+ case *types.Func:
+ e.token(x.Pos(), len(x.Name), tokFunction, nil)
+ case *types.Label:
+ // nothing to map it to
+ case *types.Nil:
+ // nil is a predeclared identifier
+ e.token(x.Pos(), 3, tokKeyword, []string{"readonly"})
+ case *types.PkgName:
+ e.token(x.Pos(), len(x.Name), tokNamespace, nil)
+ case *types.TypeName:
+ e.token(x.Pos(), len(x.String()), tokType, nil)
+ case *types.Var:
+ e.token(x.Pos(), len(x.Name), tokVariable, nil)
+ default:
+ // replace with panic after extensive testing
+ if use == nil {
+ log.Printf("HOW did we get here? %#v/%#v %#v %#v", x, x.Obj, e.ti.Defs[x], e.ti.Uses[x])
+ }
+ if use.Type() != nil {
+ log.Printf("%s %T/%T,%#v", x.String(), use, use.Type(), use)
+ } else {
+ log.Printf("%s %T", x.String(), use)
+ }
+ }
+}
+
+func (e *encoded) definitionFor(x *ast.Ident) (tokenType, []string) {
+ mods := []string{"definition"}
+ for i := len(e.stack) - 1; i >= 0; i-- {
+ s := e.stack[i]
+ switch y := s.(type) {
+ case *ast.AssignStmt, *ast.RangeStmt:
+ if x.Name == "_" {
+ return "", nil // not really a variable
+ }
+ return "variable", mods
+ case *ast.GenDecl:
+ if y.Tok == token.CONST {
+ mods = append(mods, "readonly")
+ }
+ return tokVariable, mods
+ case *ast.FuncDecl:
+ // If x is immediately under a FuncDecl, it is a function or method
+ if i == len(e.stack)-2 {
+ if y.Recv != nil {
+ return tokMember, mods
+ }
+ return tokFunction, mods
+ }
+ // if x < ... < FieldList < FuncDecl, this is the receiver, a variable
+ if _, ok := e.stack[i+1].(*ast.FieldList); ok {
+ return tokVariable, nil
+ }
+ // if x < ... < FieldList < FuncType < FuncDecl, this is a param
+ return tokParameter, mods
+ case *ast.InterfaceType:
+ return tokMember, mods
+ case *ast.TypeSpec:
+ return tokType, mods
+ }
+ }
+ // panic after extensive testing
+ log.Printf("failed to find the decl for %s", e.pgf.Tok.PositionFor(x.Pos(), false))
+ return "", []string{""}
+}
+
+// findKeyword finds a keyword rather than guessing its location
+func (e *encoded) findKeyword(keyword string, start, end token.Pos) token.Pos {
+ offset := int(start) - e.pgf.Tok.Base()
+ last := int(end) - e.pgf.Tok.Base()
+ buf := e.pgf.Src
+ idx := bytes.Index(buf[offset:last], []byte(keyword))
+ if idx != -1 {
+ return start + token.Pos(idx)
+ }
+ // can't happen
+ return token.NoPos
+}
func (e *encoded) init() error {
- e.start = e.pgf.Tok.Base()
- e.end = e.start + e.pgf.Tok.Size()
+ e.start = token.Pos(e.pgf.Tok.Base())
+ e.end = e.start + token.Pos(e.pgf.Tok.Size())
if e.rng == nil {
return nil
}
@@ -237,8 +485,8 @@
if err != nil {
return errors.Errorf("range span error for %s", e.pgf.File.Name)
}
- e.end = e.start + span.End().Offset()
- e.start += span.Start().Offset()
+ e.end = e.start + token.Pos(span.End().Offset())
+ e.start += token.Pos(span.Start().Offset())
return nil
}
@@ -252,7 +500,7 @@
})
// each semantic token needs five values
// (see Integer Encoding for Tokens in the LSP spec)
- x := make([]int, 5*len(e.items))
+ x := make([]float64, 5*len(e.items))
for i := 0; i < len(e.items); i++ {
j := 5 * i
if i == 0 {
@@ -265,42 +513,82 @@
x[j+1] = e.items[i].start - e.items[i-1].start
}
x[j+2] = e.items[i].len
- x[j+3] = SemanticMemo.typeMap[e.items[i].typeStr]
+ x[j+3] = float64(SemanticMemo.TypeMap[e.items[i].typeStr])
+ mask := 0
for _, s := range e.items[i].mods {
- x[j+4] |= SemanticMemo.modMap[s]
+ mask |= SemanticMemo.ModMap[s]
}
+ x[j+4] = float64(mask)
}
- // As the client never sends these, we could fix the generated
- // type to want []int rather than []float64.
- ans := make([]float64, len(x))
- for i, y := range x {
- ans[i] = float64(y)
+ return x, nil
+}
+
+func (e *encoded) importSpec(d *ast.ImportSpec) {
+ // a local package name or the last component of the Path
+ if d.Name != nil {
+ nm := d.Name.String()
+ // import . x => x is not a namespace
+ // import _ x => x is a namespace
+ if nm != "_" && nm != "." {
+ e.token(d.Name.Pos(), len(nm), tokNamespace, nil)
+ return
+ }
+ if nm == "." {
+ return
+ }
+ // and fall through for _
}
- return ans, nil
+ nm := d.Path.Value[1 : len(d.Path.Value)-1] // trailing "
+ v := strings.LastIndex(nm, "/")
+ if v != -1 {
+ nm = nm[v+1:]
+ }
+ start := d.Path.End() - token.Pos(1+len(nm))
+ e.token(start, len(nm), tokNamespace, nil)
}
// SemMemo supports semantic token translations between numbers and strings
type SemMemo struct {
tokTypes, tokMods []string
- typeMap map[string]int
- modMap map[string]int
+ // these exported fields are used in the 'gopls semtok' command
+ TypeMap map[tokenType]int
+ ModMap map[string]int
}
var SemanticMemo *SemMemo
+// Type returns a string equivalent of the type, for gopls semtok
+func (m *SemMemo) Type(n int) string {
+ if n >= 0 && n < len(m.tokTypes) {
+ return m.tokTypes[n]
+ }
+ return fmt.Sprintf("?%d[%d,%d]?", n, len(m.tokTypes), len(m.tokMods))
+}
+
+// Mods returns the []string equivalent of the mods, for gopls semtok.
+func (m *SemMemo) Mods(n int) []string {
+ mods := []string{}
+ for i := 0; i < len(m.tokMods); i++ {
+ if (n & (1 << uint(i))) != 0 {
+ mods = append(mods, m.tokMods[i])
+ }
+ }
+ return mods
+}
+
// save what the client sent
func rememberToks(toks []string, mods []string) ([]string, []string) {
SemanticMemo = &SemMemo{
tokTypes: toks,
tokMods: mods,
- typeMap: make(map[string]int),
- modMap: make(map[string]int),
+ TypeMap: make(map[tokenType]int),
+ ModMap: make(map[string]int),
}
for i, t := range toks {
- SemanticMemo.typeMap[t] = i
+ SemanticMemo.TypeMap[tokenType(t)] = i
}
for i, m := range mods {
- SemanticMemo.modMap[m] = 1 << uint(i)
+ SemanticMemo.ModMap[m] = 1 << uint(i)
}
// we could have pruned or rearranged them.
// But then change the list in cmd.go too