internal/lsp: add gopls semtok command and semantic token tests

The command 'gopls semtok <file>.go' produces a version of <file>.go
decorated with comments describing tokens as semantic tokens. The
format is described towards the top of cmd/semantictokens.go.

There are also tests in testdata/semantic/. a.go is used by lsp_test.go
(with -cover) to show that essentially all of semantic.go is executed.
b.go illustrates the handling of strange, but legal, constructs
like 'const false = nil and non-ascii'. The .golden files show
the expected results.

The implementation will panic, rather than log, on unexpected state.

gopls semtok has been run on all the .go files in x/tools and in
the source distribution. There were no unexpected messages or panics.

Change-Id: I11715bcc8d6830d1951eb58239978c4a363bbc30
Reviewed-on: https://go-review.googlesource.com/c/tools/+/262198
Run-TryBot: Peter Weinberger <pjw@google.com>
gopls-CI: kokoro <noreply+kokoro@google.com>
TryBot-Result: Go Bot <gobot@golang.org>
Trust: Peter Weinberger <pjw@google.com>
Reviewed-by: Robert Findley <rfindley@google.com>
diff --git a/internal/lsp/cmd/cmd.go b/internal/lsp/cmd/cmd.go
index c9f038f..7225ae8 100644
--- a/internal/lsp/cmd/cmd.go
+++ b/internal/lsp/cmd/cmd.go
@@ -189,6 +189,7 @@
 		&prepareRename{app: app},
 		&references{app: app},
 		&rename{app: app},
+		&semtok{app: app},
 		&signature{app: app},
 		&suggestedFix{app: app},
 		&symbols{app: app},
@@ -280,6 +281,12 @@
 		ContentFormat: []protocol.MarkupKind{opts.PreferredContentFormat},
 	}
 	params.Capabilities.TextDocument.DocumentSymbol.HierarchicalDocumentSymbolSupport = opts.HierarchicalDocumentSymbolSupport
+	params.Capabilities.TextDocument.SemanticTokens = &protocol.SemanticTokensClientCapabilities{}
+	params.Capabilities.TextDocument.SemanticTokens.Formats = []string{"relative"}
+	params.Capabilities.TextDocument.SemanticTokens.Requests.Range = true
+	params.Capabilities.TextDocument.SemanticTokens.Requests.Full.Delta = true
+	params.Capabilities.TextDocument.SemanticTokens.TokenTypes = lsp.SemanticTypes()
+	params.Capabilities.TextDocument.SemanticTokens.TokenModifiers = lsp.SemanticModifiers()
 	params.InitializationOptions = map[string]interface{}{
 		"symbolMatcher": matcherString[opts.SymbolMatcher],
 	}
@@ -495,6 +502,19 @@
 	return file
 }
 
+func (c *connection) semanticTokens(ctx context.Context, file span.URI) (*protocol.SemanticTokens, error) {
+	p := &protocol.SemanticTokensParams{
+		TextDocument: protocol.TextDocumentIdentifier{
+			URI: protocol.URIFromSpanURI(file),
+		},
+	}
+	resp, err := c.Server.SemanticTokensFull(ctx, p)
+	if err != nil {
+		return nil, err
+	}
+	return resp, nil
+}
+
 func (c *connection) diagnoseFiles(ctx context.Context, files []span.URI) error {
 	var untypedFiles []interface{}
 	for _, file := range files {
diff --git a/internal/lsp/cmd/semantictokens.go b/internal/lsp/cmd/semantictokens.go
new file mode 100644
index 0000000..93f2cdb
--- /dev/null
+++ b/internal/lsp/cmd/semantictokens.go
@@ -0,0 +1,228 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cmd
+
+import (
+	"bytes"
+	"context"
+	"flag"
+	"fmt"
+	"go/parser"
+	"go/token"
+	"io/ioutil"
+	"log"
+	"os"
+	"runtime"
+	"unicode/utf8"
+
+	"golang.org/x/tools/internal/lsp"
+	"golang.org/x/tools/internal/lsp/protocol"
+	"golang.org/x/tools/internal/lsp/source"
+	"golang.org/x/tools/internal/span"
+)
+
+// generate semantic tokens and interpolate them in the file
+
+// The output is the input file decorated with comments showing the
+// syntactic tokens. The comments are stylized:
+//   /*<arrow><length>,<token type>,[<modifiers>]*/
+// For most occurrences, the comment comes just before the token it
+// describes, and arrow is a right arrow. If the token is inside a string
+// the comment comes just after the string, and the arrow is a left arrow.
+// <length> is the length of the token in runes, <token type> is one
+// of the supported semantic token types, and <modifiers> is a
+// (possibly empty) list of token type modifiers.
+
+// There are 3 coordinate systems for lines and character offsets in lines
+// LSP (what's returned from semanticTokens()):
+//    0-based: the first line is line 0, the first character of a line
+//      is character 0, and characters are counted as UTF-16 code points
+// gopls (and Go error messages):
+//    1-based: the first line is line 1, the first character of a line
+//      is character 0, and characters are counted as bytes
+// internal (as used in marks, and lines:=bytes.Split(buf, '\n'))
+//    0-based: lines and character positions are 1 less than in
+//      the gopls coordinate system
+
+type semtok struct {
+	app *Application
+}
+
+var colmap *protocol.ColumnMapper
+
+func (c *semtok) Name() string      { return "semtok" }
+func (c *semtok) Usage() string     { return "<filename>" }
+func (c *semtok) ShortHelp() string { return "show semantic tokens for the specified file" }
+func (c *semtok) DetailedHelp(f *flag.FlagSet) {
+	for i := 1; ; i++ {
+		_, f, l, ok := runtime.Caller(i)
+		if !ok {
+			break
+		}
+		log.Printf("%d: %s:%d", i, f, l)
+	}
+	fmt.Fprint(f.Output(), `
+Example: show the semantic tokens for this file:
+
+  $ gopls semtok internal/lsp/cmd/semtok.go
+
+	gopls semtok flags are:
+`)
+	f.PrintDefaults()
+}
+
+// Run performs the semtok on the files specified by args and prints the
+// results to stdout. PJW: fix this description
+func (c *semtok) Run(ctx context.Context, args ...string) error {
+	log.SetFlags(log.Lshortfile)
+	if len(args) != 1 {
+		return fmt.Errorf("expected one file name, got %d", len(args))
+	}
+	// perhaps simpler if app had just had a FlagSet member
+	origOptions := c.app.options
+	c.app.options = func(opts *source.Options) {
+		origOptions(opts)
+		opts.SemanticTokens = true
+	}
+	conn, err := c.app.connect(ctx)
+	if err != nil {
+		return err
+	}
+	defer conn.terminate(ctx)
+	uri := span.URIFromPath(args[0])
+	file := conn.AddFile(ctx, uri)
+	if file.err != nil {
+		return file.err
+	}
+
+	resp, err := conn.semanticTokens(ctx, uri)
+	if err != nil {
+		return err
+	}
+	buf, err := ioutil.ReadFile(args[0])
+	if err != nil {
+		log.Fatal(err)
+	}
+	fset := token.NewFileSet()
+	f, err := parser.ParseFile(fset, args[0], buf, 0)
+	if err != nil {
+		log.Printf("parsing %s failed %v", args[0], err)
+		return err
+	}
+	tok := fset.File(f.Pos())
+	if tok == nil {
+		// can't happen; just parsed this file
+		log.Printf("tok is nil!")
+		return fmt.Errorf("can't find %s in fset!", args[0])
+	}
+	tc := span.NewContentConverter(args[0], buf)
+	colmap = &protocol.ColumnMapper{
+		URI:       span.URI(args[0]),
+		Content:   buf,
+		Converter: tc,
+	}
+	memo = lsp.SemanticMemo
+	err = decorate(file.uri.Filename(), resp.Data)
+	if err != nil {
+		return err
+	}
+	return nil
+}
+
+var memo *lsp.SemMemo
+
+type mark struct {
+	line, offset int // 1-based, from RangeSpan
+	len          int // bytes, not runes
+	typ          string
+	mods         []string
+}
+
+// prefixes for semantic token comments
+const (
+	SemanticLeft  = "/*⇐"
+	SemanticRight = "/*⇒"
+)
+
+func markLine(m mark, lines [][]byte) {
+	l := lines[m.line-1] // m.line is 1-based
+	length := utf8.RuneCount(l[m.offset-1 : m.offset-1+m.len])
+	splitAt := m.offset - 1
+	insert := ""
+	if m.typ == "namespace" && m.offset-1+m.len < len(l) && l[m.offset-1+m.len] == '"' {
+		// it is the last component of an import spec
+		// cannot put a comment inside a string
+		insert = fmt.Sprintf("%s%d,namespace,[]*/", SemanticLeft, length)
+		splitAt = m.offset + m.len
+	} else {
+		insert = fmt.Sprintf("%s%d,%s,%v*/", SemanticRight, length, m.typ, m.mods)
+	}
+	x := append([]byte(insert), l[splitAt:]...)
+	l = append(l[:splitAt], x...)
+	lines[m.line-1] = l
+}
+
+func decorate(file string, result []float64) error {
+	buf, err := ioutil.ReadFile(file)
+	if err != nil {
+		return err
+	}
+	marks := newMarks(result)
+	if len(marks) == 0 {
+		return nil
+	}
+	lines := bytes.Split(buf, []byte{'\n'})
+	for i := len(marks) - 1; i >= 0; i-- {
+		mx := marks[i]
+		markLine(mx, lines)
+	}
+	os.Stdout.Write(bytes.Join(lines, []byte{'\n'}))
+	return nil
+}
+
+func newMarks(d []float64) []mark {
+	ans := []mark{}
+	// the following two loops could be merged, at the cost
+	// of making the logic slightly more complicated to understand
+	// first, convert from deltas to absolute, in LSP coordinates
+	lspLine := make([]float64, len(d)/5)
+	lspChar := make([]float64, len(d)/5)
+	line, char := 0.0, 0.0
+	for i := 0; 5*i < len(d); i++ {
+		lspLine[i] = line + d[5*i+0]
+		if d[5*i+0] > 0 {
+			char = 0
+		}
+		lspChar[i] = char + d[5*i+1]
+		char = lspChar[i]
+		line = lspLine[i]
+	}
+	// second, convert to gopls coordinates
+	for i := 0; 5*i < len(d); i++ {
+		pr := protocol.Range{
+			Start: protocol.Position{
+				Line:      lspLine[i],
+				Character: lspChar[i],
+			},
+			End: protocol.Position{
+				Line:      lspLine[i],
+				Character: lspChar[i] + d[5*i+2],
+			},
+		}
+		spn, err := colmap.RangeSpan(pr)
+		if err != nil {
+			log.Fatal(err)
+		}
+		m := mark{
+			line:   spn.Start().Line(),
+			offset: spn.Start().Column(),
+			len:    spn.End().Column() - spn.Start().Column(),
+			typ:    memo.Type(int(d[5*i+3])),
+			mods:   memo.Mods(int(d[5*i+4])),
+		}
+		ans = append(ans, m)
+	}
+	return ans
+}
diff --git a/internal/lsp/cmd/test/semanticdriver.go b/internal/lsp/cmd/test/semanticdriver.go
new file mode 100644
index 0000000..80dc61e
--- /dev/null
+++ b/internal/lsp/cmd/test/semanticdriver.go
@@ -0,0 +1,34 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cmdtest
+
+import (
+	"strings"
+	"testing"
+
+	"golang.org/x/tools/internal/span"
+)
+
+func (r *runner) SemanticTokens(t *testing.T, spn span.Span) {
+	uri := spn.URI()
+	filename := uri.Filename()
+	got, stderr := r.NormalizeGoplsCmd(t, "semtok", filename)
+	if stderr != "" {
+		t.Fatalf("%s: %q", filename, stderr)
+	}
+	want := string(r.data.Golden("semantic", filename, func() ([]byte, error) {
+		return []byte(got), nil
+	}))
+	if want != got {
+		lwant := strings.Split(want, "\n")
+		lgot := strings.Split(got, "\n")
+		t.Errorf("want(%d-%d) != got(%d-%d) for %s", len(want), len(lwant), len(got), len(lgot), r.Normalize(filename))
+		for i := 0; i < len(lwant) && i < len(lgot); i++ {
+			if lwant[i] != lgot[i] {
+				t.Errorf("line %d:\nwant%q\ngot %q\n", i, lwant[i], lgot[i])
+			}
+		}
+	}
+}
diff --git a/internal/lsp/lsp_test.go b/internal/lsp/lsp_test.go
index 56e0412..b24b436 100644
--- a/internal/lsp/lsp_test.go
+++ b/internal/lsp/lsp_test.go
@@ -390,6 +390,41 @@
 	}
 }
 
+func (r *runner) SemanticTokens(t *testing.T, spn span.Span) {
+	// no client, so use default
+	rememberToks(SemanticTypes(), SemanticModifiers())
+	uri := spn.URI()
+	filename := uri.Filename()
+	// this is called solely for coverage in semantic.go
+	_, err := r.server.semanticTokensFull(r.ctx, &protocol.SemanticTokensParams{
+		TextDocument: protocol.TextDocumentIdentifier{
+			URI: protocol.URIFromSpanURI(uri),
+		},
+	})
+	if err != nil {
+		t.Errorf("%v for %s", err, filename)
+	}
+	_, err = r.server.semanticTokensRange(r.ctx, &protocol.SemanticTokensRangeParams{
+		TextDocument: protocol.TextDocumentIdentifier{
+			URI: protocol.URIFromSpanURI(uri),
+		},
+		// any legal range. Just to exercise the call.
+		Range: protocol.Range{
+			Start: protocol.Position{
+				Line:      0,
+				Character: 0,
+			},
+			End: protocol.Position{
+				Line:      2,
+				Character: 0,
+			},
+		},
+	})
+	if err != nil {
+		t.Errorf("%v for Range %s", err, filename)
+	}
+}
+
 func (r *runner) Import(t *testing.T, spn span.Span) {
 	uri := spn.URI()
 	filename := uri.Filename()
diff --git a/internal/lsp/semantic.go b/internal/lsp/semantic.go
index d3129f4..f883a67 100644
--- a/internal/lsp/semantic.go
+++ b/internal/lsp/semantic.go
@@ -12,11 +12,11 @@
 	"go/token"
 	"go/types"
 	"log"
-	"runtime"
 	"sort"
 	"strings"
 	"time"
 
+	"golang.org/x/tools/internal/event"
 	"golang.org/x/tools/internal/lsp/protocol"
 	"golang.org/x/tools/internal/lsp/source"
 	errors "golang.org/x/xerrors"
@@ -69,6 +69,7 @@
 		return nil, pgf.ParseErr
 	}
 	e := &encoded{
+		ctx:  ctx,
 		pgf:  pgf,
 		rng:  rng,
 		ti:   info,
@@ -127,24 +128,29 @@
 
 func (e *encoded) token(start token.Pos, leng int, typ tokenType, mods []string) {
 	if start == 0 {
-		// Temporary, pending comprehensive tests
-		log.Printf("SAW token.NoPos")
-		for i := 0; i < 6; i++ {
-			_, f, l, ok := runtime.Caller(i)
-			if !ok {
-				break
-			}
-			log.Printf("%d: %s:%d", i, f, l)
-		}
+		e.unexpected("token at token.NoPos")
 	}
 	if start >= e.end || start+token.Pos(leng) <= e.start {
 		return
 	}
 	// want a line and column from start (in LSP coordinates)
+	// [//line directives should be ignored]
 	rng := source.NewMappedRange(e.fset, e.pgf.Mapper, start, start+token.Pos(leng))
 	lspRange, err := rng.Range()
 	if err != nil {
-		log.Printf("failed to convert to range %v", err)
+		// possibly a //line directive. TODO(pjw): fix this somehow
+		// "column mapper is for file...instead of..."
+		// "line is beyond end of file..."
+		// see line 116 of internal/span/token.go which uses Position not PositionFor
+		event.Error(e.ctx, "failed to convert to range", err)
+		return
+	}
+	if lspRange.End.Line != lspRange.Start.Line {
+		// abrupt end of file, without \n. TODO(pjw): fix?
+		pos := e.fset.PositionFor(start, false)
+		msg := fmt.Sprintf("token at %s:%d.%d overflows", pos.Filename, pos.Line, pos.Column)
+		event.Log(e.ctx, msg)
+		return
 	}
 	// token is all on one line
 	length := lspRange.End.Character - lspRange.Start.Character
@@ -168,6 +174,7 @@
 	// the generated data
 	items []semItem
 
+	ctx  context.Context
 	pgf  *source.ParsedGoFile
 	rng  *protocol.Range
 	ti   *types.Info
@@ -301,8 +308,12 @@
 	case *ast.ParenExpr:
 	case *ast.RangeStmt:
 		e.token(x.For, len("for"), tokKeyword, nil)
-		// x.TokPos == token.NoPos should mean a syntax error
-		pos := e.findKeyword("range", x.TokPos, x.X.Pos())
+		// x.TokPos == token.NoPos is legal (for range foo {})
+		offset := x.TokPos
+		if offset == token.NoPos {
+			offset = x.For
+		}
+		pos := e.findKeyword("range", offset, x.X.Pos())
 		e.token(pos, len("range"), tokKeyword, nil)
 	case *ast.ReturnStmt:
 		e.token(x.Return, len("return"), tokKeyword, nil)
@@ -338,7 +349,7 @@
 		pop()
 		return false
 	default: // just to be super safe.
-		panic(fmt.Sprintf("failed to implement %T", x))
+		e.unexpected(fmt.Sprintf("failed to implement %T", x))
 	}
 	return true
 }
@@ -374,35 +385,32 @@
 			case bi&types.IsBoolean != 0:
 				e.token(x.Pos(), len(x.Name), tokKeyword, nil)
 			case bi == 0:
-				// nothing to say
+				e.token(x.Pos(), len(x.String()), tokVariable, mods)
 			default:
-				// replace with panic after extensive testing
-				log.Printf("unexpected %x at %s", bi, e.pgf.Tok.PositionFor(x.Pos(), false))
+				msg := fmt.Sprintf("unexpected %x at %s", bi, e.pgf.Tok.PositionFor(x.Pos(), false))
+				e.unexpected(msg)
 			}
 			break
 		}
 		if ttx, ok := tt.(*types.Named); ok {
 			if x.String() == "iota" {
-				log.Printf("ttx:%T", ttx)
+				e.unexpected(fmt.Sprintf("iota:%T", ttx))
 			}
 			if _, ok := ttx.Underlying().(*types.Basic); ok {
-				e.token(x.Pos(), len("nil"), tokVariable, mods)
+				e.token(x.Pos(), len(x.String()), tokVariable, mods)
 				break
 			}
-			// can this happen?
-			log.Printf("unexpectd %q/%T", x.String(), tt)
-			e.token(x.Pos(), len(x.String()), tokVariable, nil)
-			break
+			e.unexpected(fmt.Sprintf("%q/%T", x.String(), tt))
 		}
 		// can this happen? Don't think so
-		log.Printf("%s %T %#v", x.String(), tt, tt)
+		e.unexpected(fmt.Sprintf("%s %T %#v", x.String(), tt, tt))
 	case *types.Func:
 		e.token(x.Pos(), len(x.Name), tokFunction, nil)
 	case *types.Label:
 		// nothing to map it to
 	case *types.Nil:
 		// nil is a predeclared identifier
-		e.token(x.Pos(), 3, tokKeyword, []string{"readonly"})
+		e.token(x.Pos(), len("nil"), tokKeyword, []string{"readonly"})
 	case *types.PkgName:
 		e.token(x.Pos(), len(x.Name), tokNamespace, nil)
 	case *types.TypeName:
@@ -412,12 +420,13 @@
 	default:
 		// replace with panic after extensive testing
 		if use == nil {
-			log.Printf("HOW did we get here? %#v/%#v %#v %#v", x, x.Obj, e.ti.Defs[x], e.ti.Uses[x])
+			msg := fmt.Sprintf("%#v/%#v %#v %#v", x, x.Obj, e.ti.Defs[x], e.ti.Uses[x])
+			e.unexpected(msg)
 		}
 		if use.Type() != nil {
-			log.Printf("%s %T/%T,%#v", x.String(), use, use.Type(), use)
+			e.unexpected(fmt.Sprintf("%s %T/%T,%#v", x.String(), use, use.Type(), use))
 		} else {
-			log.Printf("%s %T", x.String(), use)
+			e.unexpected(fmt.Sprintf("%s %T", x.String(), use))
 		}
 	}
 }
@@ -458,7 +467,8 @@
 		}
 	}
 	// panic after extensive testing
-	log.Printf("failed to find the decl for %s", e.pgf.Tok.PositionFor(x.Pos(), false))
+	msg := fmt.Sprintf("failed to find the decl for %s", e.pgf.Tok.PositionFor(x.Pos(), false))
+	e.unexpected(msg)
 	return "", []string{""}
 }
 
@@ -472,6 +482,7 @@
 		return start + token.Pos(idx)
 	}
 	// can't happen
+	e.unexpected(fmt.Sprintf("not found:%s %v", keyword, e.fset.PositionFor(start, false)))
 	return token.NoPos
 }
 
@@ -547,6 +558,13 @@
 	e.token(start, len(nm), tokNamespace, nil)
 }
 
+// panic on unexpected state
+func (e *encoded) unexpected(msg string) {
+	log.Print(msg)
+	log.Print(e.strStack())
+	panic(msg)
+}
+
 // SemMemo supports semantic token translations between numbers and strings
 type SemMemo struct {
 	tokTypes, tokMods []string
@@ -594,3 +612,24 @@
 	// But then change the list in cmd.go too
 	return SemanticMemo.tokTypes, SemanticMemo.tokMods
 }
+
+// SemanticTypes to use in case there is no client, as in the command line, or tests
+func SemanticTypes() []string {
+	return semanticTypes[:]
+}
+
+// SemanticModifiers to use in case there is no client.
+func SemanticModifiers() []string {
+	return semanticModifiers[:]
+}
+
+var (
+	semanticTypes = [...]string{
+		"namespace", "type", "class", "enum", "interface",
+		"struct", "typeParameter", "parameter", "variable", "property", "enumMember",
+		"event", "function", "member", "macro", "keyword", "modifier", "comment",
+		"string", "number", "regexp", "operator"}
+	semanticModifiers = [...]string{
+		"declaration", "definition", "readonly", "static",
+		"deprecated", "abstract", "async", "modification", "documentation", "defaultLibrary"}
+)
diff --git a/internal/lsp/source/source_test.go b/internal/lsp/source/source_test.go
index 17046c3..ada58b5 100644
--- a/internal/lsp/source/source_test.go
+++ b/internal/lsp/source/source_test.go
@@ -500,6 +500,10 @@
 	}
 }
 
+func (r *runner) SemanticTokens(t *testing.T, spn span.Span) {
+	t.Skip("nothing to test in source")
+}
+
 func (r *runner) Import(t *testing.T, spn span.Span) {
 	fh, err := r.snapshot.GetFile(r.ctx, spn.URI())
 	if err != nil {
diff --git a/internal/lsp/testdata/semantic/a.go b/internal/lsp/testdata/semantic/a.go
new file mode 100644
index 0000000..a8c7d99
--- /dev/null
+++ b/internal/lsp/testdata/semantic/a.go
@@ -0,0 +1,78 @@
+package semantictokens
+
+import (
+	_ "encoding/utf8"
+	utf "encoding/utf8"
+	"fmt" //@ semantic("fmt")
+	. "fmt"
+	"unicode/utf8"
+)
+
+var (
+	a           = fmt.Print
+	b  []string = []string{"foo"}
+	c1 chan int
+	c2 <-chan int
+	c3 = make([]chan<- int)
+	b  = A{X: 23}
+	m  map[bool][3]*float64
+)
+
+const (
+	xx F = iota
+	yy   = xx + 3
+	zz   = ""
+	ww   = "not " + zz
+)
+
+type A struct {
+	X int `foof`
+}
+type B interface {
+	A
+	sad(int) bool
+}
+
+type F int
+
+func (a *A) f() bool {
+	var z string
+	x := "foo"
+	a(x)
+	y := "bar" + x
+	switch z {
+	case "xx":
+	default:
+	}
+	select {
+	case z := <-c3[0]:
+	default:
+	}
+	for k, v := range m {
+		return (!k) && v[0] == nil
+	}
+	c2 <- A.X
+	w := b[4:]
+	j := len(x)
+	j--
+	return true
+}
+
+func g(vv ...interface{}) {
+	ff := func() {}
+	defer ff()
+	go utf.RuneCount("")
+	go utf8.RuneCount(vv.(string))
+	if true {
+	} else {
+	}
+Never:
+	for i := 0; i < 10; {
+		break Never
+	}
+	_, ok := vv[0].(A)
+	if !ok {
+		switch x := vv[0].(type) {
+		}
+	}
+}
diff --git a/internal/lsp/testdata/semantic/a.go.golden b/internal/lsp/testdata/semantic/a.go.golden
new file mode 100644
index 0000000..6721726
--- /dev/null
+++ b/internal/lsp/testdata/semantic/a.go.golden
@@ -0,0 +1,80 @@
+-- semantic --
+/*⇒7,keyword,[]*/package /*⇒14,namespace,[]*/semantictokens
+
+/*⇒6,keyword,[]*/import (
+	_ "encoding/utf8"/*⇐4,namespace,[]*/
+	/*⇒3,namespace,[]*/utf "encoding/utf8"
+	"fmt"/*⇐3,namespace,[]*/ //@ semantic("fmt")
+	. "fmt"
+	"unicode/utf8"/*⇐4,namespace,[]*/
+)
+
+/*⇒3,keyword,[]*/var (
+	/*⇒1,variable,[definition]*/a           = /*⇒3,namespace,[]*/fmt./*⇒5,function,[]*/Print
+	/*⇒1,variable,[definition]*/b  []/*⇒6,type,[]*/string = []/*⇒6,type,[]*/string{/*⇒5,string,[]*/"foo"}
+	/*⇒2,variable,[definition]*/c1 /*⇒4,keyword,[]*/chan /*⇒3,type,[]*/int
+	/*⇒2,variable,[definition]*/c2 <-/*⇒4,keyword,[]*/chan /*⇒3,type,[]*/int
+	/*⇒2,variable,[definition]*/c3 = /*⇒4,function,[defaultLibrary]*/make([]/*⇒4,keyword,[]*/chan<- /*⇒3,type,[]*/int)
+	/*⇒1,variable,[definition]*/b  = /*⇒1,type,[]*/A{/*⇒1,variable,[]*/X: /*⇒2,number,[]*/23}
+	/*⇒1,variable,[definition]*/m  /*⇒3,keyword,[]*/map[/*⇒4,type,[]*/bool][/*⇒1,number,[]*/3]/*⇒1,operator,[]*/*/*⇒7,type,[]*/float64
+)
+
+/*⇒5,keyword,[]*/const (
+	/*⇒2,variable,[definition readonly]*/xx /*⇒1,type,[]*/F = /*⇒4,keyword,[]*/iota
+	/*⇒2,variable,[definition readonly]*/yy   = /*⇒2,variable,[readonly]*/xx /*⇒1,operator,[]*/+ /*⇒1,number,[]*/3
+	/*⇒2,variable,[definition readonly]*/zz   = /*⇒2,string,[]*/""
+	/*⇒2,variable,[definition readonly]*/ww   = /*⇒6,string,[]*/"not " /*⇒1,operator,[]*/+ /*⇒2,string,[readonly]*/zz
+)
+
+/*⇒4,keyword,[]*/type /*⇒1,type,[definition]*/A /*⇒6,keyword,[]*/struct {
+	/*⇒1,type,[definition]*/X /*⇒3,type,[]*/int /*⇒6,comment,[]*/`foof`
+}
+/*⇒4,keyword,[]*/type /*⇒1,type,[definition]*/B /*⇒9,keyword,[]*/interface {
+	/*⇒1,type,[]*/A
+	/*⇒3,member,[definition]*/sad(/*⇒3,type,[]*/int) /*⇒4,type,[]*/bool
+}
+
+/*⇒4,keyword,[]*/type /*⇒1,type,[definition]*/F /*⇒3,type,[]*/int
+
+/*⇒4,keyword,[]*/func (/*⇒1,variable,[]*/a /*⇒1,operator,[]*/*/*⇒1,type,[]*/A) /*⇒1,member,[definition]*/f() /*⇒4,type,[]*/bool {
+	/*⇒3,keyword,[]*/var /*⇒1,variable,[definition]*/z /*⇒6,type,[]*/string
+	/*⇒1,variable,[definition]*/x /*⇒2,operator,[]*/:= /*⇒5,string,[]*/"foo"
+	/*⇒1,variable,[]*/a(/*⇒1,variable,[definition]*/x)
+	/*⇒1,variable,[definition]*/y /*⇒2,operator,[]*/:= /*⇒5,string,[]*/"bar" /*⇒1,operator,[]*/+ /*⇒1,variable,[]*/x
+	/*⇒6,keyword,[]*/switch /*⇒1,variable,[]*/z {
+	/*⇒4,keyword,[]*/case /*⇒4,string,[]*/"xx":
+	/*⇒7,keyword,[]*/default:
+	}
+	/*⇒6,keyword,[]*/select {
+	/*⇒4,keyword,[]*/case /*⇒1,variable,[definition]*/z /*⇒2,operator,[]*/:= /*⇒2,operator,[]*/<-/*⇒2,variable,[]*/c3[/*⇒1,number,[]*/0]:
+	/*⇒7,keyword,[]*/default:
+	}
+	/*⇒3,keyword,[]*/for /*⇒1,variable,[definition]*/k, /*⇒1,variable,[definition]*/v := /*⇒5,keyword,[]*/range /*⇒1,variable,[]*/m {
+		/*⇒6,keyword,[]*/return (/*⇒1,operator,[]*/!/*⇒1,variable,[]*/k) /*⇒2,operator,[]*/&& /*⇒1,variable,[]*/v[/*⇒1,number,[]*/0] /*⇒2,operator,[]*/== /*⇒3,keyword,[readonly]*/nil
+	}
+	/*⇒2,variable,[]*/c2 /*⇒2,operator,[]*/<- /*⇒1,type,[]*/A./*⇒1,variable,[definition]*/X
+	/*⇒1,variable,[definition]*/w /*⇒2,operator,[]*/:= /*⇒1,variable,[]*/b[/*⇒1,number,[]*/4:]
+	/*⇒1,variable,[definition]*/j /*⇒2,operator,[]*/:= /*⇒3,function,[defaultLibrary]*/len(/*⇒1,variable,[]*/x)
+	/*⇒1,variable,[]*/j/*⇒2,operator,[]*/--
+	/*⇒6,keyword,[]*/return /*⇒4,keyword,[]*/true
+}
+
+/*⇒4,keyword,[]*/func /*⇒1,function,[definition]*/g(/*⇒2,parameter,[definition]*/vv /*⇒3,operator,[]*/.../*⇒9,keyword,[]*/interface{}) {
+	/*⇒2,variable,[definition]*/ff /*⇒2,operator,[]*/:= /*⇒4,keyword,[]*/func() {}
+	/*⇒5,keyword,[]*/defer /*⇒2,variable,[]*/ff()
+	/*⇒2,keyword,[]*/go /*⇒3,namespace,[]*/utf./*⇒9,variable,[definition]*/RuneCount(/*⇒2,string,[]*/"")
+	/*⇒2,keyword,[]*/go /*⇒4,namespace,[]*/utf8./*⇒9,function,[]*/RuneCount(/*⇒2,variable,[]*/vv.(/*⇒6,variable,[definition]*/string))
+	/*⇒2,keyword,[]*/if /*⇒4,keyword,[]*/true {
+	} /*⇒4,keyword,[]*/else {
+	}
+/*⇒5,parameter,[definition]*/Never:
+	/*⇒3,keyword,[]*/for /*⇒1,variable,[definition]*/i /*⇒2,operator,[]*/:= /*⇒1,number,[]*/0; /*⇒1,variable,[]*/i /*⇒1,operator,[]*/< /*⇒2,number,[]*/10; {
+		/*⇒5,keyword,[]*/break Never
+	}
+	_, /*⇒2,variable,[definition]*/ok /*⇒2,operator,[]*/:= /*⇒2,variable,[]*/vv[/*⇒1,number,[]*/0].(/*⇒1,type,[]*/A)
+	/*⇒2,keyword,[]*/if /*⇒1,operator,[]*/!/*⇒2,variable,[]*/ok {
+		/*⇒6,keyword,[]*/switch /*⇒1,variable,[definition]*/x /*⇒2,operator,[]*/:= /*⇒2,variable,[]*/vv[/*⇒1,number,[]*/0].(/*⇒4,keyword,[]*/type) {
+		}
+	}
+}
+
diff --git a/internal/lsp/testdata/semantic/b.go b/internal/lsp/testdata/semantic/b.go
new file mode 100644
index 0000000..8c10eb7
--- /dev/null
+++ b/internal/lsp/testdata/semantic/b.go
@@ -0,0 +1,10 @@
+package semantictokens //@ semantic("")
+
+func weirⰀd() {
+	const (
+		snil  = nil
+		nil   = true
+		true  = false
+		false = snil
+	)
+}
diff --git a/internal/lsp/testdata/semantic/b.go.golden b/internal/lsp/testdata/semantic/b.go.golden
new file mode 100644
index 0000000..90d4977
--- /dev/null
+++ b/internal/lsp/testdata/semantic/b.go.golden
@@ -0,0 +1,12 @@
+-- semantic --
+/*⇒7,keyword,[]*/package /*⇒14,namespace,[]*/semantictokens //@ semantic("")
+
+/*⇒4,keyword,[]*/func /*⇒6,function,[definition]*/weirⰀd() {
+	/*⇒5,keyword,[]*/const (
+		/*⇒4,variable,[definition readonly]*/snil  = /*⇒3,keyword,[readonly]*/nil
+		/*⇒3,variable,[definition readonly]*/nil   = /*⇒4,keyword,[]*/true
+		/*⇒4,variable,[definition readonly]*/true  = /*⇒5,keyword,[]*/false
+		/*⇒5,variable,[definition readonly]*/false = /*⇒4,variable,[readonly]*/snil
+	)
+}
+
diff --git a/internal/lsp/testdata/semantic/semantic_test.go b/internal/lsp/testdata/semantic/semantic_test.go
new file mode 100644
index 0000000..63d59f6
--- /dev/null
+++ b/internal/lsp/testdata/semantic/semantic_test.go
@@ -0,0 +1,13 @@
+package semantictokens
+
+import (
+	"os"
+	"testing"
+)
+
+func TestSemanticTokens(t *testing.T) {
+	a, _ := os.Getwd()
+	// climb up to find internal/lsp
+	// find all the .go files
+
+}
diff --git a/internal/lsp/testdata/summary.txt.golden b/internal/lsp/testdata/summary.txt.golden
index b7795a1..6e2da0b 100644
--- a/internal/lsp/testdata/summary.txt.golden
+++ b/internal/lsp/testdata/summary.txt.golden
@@ -12,6 +12,7 @@
 FoldingRangesCount = 2
 FormatCount = 6
 ImportCount = 8
+SemanticTokenCount = 2
 SuggestedFixCount = 38
 FunctionExtractionCount = 12
 DefinitionsCount = 63
diff --git a/internal/lsp/tests/tests.go b/internal/lsp/tests/tests.go
index 088289b..3d173b4 100644
--- a/internal/lsp/tests/tests.go
+++ b/internal/lsp/tests/tests.go
@@ -58,6 +58,7 @@
 type FoldingRanges []span.Span
 type Formats []span.Span
 type Imports []span.Span
+type SemanticTokens []span.Span
 type SuggestedFixes map[span.Span][]string
 type FunctionExtractions map[span.Span]span.Span
 type Definitions map[span.Span]Definition
@@ -90,6 +91,7 @@
 	FoldingRanges                 FoldingRanges
 	Formats                       Formats
 	Imports                       Imports
+	SemanticTokens                SemanticTokens
 	SuggestedFixes                SuggestedFixes
 	FunctionExtractions           FunctionExtractions
 	Definitions                   Definitions
@@ -133,6 +135,7 @@
 	FoldingRanges(*testing.T, span.Span)
 	Format(*testing.T, span.Span)
 	Import(*testing.T, span.Span)
+	SemanticTokens(*testing.T, span.Span)
 	SuggestedFix(*testing.T, span.Span, []string)
 	FunctionExtraction(*testing.T, span.Span, span.Span)
 	Definition(*testing.T, span.Span, Definition)
@@ -241,6 +244,7 @@
 	o.CompletionBudget = time.Minute
 	o.HierarchicalDocumentSymbolSupport = true
 	o.ExperimentalWorkspaceModule = true
+	o.SemanticTokens = true
 }
 
 func RunTests(t *testing.T, dataDir string, includeMultiModule bool, f func(*testing.T, *Data)) {
@@ -431,6 +435,7 @@
 		"fold":            datum.collectFoldingRanges,
 		"format":          datum.collectFormats,
 		"import":          datum.collectImports,
+		"semantic":        datum.collectSemanticTokens,
 		"godef":           datum.collectDefinitions,
 		"implementations": datum.collectImplementations,
 		"typdef":          datum.collectTypeDefinitions,
@@ -642,6 +647,16 @@
 		}
 	})
 
+	t.Run("SemanticTokens", func(t *testing.T) {
+		t.Helper()
+		for _, spn := range data.SemanticTokens {
+			t.Run(uriName(spn.URI()), func(t *testing.T) {
+				t.Helper()
+				tests.SemanticTokens(t, spn)
+			})
+		}
+	})
+
 	t.Run("SuggestedFix", func(t *testing.T) {
 		t.Helper()
 		for spn, actionKinds := range data.SuggestedFixes {
@@ -860,6 +875,7 @@
 	fmt.Fprintf(buf, "FoldingRangesCount = %v\n", len(data.FoldingRanges))
 	fmt.Fprintf(buf, "FormatCount = %v\n", len(data.Formats))
 	fmt.Fprintf(buf, "ImportCount = %v\n", len(data.Imports))
+	fmt.Fprintf(buf, "SemanticTokenCount = %v\n", len(data.SemanticTokens))
 	fmt.Fprintf(buf, "SuggestedFixCount = %v\n", len(data.SuggestedFixes))
 	fmt.Fprintf(buf, "FunctionExtractionCount = %v\n", len(data.FunctionExtractions))
 	fmt.Fprintf(buf, "DefinitionsCount = %v\n", definitionCount)
@@ -955,6 +971,9 @@
 	if file == nil {
 		data.t.Fatalf("could not find golden contents %v: %v", fragment, tag)
 	}
+	if len(file.Data) == 0 {
+		return file.Data
+	}
 	return file.Data[:len(file.Data)-1] // drop the trailing \n
 }
 
@@ -1077,6 +1096,10 @@
 	data.Imports = append(data.Imports, spn)
 }
 
+func (data *Data) collectSemanticTokens(spn span.Span) {
+	data.SemanticTokens = append(data.SemanticTokens, spn)
+}
+
 func (data *Data) collectSuggestedFixes(spn span.Span, actionKind string) {
 	if _, ok := data.SuggestedFixes[spn]; !ok {
 		data.SuggestedFixes[spn] = []string{}