internal/lsp/lsppos: add helpers for mapping token positions

For use-cases that work only with token.Pos and protocol.Position, the
span package is unnecessarily indirect and inefficient. It also loses
information about newline termination, and handles positions within
CRLF line endings incorrectly.

The lsppos package was written to bypass this complexity, but had
limited use and lacked tests.

Add tests and a wrapper API that operates on token.Pos. Also fix
source.TestTokenOffset to not panic, and add a temporary exemption for
the new token.Offset usage.

This change also fixes position calculation in the case of empty file
content. The mapper now finds position (0, 0) at offset 0 of an empty
file.
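
For illustration, a minimal, hypothetical sketch of how the new API
might be used (not part of this change; lsppos is internal to x/tools,
so the import only resolves inside this repository, and the file name
and contents below are made up):

	package main

	import (
		"fmt"
		"go/token"

		"golang.org/x/tools/internal/lsp/lsppos"
	)

	func main() {
		content := []byte("a𐐀b\r\nx\n")

		// Map a byte offset to a 0-based (line, UTF-16 character) position.
		m := lsppos.NewMapper(content)
		if pos, ok := m.Position(len(content)); ok {
			fmt.Println(pos) // {2 0}: the position just after the trailing newline
		}

		// An empty file maps offset 0 to position (0, 0).
		ep, ok := lsppos.NewMapper(nil).Position(0)
		fmt.Println(ep, ok) // {0 0} true

		// Map token.Pos values directly, using a token.File to compute offsets.
		file := token.NewFileSet().AddFile("p.go", -1, len(content))
		file.SetLinesForContent(content)
		tm := lsppos.NewTokenMapper(content, file)
		rng, err := tm.Range(file.Pos(0), file.Pos(len(content)))
		if err != nil {
			panic(err)
		}
		fmt.Println(rng) // the whole-file range
	}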

Change-Id: I639bd3fac78a127b1c8eddad60b890449901c68c
Reviewed-on: https://go-review.googlesource.com/c/tools/+/403678
Reviewed-by: Alan Donovan <adonovan@google.com>
Run-TryBot: Robert Findley <rfindley@google.com>
gopls-CI: kokoro <noreply+kokoro@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
diff --git a/internal/lsp/lsppos/lsppos.go b/internal/lsp/lsppos/lsppos.go
index 0e74330..35f6f13 100644
--- a/internal/lsp/lsppos/lsppos.go
+++ b/internal/lsp/lsppos/lsppos.go
@@ -2,7 +2,10 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
-// Package lsppos provides utilities for working with LSP positions.
+// Package lsppos provides utilities for working with LSP positions. Much of
+// this functionality is duplicated from the internal/span package, but this
+// package is simpler and more accurate with respect to newline-terminated
+// content.
 //
 // See https://microsoft.github.io/language-server-protocol/specification#textDocuments
 // for a description of LSP positions. Notably:
@@ -14,25 +17,30 @@
 package lsppos
 
 import (
+	"errors"
 	"sort"
 	"unicode/utf8"
+
+	"golang.org/x/tools/internal/lsp/protocol"
 )
 
+// Mapper maps UTF-8 byte offsets to LSP positions for a single file.
 type Mapper struct {
 	nonASCII bool
-	src      []byte
+	content  []byte
 
-	// Start-of-line positions. If src is newline-terminated, the final entry will be empty.
+	// Start-of-line positions. If content is newline-terminated, the final
+	// entry will be len(content).
 	lines []int
 }
 
-func NewMapper(src []byte) *Mapper {
-	m := &Mapper{src: src}
-	if len(src) == 0 {
-		return m
+// NewMapper creates a new Mapper for the given content.
+func NewMapper(content []byte) *Mapper {
+	m := &Mapper{
+		content: content,
+		lines:   []int{0},
 	}
-	m.lines = []int{0}
-	for offset, b := range src {
+	for offset, b := range content {
 		if b == '\n' {
 			m.lines = append(m.lines, offset+1)
 		}
@@ -43,8 +51,11 @@
 	return m
 }
 
-func (m *Mapper) Position(offset int) (line, char int) {
-	if offset < 0 || offset > len(m.src) {
+// LineColUTF16 returns the 0-based line and UTF-16 character index for the
+// given byte offset. It returns -1, -1 if offset is out of bounds for the
+// file being mapped.
+func (m *Mapper) LineColUTF16(offset int) (line, char int) {
+	if offset < 0 || offset > len(m.content) {
 		return -1, -1
 	}
 	nextLine := sort.Search(len(m.lines), func(i int) bool {
@@ -57,27 +68,59 @@
 	start := m.lines[line]
 	var charOffset int
 	if m.nonASCII {
-		charOffset = UTF16len(m.src[start:offset])
+		charOffset = UTF16len(m.content[start:offset])
 	} else {
 		charOffset = offset - start
 	}
 
 	var eol int
 	if line == len(m.lines)-1 {
-		eol = len(m.src)
+		eol = len(m.content)
 	} else {
 		eol = m.lines[line+1] - 1
 	}
 
 	// Adjustment for line-endings: \r|\n is the same as |\r\n.
-	if offset == eol && offset > 0 && m.src[offset-1] == '\r' {
+	if offset == eol && offset > 0 && m.content[offset-1] == '\r' {
 		charOffset--
 	}
 
 	return line, charOffset
 }
 
+// Position returns the protocol position corresponding to the given offset. It
+// returns false if offset is out of bounds for the file being mapped.
+func (m *Mapper) Position(offset int) (protocol.Position, bool) {
+	l, c := m.LineColUTF16(offset)
+	if l < 0 {
+		return protocol.Position{}, false
+	}
+	return protocol.Position{
+		Line:      uint32(l),
+		Character: uint32(c),
+	}, true
+}
+
+// Range returns the protocol range corresponding to the given start and end
+// offsets. It returns an error if either offset is out of bounds.
+func (m *Mapper) Range(start, end int) (protocol.Range, error) {
+	startPos, ok := m.Position(start)
+	if !ok {
+		return protocol.Range{}, errors.New("invalid start position")
+	}
+	endPos, ok := m.Position(end)
+	if !ok {
+		return protocol.Range{}, errors.New("invalid end position")
+	}
+
+	return protocol.Range{Start: startPos, End: endPos}, nil
+}
+
+// UTF16len returns the length of the UTF-8 encoded content, were it to be
+// re-encoded as UTF-16.
 func UTF16len(buf []byte) int {
+	// This function copies buf, but microbenchmarks showed it to be faster than
+	// using utf8.DecodeRune due to inlining and avoiding bounds checks.
 	cnt := 0
 	for _, r := range string(buf) {
 		cnt++
diff --git a/internal/lsp/lsppos/lsppos_test.go b/internal/lsp/lsppos/lsppos_test.go
new file mode 100644
index 0000000..8353f92
--- /dev/null
+++ b/internal/lsp/lsppos/lsppos_test.go
@@ -0,0 +1,107 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package lsppos_test
+
+import (
+	"fmt"
+	"strings"
+	"testing"
+
+	. "golang.org/x/tools/internal/lsp/lsppos"
+	"golang.org/x/tools/internal/lsp/protocol"
+)
+
+type testCase struct {
+	content            string      // input text
+	substrOrOffset     interface{} // explicit integer offset, or a substring
+	wantLine, wantChar int         // expected LSP position information
+}
+
+// offset returns the test case's byte offset, either explicit or located by substring.
+func (c testCase) offset() int {
+	switch x := c.substrOrOffset.(type) {
+	case int:
+		return x
+	case string:
+		i := strings.Index(c.content, x)
+		if i < 0 {
+			panic(fmt.Sprintf("%q does not contain substring %q", c.content, x))
+		}
+		return i
+	}
+	panic("substrOrOffset must be an integer or string")
+}
+
+var tests = []testCase{
+	{"a𐐀b", "a", 0, 0},
+	{"a𐐀b", "𐐀", 0, 1},
+	{"a𐐀b", "b", 0, 3},
+	{"a𐐀b\n", "\n", 0, 4},
+	{"a𐐀b\r\n", "\n", 0, 4}, // \r|\n is not a valid position, so we move back to the end of the first line.
+	{"a𐐀b\r\nx", "x", 1, 0},
+	{"a𐐀b\r\nx\ny", "y", 2, 0},
+
+	// Testing EOL and EOF positions
+	{"", 0, 0, 0}, // 0th position of an empty buffer is (0, 0)
+	{"abc", "c", 0, 2},
+	{"abc", 3, 0, 3},
+	{"abc\n", "\n", 0, 3},
+	{"abc\n", 4, 1, 0}, // position after a newline is on the next line
+}
+
+func TestLineChar(t *testing.T) {
+	for _, test := range tests {
+		m := NewMapper([]byte(test.content))
+		offset := test.offset()
+		gotLine, gotChar := m.LineColUTF16(offset)
+		if gotLine != test.wantLine || gotChar != test.wantChar {
+			t.Errorf("LineColUTF16(%d) = (%d,%d), want (%d,%d)", offset, gotLine, gotChar, test.wantLine, test.wantChar)
+		}
+	}
+}
+
+func TestInvalidOffset(t *testing.T) {
+	content := []byte("a𐐀b\r\nx\ny")
+	m := NewMapper(content)
+	for _, offset := range []int{-1, 100} {
+		gotLine, gotChar := m.LineColUTF16(offset)
+		if gotLine != -1 {
+			t.Errorf("LineColUTF16(%d) = (%d,%d), want (-1,-1)", offset, gotLine, gotChar)
+		}
+	}
+}
+
+func TestPosition(t *testing.T) {
+	for _, test := range tests {
+		m := NewMapper([]byte(test.content))
+		offset := test.offset()
+		got, ok := m.Position(offset)
+		if !ok {
+			t.Error("invalid position for", test.substrOrOffset)
+			continue
+		}
+		want := protocol.Position{Line: uint32(test.wantLine), Character: uint32(test.wantChar)}
+		if got != want {
+			t.Errorf("Position(%d) = %v, want %v", offset, got, want)
+		}
+	}
+}
+
+func TestRange(t *testing.T) {
+	for _, test := range tests {
+		m := NewMapper([]byte(test.content))
+		offset := test.offset()
+		got, err := m.Range(0, offset)
+		if err != nil {
+			t.Fatal(err)
+		}
+		want := protocol.Range{
+			End: protocol.Position{Line: uint32(test.wantLine), Character: uint32(test.wantChar)},
+		}
+		if got != want {
+			t.Errorf("Range(0, %d) = %v, want %v", offset, got, want)
+		}
+	}
+}
diff --git a/internal/lsp/lsppos/token.go b/internal/lsp/lsppos/token.go
new file mode 100644
index 0000000..0a3caed
--- /dev/null
+++ b/internal/lsp/lsppos/token.go
@@ -0,0 +1,59 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package lsppos
+
+import (
+	"errors"
+	"go/token"
+
+	"golang.org/x/tools/internal/lsp/protocol"
+)
+
+// TokenMapper maps token.Pos to LSP positions for a single file.
+type TokenMapper struct {
+	// file is used for computing offsets.
+	file *token.File
+
+	// For now, just delegate to a Mapper for position calculation. As an
+	// optimization we could avoid building the mapper and just use the file,
+	// but we would then have to adjust correctly for newline-terminated files.
+	// It is easier to just delegate unless performance becomes a concern.
+	mapper *Mapper
+}
+
+// NewTokenMapper creates a new TokenMapper for the given content, using the
+// provided file to compute offsets.
+func NewTokenMapper(content []byte, file *token.File) *TokenMapper {
+	return &TokenMapper{
+		file:   file,
+		mapper: NewMapper(content),
+	}
+}
+
+// Position returns the protocol position corresponding to the given pos. It
+// returns false if pos is out of bounds for the file being mapped.
+func (m *TokenMapper) Position(pos token.Pos) (protocol.Position, bool) {
+	if int(pos) < m.file.Base() || int(pos) > m.file.Base()+m.file.Size() {
+		return protocol.Position{}, false
+	}
+	offset := m.file.Offset(pos) // usage of token.File.Offset is temporarily exempted
+	return m.mapper.Position(offset)
+}
+
+// Range returns the protocol range corresponding to the given start and end
+// positions. It returns an error if start or end is out of bounds for the file
+// being mapped.
+func (m *TokenMapper) Range(start, end token.Pos) (protocol.Range, error) {
+	startPos, ok := m.Position(start)
+	if !ok {
+		return protocol.Range{}, errors.New("invalid start position")
+	}
+	endPos, ok := m.Position(end)
+	if !ok {
+		return protocol.Range{}, errors.New("invalid end position")
+	}
+
+	return protocol.Range{Start: startPos, End: endPos}, nil
+}
diff --git a/internal/lsp/lsppos/token_test.go b/internal/lsp/lsppos/token_test.go
new file mode 100644
index 0000000..c12d150
--- /dev/null
+++ b/internal/lsp/lsppos/token_test.go
@@ -0,0 +1,57 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package lsppos_test
+
+import (
+	"go/token"
+	"testing"
+
+	. "golang.org/x/tools/internal/lsp/lsppos"
+	"golang.org/x/tools/internal/lsp/protocol"
+)
+
+func makeTokenMapper(content []byte) (*TokenMapper, *token.File) {
+	file := token.NewFileSet().AddFile("p.go", -1, len(content))
+	file.SetLinesForContent(content)
+	return NewTokenMapper(content, file), file
+}
+
+func TestInvalidPosition(t *testing.T) {
+	content := []byte("a𐐀b\r\nx\ny")
+	m, _ := makeTokenMapper(content)
+
+	for _, pos := range []token.Pos{-1, 100} {
+		posn, ok := m.Position(pos)
+		if ok {
+			t.Errorf("Position(%d) = %v, want error", pos, posn)
+		}
+	}
+}
+
+func TestTokenPosition(t *testing.T) {
+	for _, test := range tests {
+		m, f := makeTokenMapper([]byte(test.content))
+		pos := token.Pos(f.Base() + test.offset())
+		got, ok := m.Position(pos)
+		if !ok {
+			t.Error("invalid position for", test.substrOrOffset)
+			continue
+		}
+		want := protocol.Position{Line: uint32(test.wantLine), Character: uint32(test.wantChar)}
+		if got != want {
+			t.Errorf("Position(%d) = %v, want %v", pos, got, want)
+		}
+		gotRange, err := m.Range(token.Pos(f.Base()), pos)
+		if err != nil {
+			t.Fatal(err)
+		}
+		wantRange := protocol.Range{
+			End: want,
+		}
+		if gotRange != wantRange {
+			t.Errorf("Range(%d) = %v, want %v", pos, gotRange, wantRange)
+		}
+	}
+}
diff --git a/internal/lsp/source/format.go b/internal/lsp/source/format.go
index 79da0b3..5460ec3 100644
--- a/internal/lsp/source/format.go
+++ b/internal/lsp/source/format.go
@@ -329,21 +329,18 @@
 		if err != nil {
 			return nil, fmt.Errorf("computing offsets: %v", err)
 		}
-		startLine, startChar := m.Position(spn.Start().Offset())
-		endLine, endChar := m.Position(spn.End().Offset())
-		if startLine < 0 || endLine < 0 {
-			return nil, fmt.Errorf("out of bound span: %v", spn)
+		rng, err := m.Range(spn.Start().Offset(), spn.End().Offset())
+		if err != nil {
+			return nil, err
 		}
 
-		pstart := protocol.Position{Line: uint32(startLine), Character: uint32(startChar)}
-		pend := protocol.Position{Line: uint32(endLine), Character: uint32(endChar)}
-		if pstart == pend && edit.NewText == "" {
+		if rng.Start == rng.End && edit.NewText == "" {
 			// Degenerate case, which may result from a diff tool wanting to delete
 			// '\r' in line endings. Filter it out.
 			continue
 		}
 		result = append(result, protocol.TextEdit{
-			Range:   protocol.Range{Start: pstart, End: pend},
+			Range:   rng,
 			NewText: edit.NewText,
 		})
 	}
diff --git a/internal/lsp/source/offset_test.go b/internal/lsp/source/offset_test.go
index 1007677..71c7e72 100644
--- a/internal/lsp/source/offset_test.go
+++ b/internal/lsp/source/offset_test.go
@@ -25,43 +25,39 @@
 	if err != nil {
 		t.Fatal(err)
 	}
-	var tokPkg *packages.Package
+	var tokenPkg, sourcePkg *packages.Package
 	for _, pkg := range pkgs {
-		if pkg.PkgPath == "go/token" {
-			tokPkg = pkg
-			break
+		switch pkg.PkgPath {
+		case "go/token":
+			tokenPkg = pkg
+		case "golang.org/x/tools/internal/lsp/source":
+			sourcePkg = pkg
 		}
 	}
-	typname, ok := tokPkg.Types.Scope().Lookup("File").(*types.TypeName)
-	if !ok {
-		t.Fatal("expected go/token.File typename, got none")
+
+	if tokenPkg == nil {
+		t.Fatal("missing package go/token")
 	}
-	named, ok := typname.Type().(*types.Named)
-	if !ok {
-		t.Fatalf("expected named type, got %T", typname.Type)
+	if sourcePkg == nil {
+		t.Fatal("missing package golang.org/x/tools/internal/lsp/source")
 	}
-	var offset *types.Func
-	for i := 0; i < named.NumMethods(); i++ {
-		meth := named.Method(i)
-		if meth.Name() == "Offset" {
-			offset = meth
-			break
-		}
-	}
+
+	fileObj := tokenPkg.Types.Scope().Lookup("File")
+	tokenOffset, _, _ := types.LookupFieldOrMethod(fileObj.Type(), true, fileObj.Pkg(), "Offset")
+
+	sourceOffset := sourcePkg.Types.Scope().Lookup("Offset").(*types.Func)
+
 	for _, pkg := range pkgs {
+		if pkg.PkgPath == "go/token" { // Allow usage from within go/token itself.
+			continue
+		}
+		if pkg.PkgPath == "golang.org/x/tools/internal/lsp/lsppos" {
+			continue // temporary exemption, to be refactored away
+		}
 		for ident, obj := range pkg.TypesInfo.Uses {
-			if ident.Name != "Offset" {
+			if obj != tokenOffset {
 				continue
 			}
-			if pkg.PkgPath == "go/token" {
-				continue
-			}
-			if !types.Identical(offset.Type(), obj.Type()) {
-				continue
-			}
-			// The only permitted use is in golang.org/x/tools/internal/lsp/source.Offset,
-			// so check the enclosing function.
-			sourceOffset := pkg.Types.Scope().Lookup("Offset").(*types.Func)
 			if sourceOffset.Pos() <= ident.Pos() && ident.Pos() <= sourceOffset.Scope().End() {
 				continue // accepted usage
 			}