blob: c518c8047d3b2e44d95838cbd62248bfd66dba2a [file] [log] [blame]
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// this file contains protocol<->span converters
// Here's a handy guide for your tour of the location zoo:
//
// Imports: lsppos --> protocol --> span --> token
//
// lsppos.TokenMapper = (token.File, lsppos.Mapper)
// lsppos.Mapper = (line offset table, content)
//
// protocol.ColumnMapper = (URI, Content). Does all offset <=> column conversions.
// protocol.MappedRange = (protocol.ColumnMapper, {start,end} int)
// protocol.Location = (URI, protocol.Range)
// protocol.Range = (start, end Position)
// protocol.Position = (line, char uint32) 0-based UTF-16
//
// span.Point = (line?, col?, offset?) 1-based UTF-8
// span.Span = (uri URI, start, end span.Point)
// span.Range = (file token.File, start, end token.Pos)
//
// token.Pos
// token.FileSet
// token.File
// offset int
//
// TODO(adonovan): simplify this picture:
// - Eliminate the optionality of span.{Span,Point}'s position and offset fields?
// - Move span.Range to package safetoken. Can we eliminate it?
// Without a ColumnMapper it's not really self-contained.
// It is mostly used by completion. Given access to complete.mapper,
// it could use a pair of byte offsets instead.
// - Merge lsppos.Mapper and protocol.ColumnMapper.
// - Replace all uses of lsppos.TokenMapper by the underlying ParsedGoFile,
// which carries a token.File and a ColumnMapper.
// - Then delete lsppos package.
// - ColumnMapper.OffsetPoint and .Position aren't used outside this package.
// OffsetSpan is barely used, and its user would be better off with a MappedRange
// or protocol.Range. The span package data types are mostly used in tests
// and in argument parsing (without access to file content).
package protocol
import (
"bytes"
"fmt"
"go/token"
"path/filepath"
"strings"
"unicode/utf8"
"golang.org/x/tools/gopls/internal/span"
"golang.org/x/tools/internal/bug"
)
// A ColumnMapper wraps the content of a file and provides mapping
// from byte offsets to and from other notations of position:
//
// - (line, col8) pairs, where col8 is a 1-based UTF-8 column number (bytes),
// as used by go/token;
//
// - (line, col16) pairs, where col16 is a UTF-16 column number (code units),
// as used by the LSP protocol, whose protocol.Position is 0-based;
//
// - (line, colRune) pairs, where colRune is a rune index, as used by ParseWork.
//
// This type does not depend on or use go/token-based representations.
// Use safetoken to map between token.Pos <=> byte offsets.
type ColumnMapper struct {
// URI identifies the mapped file. Range compares its base name
// (case-insensitively) with that of the span being converted.
URI span.URI
// Content holds the bytes of the file; every byte offset accepted or
// returned by the methods of this type is an index into it.
// NOTE(review): NewColumnMapper computes the line table from Content
// once, so mutating Content afterwards presumably desynchronizes the
// two; confirm against callers.
Content []byte
// This field provides a line-number table, nothing more.
// The public API of ColumnMapper doesn't mention go/token,
// nor should it. It need not be consistent with any
// other token.File or FileSet.
//
// TODO(adonovan): eliminate this field in a follow-up
// by inlining the line-number table. Then merge this
// type with the nearly identical lsppos.Mapper.
//
// TODO(adonovan): opt: quick experiments suggest that
// ColumnMappers are created for thousands of files but the
// m.lines field is accessed only for a small handful.
// So it would make sense to allocate it lazily.
lines *token.File
}
// NewColumnMapper returns a ColumnMapper for the file identified by uri
// whose bytes are content. The line-number table is computed eagerly
// from content.
func NewColumnMapper(uri span.URI, content []byte) *ColumnMapper {
	// The FileSet is private to this mapper: it exists only to host the
	// token.File that serves as the line table.
	lineTable := token.NewFileSet().AddFile(uri.Filename(), -1, len(content))
	lineTable.SetLinesForContent(content)

	m := new(ColumnMapper)
	m.URI = uri
	m.Content = content
	m.lines = lineTable
	return m
}
// URIFromSpanURI converts a span.URI to a protocol DocumentURI.
// It is a plain type conversion; the text of the URI is not modified.
func URIFromSpanURI(uri span.URI) DocumentURI {
return DocumentURI(uri)
}
// URIFromPath returns the DocumentURI for the given file path.
// The path is first turned into a span.URI by span.URIFromPath and
// then converted with URIFromSpanURI.
func URIFromPath(path string) DocumentURI {
return URIFromSpanURI(span.URIFromPath(path))
}
// SpanURI returns the span.URI corresponding to u, obtained by passing
// the string form of u to span.URIFromURI.
func (u DocumentURI) SpanURI() span.URI {
return span.URIFromURI(string(u))
}
// Location converts the span s to a protocol Location.
// The range is computed by m.Range; the URI of the result is taken
// from s itself, not from the mapper.
func (m *ColumnMapper) Location(s span.Span) (Location, error) {
	r, err := m.Range(s)
	if err != nil {
		return Location{}, err
	}

	var loc Location
	loc.URI = URIFromSpanURI(s.URI())
	loc.Range = r
	return loc, nil
}
// Range converts the span s to a protocol Range using this mapper.
// It returns an error if s appears to belong to a different file,
// if offsets cannot be computed for s, or if either endpoint cannot
// be converted to a protocol position.
func (m *ColumnMapper) Range(s span.Span) (Range, error) {
	// Sanity check: the span should refer to the file this mapper wraps.
	// Only base names are compared, and without regard to case, since
	// paths may be unclean, may traverse symbolic links, or may live in
	// case-insensitive directories. A definitive answer would require
	// consulting the file system, which we deliberately avoid.
	mapperBase := filepath.Base(string(m.URI))
	spanBase := filepath.Base(string(s.URI()))
	if !strings.EqualFold(mapperBase, spanBase) {
		return Range{}, bug.Errorf("column mapper is for file %q instead of %q", m.URI, s.URI())
	}

	withOffsets, err := s.WithOffset(m.lines)
	if err != nil {
		return Range{}, err
	}

	var rng Range
	if rng.Start, err = m.Position(withOffsets.Start()); err != nil {
		return Range{}, err
	}
	if rng.End, err = m.Position(withOffsets.End()); err != nil {
		return Range{}, err
	}
	return rng, nil
}
// OffsetRange returns the protocol Range for the byte-offset interval
// Content[start:end]. Each endpoint is converted independently with
// OffsetPosition; a failure is reported with a "start: " or "end: "
// prefix identifying the offending endpoint. The relative order of
// start and end is not checked here.
func (m *ColumnMapper) OffsetRange(start, end int) (Range, error) {
	var (
		rng Range
		err error
	)
	if rng.Start, err = m.OffsetPosition(start); err != nil {
		return Range{}, fmt.Errorf("start: %v", err)
	}
	if rng.End, err = m.OffsetPosition(end); err != nil {
		return Range{}, fmt.Errorf("end: %v", err)
	}
	return rng, nil
}
// OffsetSpan returns the Span, within the mapped file, delimited by the
// byte offsets start and end. It is an error for start to exceed end,
// or for either offset to be rejected by OffsetPoint.
func (m *ColumnMapper) OffsetSpan(start, end int) (span.Span, error) {
	if end < start {
		return span.Span{}, fmt.Errorf("start offset (%d) > end (%d)", start, end)
	}

	// Convert the endpoints in order: start first, then end.
	var points [2]span.Point
	for i, offset := range [2]int{start, end} {
		pt, err := m.OffsetPoint(offset)
		if err != nil {
			return span.Span{}, err
		}
		points[i] = pt
	}
	return span.New(m.URI, points[0], points[1]), nil
}
// Position converts the point p to a protocol position.
// The point must carry a byte offset; its line and column are not used.
func (m *ColumnMapper) Position(p span.Point) (Position, error) {
	if p.HasOffset() {
		return m.OffsetPosition(p.Offset())
	}
	return Position{}, fmt.Errorf("point is missing offset")
}
// OffsetPosition converts a byte offset within m.Content to a protocol
// position (zero-based line and UTF-16 column).
func (m *ColumnMapper) OffsetPosition(offset int) (Position, error) {
	// span.OffsetToLineCol8 is called for its "line+1 at EOF"
	// workaround; the UTF-8 column it reports is not needed.
	line, _, err := span.OffsetToLineCol8(m.lines, offset)
	if err != nil {
		return Position{}, fmt.Errorf("OffsetPosition: %v", err)
	}

	var pos Position
	pos.Line = uint32(line - 1)
	// At end of file the workaround has executed, so the column stays
	// zero; everywhere else it is computed the usual way.
	if offset != m.lines.Size() {
		pos.Character = uint32(m.utf16Column(offset))
	}
	return pos, nil
}
// utf16Column reports the zero-based column of the given file offset,
// counted in UTF-16 codes from the start of the line containing it.
// Precondition: 0 <= offset <= len(m.Content).
func (m *ColumnMapper) utf16Column(offset int) int {
	// The line begins just after the last newline that precedes offset,
	// or at the start of the content if there is none
	// (LastIndexByte then yields -1, hence lineStart == 0).
	lineStart := bytes.LastIndexByte(m.Content[:offset], '\n') + 1
	return utf16len(m.Content[lineStart:offset])
}
// utf16len reports how many 16-bit codes the UTF-16 transcoding of the
// UTF-8 bytes s would occupy. Each byte that does not begin a valid
// encoding is decoded as a one-byte replacement rune and so counts as
// a single code.
func utf16len(s []byte) int {
	codes := 0
	for i := 0; i < len(s); {
		// ASCII bytes are one code each; skip the decoder for them.
		if s[i] < utf8.RuneSelf {
			i++
			codes++
			continue
		}
		r, width := utf8.DecodeRune(s[i:])
		i += width
		if r >= 0x10000 {
			codes += 2 // encoded as a surrogate pair
		} else {
			codes++
		}
	}
	return codes
}
// Span converts the range of the protocol location l to a span.Span
// by delegating to RangeSpan. Only l.Range is consulted; l.URI is not
// examined.
func (m *ColumnMapper) Span(l Location) (span.Span, error) {
return m.RangeSpan(l.Range)
}
// RangeSpan converts the UTF-16 protocol range r into a Span of the
// mapped file in which both the position (line/col) and the offset
// fields are populated.
func (m *ColumnMapper) RangeSpan(r Range) (span.Span, error) {
	// Convert the endpoints in order: start first, then end.
	var points [2]span.Point
	for i, pos := range [2]Position{r.Start, r.End} {
		pt, err := m.Point(pos)
		if err != nil {
			return span.Span{}, err
		}
		points[i] = pt
	}
	return span.New(m.URI, points[0], points[1]).WithAll(m.lines)
}
// Offset reports the UTF-8 byte offset, within the mapped file, of the
// protocol position p. Any error from Point is returned unchanged.
func (m *ColumnMapper) Offset(p Position) (int, error) {
	pt, err := m.Point(p)
	if err != nil {
		return 0, err
	}
	offset := pt.Offset()
	return offset, nil
}
// OffsetPoint converts a byte offset into a span.Point carrying the
// line, the UTF-8 column, and the offset itself.
func (m *ColumnMapper) OffsetPoint(offset int) (span.Point, error) {
	// span.OffsetToLineCol8 is used for its "line+1 at EOF" workaround.
	line, col8, err := span.OffsetToLineCol8(m.lines, offset)
	if err == nil {
		return span.NewPoint(line, col8, offset), nil
	}
	return span.Point{}, fmt.Errorf("OffsetPoint: %v", err)
}
// Point converts the protocol position p into a span.Point of the
// mapped file. The resulting point has a valid Position and Offset.
func (m *ColumnMapper) Point(p Position) (span.Point, error) {
	// Protocol lines are 0-based; span lines are 1-based.
	lineNum := int(p.Line) + 1

	// Locate the byte offset at which the containing line begins.
	startOffset, err := span.ToOffset(m.lines, lineNum, 1)
	if err != nil {
		return span.Point{}, err
	}

	// Advance from the start of the line by the (1-based) UTF-16 column.
	col16 := int(p.Character) + 1
	return span.FromUTF16Column(span.NewPoint(lineNum, 1, startOffset), col16, m.Content)
}
// OffsetMappedRange wraps the byte offsets start and end in a
// MappedRange bound to this mapper; the result can be converted to
// any other form. The offsets must satisfy
// 0 <= start <= end <= len(m.Content), otherwise an error is returned.
func (m *ColumnMapper) OffsetMappedRange(start, end int) (MappedRange, error) {
	size := len(m.Content)
	if start < 0 || start > end || end > size {
		return MappedRange{}, fmt.Errorf("invalid offsets (%d, %d) (file %s has size %d)", start, end, m.URI, size)
	}
	return MappedRange{Mapper: m, start: start, end: end}, nil
}
// A MappedRange represents a valid byte-offset range of a file.
// Through its ColumnMapper it can be converted into other forms such
// as protocol.Range or span.Span.
//
// Construct one by calling ColumnMapper.OffsetMappedRange with start/end offsets.
// From the go/token domain, call safetoken.Offsets first,
// or use a helper such as ParsedGoFile.MappedPosRange.
//
// NOTE: the zero MappedRange has a nil Mapper, which the URI, Range
// and Span methods dereference.
type MappedRange struct {
// Mapper is the mapper of the file that the offsets refer to.
Mapper *ColumnMapper
// start and end are offsets into Mapper.Content; OffsetMappedRange
// only constructs values with 0 <= start <= end <= len(Mapper.Content).
start, end int // valid byte offsets
}
// Offsets reports the byte offsets that delimit this range, start first.
func (mr MappedRange) Offsets() (start, end int) {
	start = mr.start
	end = mr.end
	return start, end
}
// -- convenience functions --
// URI reports the URI of the file this range belongs to, as recorded
// by the range's mapper.
func (mr MappedRange) URI() span.URI {
	mapper := mr.Mapper
	return mapper.URI
}
// TODO(adonovan): once the fluff is removed from all the
// location-conversion methods, it will be obvious that a properly
// constructed MappedRange is always valid and its Range and Span (and
// other) methods simply cannot fail.
// At that point we might want to provide variants of methods such as
// Range and Span below that don't return an error.
// Range converts this range to protocol form by handing its byte
// offsets to the mapper's OffsetRange.
func (mr MappedRange) Range() (Range, error) {
	mapper := mr.Mapper
	return mapper.OffsetRange(mr.start, mr.end)
}
// Span converts this range to span form by handing its byte offsets
// to the mapper's OffsetSpan.
func (mr MappedRange) Span() (span.Span, error) {
	mapper := mr.Mapper
	return mapper.OffsetSpan(mr.start, mr.end)
}
// IsPoint reports whether r is empty, that is, whether its start and
// end positions have the same line and the same character.
func IsPoint(r Range) bool {
	start, end := r.Start, r.End
	if start.Line != end.Line {
		return false
	}
	return start.Character == end.Character
}
// CompareRange orders two ranges, returning -1 if a is before b,
// 0 if a == b, and 1 if a is after b.
//
// Ranges are ordered by their start positions; ranges that start at
// the same position are ordered by their end positions.
func CompareRange(a, b Range) int {
	order := ComparePosition(a.Start, b.Start)
	if order == 0 {
		// Same start: the end positions break the tie.
		order = ComparePosition(a.End, b.End)
	}
	return order
}
// ComparePosition orders two positions, returning -1 if a is before b,
// 0 if a == b, and 1 if a is after b. Positions are compared by line
// first and by character second.
func ComparePosition(a, b Position) int {
	switch {
	case a.Line < b.Line:
		return -1
	case a.Line > b.Line:
		return 1
	case a.Character < b.Character:
		return -1
	case a.Character > b.Character:
		return 1
	}
	return 0
}
// Intersect reports whether the ranges a and b overlap. End positions
// are treated as inclusive: two ranges that merely touch, one ending
// exactly where the other starts, are considered to intersect.
func Intersect(a, b Range) bool {
	// before reports whether p strictly precedes q.
	before := func(p, q Position) bool {
		return p.Line < q.Line || (p.Line == q.Line && p.Character < q.Character)
	}
	// The ranges are disjoint only when one of them lies entirely
	// before the other.
	return !before(b.End, a.Start) && !before(a.End, b.Start)
}
// Format implements fmt.Formatter.
//
// The range is written as "start-end", with each position formatted
// using %v. The verb is ignored, so every verb yields the same output.
//
// Note: Formatter is implemented instead of Stringer (presumably) for
// performance reasons, though it is not clear that it matters in practice.
func (r Range) Format(f fmt.State, _ rune) {
fmt.Fprintf(f, "%v-%v", r.Start, r.End)
}
// Format implements fmt.Formatter.
//
// The position is written as "line:character", using the Line and
// Character values exactly as stored (no adjustment is applied).
// The verb is ignored, so every verb yields the same output.
//
// See Range.Format for discussion of why the Formatter interface is
// implemented rather than Stringer.
func (p Position) Format(f fmt.State, _ rune) {
fmt.Fprintf(f, "%v:%v", p.Line, p.Character)
}