internal/lsp/lsppos/lsppos.go - tools - Git at Google

 // Copyright 2021 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

 // Package lsppos provides utilities for working with LSP positions. Much of
 // this functionality is duplicated from the internal/span package, but this
 // package is simpler and more accurate with respect to newline terminated
 // content.
 //
 // See https://microsoft.github.io/language-server-protocol/specification#textDocuments
 // for a description of LSP positions. Notably:
 //   - Positions are specified by a 0-based line count and 0-based utf-16
 //     character offset.
 //   - Positions are line-ending agnostic: there is no way to specify \r|\n or
 //     \n|. Instead the former maps to the end of the current line, and the
 //     latter to the start of the next line.
 package lsppos

 import (
 	"errors"
 	"sort"
 	"unicode/utf8"

 	"golang.org/x/tools/internal/lsp/protocol"
 )

 // Mapper maps utf-8 byte offsets to LSP positions for a single file.
 //
 // Values are expected to be built with NewMapper, which populates every field.
 type Mapper struct {
 	// nonASCII is true when content holds at least one byte >= utf8.RuneSelf.
 	// When it is false, LineColUTF16 computes the column with plain byte
 	// arithmetic instead of decoding the line.
 	nonASCII bool
 	// content is the file's bytes. NewMapper stores the caller's slice as-is,
 	// without copying it.
 	content  []byte

 	// Start-of-line byte offsets in increasing order: the first entry is 0 and
 	// one entry is added after every '\n'. If content is newline-terminated,
 	// the final entry will be len(content).
 	lines []int
 }

 // NewMapper creates a new Mapper for the given content.
 //
 // It walks content once, recording the byte offset at which every line
 // begins and noting whether any non-ASCII byte occurs. The content slice is
 // retained by the returned Mapper rather than copied.
 func NewMapper(content []byte) *Mapper {
 	starts := []int{0} // the first line always begins at offset 0
 	hasNonASCII := false
 	for i := 0; i < len(content); i++ {
 		switch c := content[i]; {
 		case c == '\n':
 			// The following line starts right after the newline byte.
 			starts = append(starts, i+1)
 		case c >= utf8.RuneSelf:
 			hasNonASCII = true
 		}
 	}
 	return &Mapper{
 		nonASCII: hasNonASCII,
 		content:  content,
 		lines:    starts,
 	}
 }

 // LineColUTF16 returns the 0-based UTF-16 line and character index for the
 // given offset. It returns -1, -1 if offset is out of bounds for the file
 // being mapped.
 //
 // An offset that falls between the '\r' and '\n' of a line ending is reported
 // at the same column as the offset just before the '\r'.
 func (m *Mapper) LineColUTF16(offset int) (line, char int) {
 	size := len(m.content)
 	if offset < 0 || offset > size {
 		return -1, -1
 	}

 	// Locate the first line that begins strictly after offset; the line
 	// containing offset is the one before it.
 	after := sort.Search(len(m.lines), func(i int) bool {
 		return m.lines[i] > offset
 	})
 	if after == 0 {
 		// No line starts at or before offset (e.g. the line table is empty).
 		return -1, -1
 	}
 	line = after - 1
 	lineStart := m.lines[line]

 	if m.nonASCII {
 		char = UTF16len(m.content[lineStart:offset])
 	} else {
 		// Pure ASCII: one byte is exactly one UTF-16 code unit.
 		char = offset - lineStart
 	}

 	// The line ends at the byte before the next line's start (its '\n'), or
 	// at the end of the content when this is the last line.
 	lineEnd := size
 	if after < len(m.lines) {
 		lineEnd = m.lines[after] - 1
 	}

 	// Adjustment for line-endings: \r|\n is the same as |\r\n.
 	if offset > 0 && offset == lineEnd && m.content[offset-1] == '\r' {
 		char--
 	}

 	return line, char
 }

 // Position returns the protocol position corresponding to the given offset. It
 // returns false if offset is out of bounds for the file being mapped.
 //
 // On failure the returned position is the zero value.
 func (m *Mapper) Position(offset int) (protocol.Position, bool) {
 	var pos protocol.Position
 	line, col := m.LineColUTF16(offset)
 	if line < 0 {
 		// LineColUTF16 signals an out-of-range offset with a negative line.
 		return pos, false
 	}
 	pos.Line = uint32(line)
 	pos.Character = uint32(col)
 	return pos, true
 }

 // Range returns the protocol range corresponding to the given start and end
 // offsets.
 //
 // Each offset is converted independently with Position; an error is returned
 // if either conversion fails, in which case the range is the zero value.
 func (m *Mapper) Range(start, end int) (protocol.Range, error) {
 	var (
 		rng protocol.Range
 		ok  bool
 	)
 	if rng.Start, ok = m.Position(start); !ok {
 		return protocol.Range{}, errors.New("invalid start position")
 	}
 	if rng.End, ok = m.Position(end); !ok {
 		return protocol.Range{}, errors.New("invalid end position")
 	}
 	return rng, nil
 }

 // UTF16len returns the UTF-16 length of the UTF-8 encoded content, were it to
 // be re-encoded as UTF-16: the number of 16-bit code units needed.
 func UTF16len(buf []byte) int {
 	// This function copies buf, but microbenchmarks showed it to be faster than
 	// using utf8.DecodeRune due to inlining and avoiding bounds checks.
 	units := 0
 	for _, r := range string(buf) {
 		if r > 0xFFFF {
 			// Outside the Basic Multilingual Plane: needs a surrogate pair.
 			units += 2
 		} else {
 			units++
 		}
 	}
 	return units
 }
	// Copyright 2021 The Go Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style
	// license that can be found in the LICENSE file.

	// Package lsppos provides utilities for working with LSP positions. Much of
	// this functionality is duplicated from the internal/span package, but this
	// package is simpler and more accurate with respect to newline terminated
	// content.
	//
	// See https://microsoft.github.io/language-server-protocol/specification#textDocuments
	// for a description of LSP positions. Notably:
	//   - Positions are specified by a 0-based line count and 0-based utf-16
	//     character offset.
	//   - Positions are line-ending agnostic: there is no way to specify \r|\n or
	//     \n|. Instead the former maps to the end of the current line, and the
	//     latter to the start of the next line.
	package lsppos

	import (
	"errors"
	"sort"
	"unicode/utf8"

	"golang.org/x/tools/internal/lsp/protocol"
	)

	// Mapper maps utf-8 byte offsets to LSP positions for a single file.
	//
	// Values are expected to be built with NewMapper, which populates every field.
	type Mapper struct {
	// nonASCII is true when content holds at least one byte >= utf8.RuneSelf.
	// When it is false, LineColUTF16 computes the column with plain byte
	// arithmetic instead of decoding the line.
	nonASCII bool
	// content is the file's bytes. NewMapper stores the caller's slice as-is,
	// without copying it.
	content []byte

	// Start-of-line byte offsets in increasing order: the first entry is 0 and
	// one entry is added after every '\n'. If content is newline-terminated,
	// the final entry will be len(content).
	lines []int
	}

	// NewMapper creates a new Mapper for the given content.
	//
	// It walks content once, recording the byte offset at which every line
	// begins and noting whether any non-ASCII byte occurs. The content slice is
	// retained by the returned Mapper rather than copied.
	func NewMapper(content []byte) *Mapper {
		starts := []int{0} // the first line always begins at offset 0
		hasNonASCII := false
		for i := 0; i < len(content); i++ {
			switch c := content[i]; {
			case c == '\n':
				// The following line starts right after the newline byte.
				starts = append(starts, i+1)
			case c >= utf8.RuneSelf:
				hasNonASCII = true
			}
		}
		return &Mapper{
			nonASCII: hasNonASCII,
			content:  content,
			lines:    starts,
		}
	}

	// LineColUTF16 returns the 0-based UTF-16 line and character index for the
	// given offset. It returns -1, -1 if offset is out of bounds for the file
	// being mapped.
	func (m *Mapper) LineColUTF16(offset int) (line, char int) {
	if offset < 0 \|\| offset > len(m.content) {
	return -1, -1
	}
	nextLine := sort.Search(len(m.lines), func(i int) bool {
	return offset < m.lines[i]
	})
	if nextLine == 0 {
	return -1, -1
	}
	line = nextLine - 1
	start := m.lines[line]
	var charOffset int
	if m.nonASCII {
	charOffset = UTF16len(m.content[start:offset])
	} else {
	charOffset = offset - start
	}

	var eol int
	if line == len(m.lines)-1 {
	eol = len(m.content)
	} else {
	eol = m.lines[line+1] - 1
	}

	// Adjustment for line-endings: \r\|\n is the same as \|\r\n.
	if offset == eol && offset > 0 && m.content[offset-1] == '\r' {
	charOffset--
	}

	return line, charOffset
	}

	// Position returns the protocol position corresponding to the given offset. It
	// returns false if offset is out of bounds for the file being mapped.
	//
	// On failure the returned position is the zero value.
	func (m *Mapper) Position(offset int) (protocol.Position, bool) {
		var pos protocol.Position
		line, col := m.LineColUTF16(offset)
		if line < 0 {
			// LineColUTF16 signals an out-of-range offset with a negative line.
			return pos, false
		}
		pos.Line = uint32(line)
		pos.Character = uint32(col)
		return pos, true
	}

	// Range returns the protocol range corresponding to the given start and end
	// offsets.
	//
	// Each offset is converted independently with Position; an error is returned
	// if either conversion fails, in which case the range is the zero value.
	func (m *Mapper) Range(start, end int) (protocol.Range, error) {
		var (
			rng protocol.Range
			ok  bool
		)
		if rng.Start, ok = m.Position(start); !ok {
			return protocol.Range{}, errors.New("invalid start position")
		}
		if rng.End, ok = m.Position(end); !ok {
			return protocol.Range{}, errors.New("invalid end position")
		}
		return rng, nil
	}

	// UTF16len returns the UTF-16 length of the UTF-8 encoded content, were it to
	// be re-encoded as UTF-16: the number of 16-bit code units needed.
	func UTF16len(buf []byte) int {
		// This function copies buf, but microbenchmarks showed it to be faster than
		// using utf8.DecodeRune due to inlining and avoiding bounds checks.
		units := 0
		for _, r := range string(buf) {
			if r > 0xFFFF {
				// Outside the Basic Multilingual Plane: needs a surrogate pair.
				units += 2
			} else {
				units++
			}
		}
		return units
	}