| // Copyright 2021 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| // Package lsppos provides utilities for working with LSP positions. Much of |
| // this functionality is duplicated from the internal/span package, but this |
| // package is simpler and more accurate with respect to newline terminated |
| // content. |
| // |
| // See https://microsoft.github.io/language-server-protocol/specification#textDocuments |
| // for a description of LSP positions. Notably: |
| // - Positions are specified by a 0-based line count and 0-based utf-16 |
| // character offset. |
| // - Positions are line-ending agnostic: there is no way to specify \r|\n or |
| // \n|. Instead the former maps to the end of the current line, and the |
| // latter to the start of the next line. |
| package lsppos |
| |
| import ( |
| "errors" |
| "sort" |
| "unicode/utf8" |
| |
| "golang.org/x/tools/internal/lsp/protocol" |
| ) |
| |
// Mapper translates UTF-8 byte offsets within a single file's content into
// LSP positions (0-based line, 0-based UTF-16 character offset).
type Mapper struct {
	// nonASCII reports whether content holds any byte >= utf8.RuneSelf, in
	// which case byte counts and UTF-16 code unit counts may differ.
	nonASCII bool
	// content is the UTF-8 encoded file content being mapped.
	content []byte

	// lines holds the byte offset at which each line starts; the first entry
	// is 0. If content is newline-terminated, the final entry is len(content).
	lines []int
}
| |
| // NewMapper creates a new Mapper for the given content. |
| func NewMapper(content []byte) *Mapper { |
| m := &Mapper{ |
| content: content, |
| lines: []int{0}, |
| } |
| for offset, b := range content { |
| if b == '\n' { |
| m.lines = append(m.lines, offset+1) |
| } |
| if b >= utf8.RuneSelf { |
| m.nonASCII = true |
| } |
| } |
| return m |
| } |
| |
| // LineColUTF16 returns the 0-based UTF-16 line and character index for the |
| // given offset. It returns -1, -1 if offset is out of bounds for the file |
| // being mapped. |
| func (m *Mapper) LineColUTF16(offset int) (line, char int) { |
| if offset < 0 || offset > len(m.content) { |
| return -1, -1 |
| } |
| nextLine := sort.Search(len(m.lines), func(i int) bool { |
| return offset < m.lines[i] |
| }) |
| if nextLine == 0 { |
| return -1, -1 |
| } |
| line = nextLine - 1 |
| start := m.lines[line] |
| var charOffset int |
| if m.nonASCII { |
| charOffset = UTF16len(m.content[start:offset]) |
| } else { |
| charOffset = offset - start |
| } |
| |
| var eol int |
| if line == len(m.lines)-1 { |
| eol = len(m.content) |
| } else { |
| eol = m.lines[line+1] - 1 |
| } |
| |
| // Adjustment for line-endings: \r|\n is the same as |\r\n. |
| if offset == eol && offset > 0 && m.content[offset-1] == '\r' { |
| charOffset-- |
| } |
| |
| return line, charOffset |
| } |
| |
| // Position returns the protocol position corresponding to the given offset. It |
| // returns false if offset is out of bounds for the file being mapped. |
| func (m *Mapper) Position(offset int) (protocol.Position, bool) { |
| l, c := m.LineColUTF16(offset) |
| if l < 0 { |
| return protocol.Position{}, false |
| } |
| return protocol.Position{ |
| Line: uint32(l), |
| Character: uint32(c), |
| }, true |
| } |
| |
| // Range returns the protocol range corresponding to the given start and end |
| // offsets. |
| func (m *Mapper) Range(start, end int) (protocol.Range, error) { |
| startPos, ok := m.Position(start) |
| if !ok { |
| return protocol.Range{}, errors.New("invalid start position") |
| } |
| endPos, ok := m.Position(end) |
| if !ok { |
| return protocol.Range{}, errors.New("invalid end position") |
| } |
| |
| return protocol.Range{Start: startPos, End: endPos}, nil |
| } |
| |
// UTF16len returns the number of UTF-16 code units that the UTF-8 encoded
// content of buf would occupy if it were re-encoded as UTF-16.
func UTF16len(buf []byte) int {
	// Converting buf to a string copies it; microbenchmarks reportedly showed
	// this to be faster than using utf8.DecodeRune, due to inlining and
	// avoiding bounds checks.
	units := 0
	for _, r := range string(buf) {
		if r < 1<<16 {
			units++
		} else {
			// Runes at or above 1<<16 are counted as two code units.
			units += 2
		}
	}
	return units
}