// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package protocol

// This file defines Mapper, which wraps a file content buffer
// ([]byte) and provides efficient conversion between every kind of
// position representation.
//
// gopls uses four main representations of position:
//
// 1. byte offsets, e.g. (start, end int), starting from zero.
//
// 2. go/token notation. Use these types when interacting directly
//    with the go/* syntax packages:
//
// 	token.Pos
// 	token.FileSet
// 	token.File
// 	safetoken.Range = (file token.File, start, end token.Pos)
//
//    Because File.Offset and File.Pos panic on invalid inputs,
//    we do not call them directly and instead use the safetoken package
//    for these conversions. This is enforced by a static check.
//
//    Beware also that the methods of token.File have two bugs for which
//    safetoken contain workarounds:
//    - #57490, whereby the parser may create ast.Nodes during error
//      recovery whose computed positions are out of bounds (EOF+1).
//    - #41029, whereby the wrong line number is returned for the EOF position.
//
// 3. the span package.
//
//    span.Point = (line, col8, offset).
//    span.Span = (uri URI, start, end span.Point)
//
//          Line and column are 1-based.
//          Columns are measured in bytes (UTF-8 codes).
//          All fields are optional.
//
//    These types are useful as intermediate conversions of validated
//    ranges (though MappedRange is superior as it is self contained
//    and universally convertible).  Since their fields are optional
//    they are also useful for parsing user-provided positions (e.g. in
//    the CLI) before we have access to file contents.
//
// 4. protocol, the LSP wire format.
//
//    protocol.Position = (Line, Character uint32)
//    protocol.Range = (start, end Position)
//    protocol.Location = (URI, protocol.Range)
//
//          Line and Character are 0-based.
//          Characters (columns) are measured in UTF-16 codes.
//
//    protocol.Mapper holds the (URI, Content) of a file, enabling
//    efficient mapping between byte offsets, span ranges, and
//    protocol ranges.
//
//    protocol.MappedRange holds a protocol.Mapper and valid (start,
//    end int) byte offsets, enabling infallible, efficient conversion
//    to any other format.

import (
	"bytes"
	"fmt"
	"go/ast"
	"go/token"
	"path/filepath"
	"sort"
	"strings"
	"sync"
	"unicode/utf8"

	"golang.org/x/tools/gopls/internal/lsp/safetoken"
	"golang.org/x/tools/gopls/internal/span"
	"golang.org/x/tools/internal/bug"
)

// A Mapper wraps the content of a file and provides mapping
// between byte offsets and notations of position such as:
//
//   - (line, col8) pairs, where col8 is a 1-based UTF-8 column number
//     (bytes), as used by the go/token and span packages.
//
//   - (line, col16) pairs, where col16 is a 1-based UTF-16 column
//     number, as used by the LSP protocol.
//
// All conversion methods are named "FromTo", where From and To are the two types.
// For example, the PointPosition method converts from a Point to a Position.
//
// Mapper does not intrinsically depend on go/token-based
// representations.  Use safetoken to map between token.Pos <=> byte
// offsets, or the convenience methods such as PosPosition,
// NodePosition, or NodeRange.
//
// See overview comments at top of this file.
type Mapper struct {
	URI     span.URI
	Content []byte

	// Line-number information is requested only for a tiny
	// fraction of Mappers, so we compute it lazily.
	// Call initLines() before accessing fields below.
	linesOnce sync.Once
	lineStart []int // byte offset of start of ith line (0-based); last=EOF iff \n-terminated
	nonASCII  bool

	// TODO(adonovan): adding an extra lineStart entry for EOF
	// might simplify every method that accesses it. Try it out.
}

// NewMapper creates a new mapper for the given URI and content.
func NewMapper(uri span.URI, content []byte) *Mapper {
	return &Mapper{URI: uri, Content: content}
}

// initLines populates the lineStart table.
func (m *Mapper) initLines() {
	m.linesOnce.Do(func() {
		nlines := bytes.Count(m.Content, []byte("\n"))
		m.lineStart = make([]int, 1, nlines+1) // initially []int{0}
		for offset, b := range m.Content {
			if b == '\n' {
				m.lineStart = append(m.lineStart, offset+1)
			}
			if b >= utf8.RuneSelf {
				m.nonASCII = true
			}
		}
	})
}

// -- conversions from span (UTF-8) domain --

// SpanLocation converts a (UTF-8) span to a protocol (UTF-16) range.
// Precondition: the URIs of SpanLocation and Mapper match.
func (m *Mapper) SpanLocation(s span.Span) (Location, error) {
	rng, err := m.SpanRange(s)
	if err != nil {
		return Location{}, err
	}
	return m.RangeLocation(rng), nil
}

// SpanRange converts a (UTF-8) span to a protocol (UTF-16) range.
// Precondition: the URIs of Span and Mapper match.
func (m *Mapper) SpanRange(s span.Span) (Range, error) {
	// Assert that we aren't using the wrong mapper.
	// We check only the base name, and case insensitively,
	// because we can't assume clean paths, no symbolic links,
	// case-sensitive directories. The authoritative answer
	// requires querying the file system, and we don't want
	// to do that.
	if !strings.EqualFold(filepath.Base(string(m.URI)), filepath.Base(string(s.URI()))) {
		return Range{}, bug.Errorf("mapper is for file %q instead of %q", m.URI, s.URI())
	}
	start, err := m.PointPosition(s.Start())
	if err != nil {
		return Range{}, fmt.Errorf("start: %w", err)
	}
	end, err := m.PointPosition(s.End())
	if err != nil {
		return Range{}, fmt.Errorf("end: %w", err)
	}
	return Range{Start: start, End: end}, nil
}

// PointPosition converts a valid span (UTF-8) point to a protocol (UTF-16) position.
func (m *Mapper) PointPosition(p span.Point) (Position, error) {
	if p.HasPosition() {
		line, col8 := p.Line()-1, p.Column()-1 // both 0-based
		m.initLines()
		if line >= len(m.lineStart) {
			return Position{}, fmt.Errorf("line number %d out of range (max %d)", line, len(m.lineStart))
		}
		offset := m.lineStart[line]
		end := offset + col8

		// Validate column.
		if end > len(m.Content) {
			return Position{}, fmt.Errorf("column is beyond end of file")
		} else if line+1 < len(m.lineStart) && end >= m.lineStart[line+1] {
			return Position{}, fmt.Errorf("column is beyond end of line")
		}

		char := UTF16Len(m.Content[offset:end])
		return Position{Line: uint32(line), Character: uint32(char)}, nil
	}
	if p.HasOffset() {
		return m.OffsetPosition(p.Offset())
	}
	return Position{}, fmt.Errorf("point has neither offset nor line/column")
}

// -- conversions from byte offsets --

// OffsetLocation converts a byte-offset interval to a protocol (UTF-16) location.
func (m *Mapper) OffsetLocation(start, end int) (Location, error) {
	rng, err := m.OffsetRange(start, end)
	if err != nil {
		return Location{}, err
	}
	return m.RangeLocation(rng), nil
}

// OffsetRange converts a byte-offset interval to a protocol (UTF-16) range.
func (m *Mapper) OffsetRange(start, end int) (Range, error) {
	if start > end {
		return Range{}, fmt.Errorf("start offset (%d) > end (%d)", start, end)
	}
	startPosition, err := m.OffsetPosition(start)
	if err != nil {
		return Range{}, fmt.Errorf("start: %v", err)
	}
	endPosition, err := m.OffsetPosition(end)
	if err != nil {
		return Range{}, fmt.Errorf("end: %v", err)
	}
	return Range{Start: startPosition, End: endPosition}, nil
}

// OffsetSpan converts a byte-offset interval to a (UTF-8) span.
// The resulting span contains line, column, and offset information.
func (m *Mapper) OffsetSpan(start, end int) (span.Span, error) {
	if start > end {
		return span.Span{}, fmt.Errorf("start offset (%d) > end (%d)", start, end)
	}
	startPoint, err := m.OffsetPoint(start)
	if err != nil {
		return span.Span{}, fmt.Errorf("start: %v", err)
	}
	endPoint, err := m.OffsetPoint(end)
	if err != nil {
		return span.Span{}, fmt.Errorf("end: %v", err)
	}
	return span.New(m.URI, startPoint, endPoint), nil
}

// OffsetPosition converts a byte offset to a protocol (UTF-16) position.
func (m *Mapper) OffsetPosition(offset int) (Position, error) {
	if !(0 <= offset && offset <= len(m.Content)) {
		return Position{}, fmt.Errorf("invalid offset %d (want 0-%d)", offset, len(m.Content))
	}
	// No error may be returned after this point,
	// even if the offset does not fall at a rune boundary.
	// (See panic in MappedRange.Range reachable.)

	line, col16 := m.lineCol16(offset)
	return Position{Line: uint32(line), Character: uint32(col16)}, nil
}

// lineCol16 converts a valid byte offset to line and UTF-16 column numbers, both 0-based.
func (m *Mapper) lineCol16(offset int) (int, int) {
	line, start, cr := m.line(offset)
	var col16 int
	if m.nonASCII {
		col16 = UTF16Len(m.Content[start:offset])
	} else {
		col16 = offset - start
	}
	if cr {
		col16-- // retreat from \r at line end
	}
	return line, col16
}

// lineCol8 converts a valid byte offset to line and UTF-8 column numbers, both 0-based.
func (m *Mapper) lineCol8(offset int) (int, int) {
	line, start, cr := m.line(offset)
	col8 := offset - start
	if cr {
		col8-- // retreat from \r at line end
	}
	return line, col8
}

// line returns:
// - the 0-based index of the line that encloses the (valid) byte offset;
// - the start offset of that line; and
// - whether the offset denotes a carriage return (\r) at line end.
func (m *Mapper) line(offset int) (int, int, bool) {
	m.initLines()
	// In effect, binary search returns a 1-based result.
	line := sort.Search(len(m.lineStart), func(i int) bool {
		return offset < m.lineStart[i]
	})

	// Adjustment for line-endings: \r|\n is the same as |\r\n.
	var eol int
	if line == len(m.lineStart) {
		eol = len(m.Content) // EOF
	} else {
		eol = m.lineStart[line] - 1
	}
	cr := offset == eol && offset > 0 && m.Content[offset-1] == '\r'

	line-- // 0-based

	return line, m.lineStart[line], cr
}

// OffsetPoint converts a byte offset to a span (UTF-8) point.
// The resulting point contains line, column, and offset information.
func (m *Mapper) OffsetPoint(offset int) (span.Point, error) {
	if !(0 <= offset && offset <= len(m.Content)) {
		return span.Point{}, fmt.Errorf("invalid offset %d (want 0-%d)", offset, len(m.Content))
	}
	line, col8 := m.lineCol8(offset)
	return span.NewPoint(line+1, col8+1, offset), nil
}

// -- conversions from protocol (UTF-16) domain --

// LocationSpan converts a protocol (UTF-16) Location to a (UTF-8) span.
// Precondition: the URIs of Location and Mapper match.
func (m *Mapper) LocationSpan(l Location) (span.Span, error) {
	// TODO(adonovan): check that l.URI matches m.URI.
	return m.RangeSpan(l.Range)
}

// RangeSpan converts a protocol (UTF-16) range to a (UTF-8) span.
// The resulting span has valid Positions and Offsets.
func (m *Mapper) RangeSpan(r Range) (span.Span, error) {
	start, end, err := m.RangeOffsets(r)
	if err != nil {
		return span.Span{}, err
	}
	return m.OffsetSpan(start, end)
}

// RangeOffsets converts a protocol (UTF-16) range to start/end byte offsets.
func (m *Mapper) RangeOffsets(r Range) (int, int, error) {
	start, err := m.PositionOffset(r.Start)
	if err != nil {
		return 0, 0, err
	}
	end, err := m.PositionOffset(r.End)
	if err != nil {
		return 0, 0, err
	}
	return start, end, nil
}

// PositionOffset converts a protocol (UTF-16) position to a byte offset.
func (m *Mapper) PositionOffset(p Position) (int, error) {
	m.initLines()

	// Validate line number.
	if p.Line > uint32(len(m.lineStart)) {
		return 0, fmt.Errorf("line number %d out of range 0-%d", p.Line, len(m.lineStart))
	} else if p.Line == uint32(len(m.lineStart)) {
		if p.Character == 0 {
			return len(m.Content), nil // EOF
		}
		return 0, fmt.Errorf("column is beyond end of file")
	}

	offset := m.lineStart[p.Line]
	content := m.Content[offset:] // rest of file from start of enclosing line

	// Advance bytes up to the required number of UTF-16 codes.
	col8 := 0
	for col16 := 0; col16 < int(p.Character); col16++ {
		r, sz := utf8.DecodeRune(content)
		if sz == 0 {
			return 0, fmt.Errorf("column is beyond end of file")
		}
		if r == '\n' {
			return 0, fmt.Errorf("column is beyond end of line")
		}
		if sz == 1 && r == utf8.RuneError {
			return 0, fmt.Errorf("buffer contains invalid UTF-8 text")
		}
		content = content[sz:]

		if r >= 0x10000 {
			col16++ // rune was encoded by a pair of surrogate UTF-16 codes

			if col16 == int(p.Character) {
				break // requested position is in the middle of a rune
			}
		}
		col8 += sz
	}
	return offset + col8, nil
}

// PositionPoint converts a protocol (UTF-16) position to a span (UTF-8) point.
// The resulting point has a valid Position and Offset.
func (m *Mapper) PositionPoint(p Position) (span.Point, error) {
	offset, err := m.PositionOffset(p)
	if err != nil {
		return span.Point{}, err
	}
	line, col8 := m.lineCol8(offset)

	return span.NewPoint(line+1, col8+1, offset), nil
}

// -- go/token domain convenience methods --

// PosPosition converts a token pos to a protocol (UTF-16) position.
func (m *Mapper) PosPosition(tf *token.File, pos token.Pos) (Position, error) {
	offset, err := safetoken.Offset(tf, pos)
	if err != nil {
		return Position{}, err
	}
	return m.OffsetPosition(offset)
}

// PosPosition converts a token range to a protocol (UTF-16) location.
func (m *Mapper) PosLocation(tf *token.File, start, end token.Pos) (Location, error) {
	startOffset, endOffset, err := safetoken.Offsets(tf, start, end)
	if err != nil {
		return Location{}, err
	}
	rng, err := m.OffsetRange(startOffset, endOffset)
	if err != nil {
		return Location{}, err
	}
	return m.RangeLocation(rng), nil
}

// PosPosition converts a token range to a protocol (UTF-16) range.
func (m *Mapper) PosRange(tf *token.File, start, end token.Pos) (Range, error) {
	startOffset, endOffset, err := safetoken.Offsets(tf, start, end)
	if err != nil {
		return Range{}, err
	}
	return m.OffsetRange(startOffset, endOffset)
}

// PosPosition converts a syntax node range to a protocol (UTF-16) range.
func (m *Mapper) NodeRange(tf *token.File, node ast.Node) (Range, error) {
	return m.PosRange(tf, node.Pos(), node.End())
}

// RangeLocation pairs a protocol Range with its URI, in a Location.
func (m *Mapper) RangeLocation(rng Range) Location {
	return Location{URI: URIFromSpanURI(m.URI), Range: rng}
}

// -- MappedRange --

// OffsetMappedRange returns a MappedRange for the given byte offsets.
// A MappedRange can be converted to any other form.
func (m *Mapper) OffsetMappedRange(start, end int) (MappedRange, error) {
	if !(0 <= start && start <= end && end <= len(m.Content)) {
		return MappedRange{}, fmt.Errorf("invalid offsets (%d, %d) (file %s has size %d)", start, end, m.URI, len(m.Content))
	}
	return MappedRange{m, start, end}, nil
}

// A MappedRange represents a valid byte-offset range of a file.
// Through its Mapper it can be converted into other forms such
// as protocol.Range or span.Span.
//
// Construct one by calling Mapper.OffsetMappedRange with start/end offsets.
// From the go/token domain, call safetoken.Offsets first,
// or use a helper such as ParsedGoFile.MappedPosRange.
type MappedRange struct {
	Mapper     *Mapper
	start, end int // valid byte offsets:  0 <= start <= end <= len(Mapper.Content)
}

// Offsets returns the (start, end) byte offsets of this range.
func (mr MappedRange) Offsets() (start, end int) { return mr.start, mr.end }

// -- convenience functions --

// URI returns the URI of the range's file.
func (mr MappedRange) URI() span.URI {
	return mr.Mapper.URI
}

// Range returns the range in protocol (UTF-16) form.
func (mr MappedRange) Range() Range {
	rng, err := mr.Mapper.OffsetRange(mr.start, mr.end)
	if err != nil {
		panic(err) // can't happen
	}
	return rng
}

// Location returns the range in protocol location (UTF-16) form.
func (mr MappedRange) Location() Location {
	return mr.Mapper.RangeLocation(mr.Range())
}

// Span returns the range in span (UTF-8) form.
func (mr MappedRange) Span() span.Span {
	spn, err := mr.Mapper.OffsetSpan(mr.start, mr.end)
	if err != nil {
		panic(err) // can't happen
	}
	return spn
}

// String formats the range in span (UTF-8) notation.
func (mr MappedRange) String() string {
	return fmt.Sprint(mr.Span())
}
