internal/span/utf16.go - tools - Git at Google

 // Copyright 2019 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

 package span

 import (
 	"fmt"
 	"unicode/utf16"
 	"unicode/utf8"
 )

 // ToUTF16Column returns the 1-based UTF-16 column of the point p within its
 // line, given the contents of the file that p refers to.
 //
 // Points store their column as a 1-based count of bytes, while some editors
 // count columns in UTF-16 code units; this function converts from the former
 // to the latter. The point must carry both a position and an offset.
 //
 // On failure it returns -1 and an error: when content is nil, when p lacks a
 // position or an offset, when the column is less than 1, or when the byte
 // range from the start of the line to p falls outside content.
 func ToUTF16Column(p Point, content []byte) (int, error) {
 	if content == nil {
 		return -1, fmt.Errorf("ToUTF16Column: missing content")
 	}
 	if !p.HasPosition() {
 		return -1, fmt.Errorf("ToUTF16Column: point is missing position")
 	}
 	if !p.HasOffset() {
 		return -1, fmt.Errorf("ToUTF16Column: point is missing offset")
 	}
 	offset := p.Offset()      // 0-based
 	colZero := p.Column() - 1 // 0-based
 	if colZero < 0 {
 		return -1, fmt.Errorf("ToUTF16Column: column is invalid (%v)", colZero)
 	}
 	if colZero == 0 {
 		// First byte of the line, so it is also the first UTF-16 column.
 		return 1, nil
 	}
 	// The column is a byte count, so stepping back by it from the offset
 	// lands on the first byte of the line.
 	lineOffset := offset - colZero
 	if lineOffset < 0 || offset > len(content) {
 		return -1, fmt.Errorf("ToUTF16Column: offsets %v-%v outside file contents (%v)", lineOffset, offset, len(content))
 	}
 	// This cannot panic: 0 <= lineOffset < offset <= len(content).
 	prefix := content[lineOffset:offset]

 	// Count the UTF-16 code units needed for the bytes that precede the
 	// point on its line. Decoding rune by rune avoids building intermediate
 	// string, rune and uint16 slices just to measure a length. Invalid UTF-8
 	// decodes to U+FFFD one byte at a time, which occupies a single unit.
 	units := 0
 	for len(prefix) > 0 {
 		r, size := utf8.DecodeRune(prefix)
 		prefix = prefix[size:]
 		units++
 		// EncodeRune yields a surrogate pair only for runes outside the
 		// Basic Multilingual Plane; those take a second code unit.
 		if hi, _ := utf16.EncodeRune(r); utf16.IsSurrogate(hi) {
 			units++
 		}
 	}
 	return units + 1, nil
 }

 // FromUTF16Column returns p moved forward along its line so that it sits at
 // the 1-based UTF-16 column chr, counted from p itself. content is the
 // buffer that p's offset indexes into.
 //
 // Editors that count in UTF-16 code units use this to obtain the byte based
 // column and offset stored in a Point. A chr of 1 or less returns p
 // unchanged. Scanning stops at a newline, so a chr past the end of the line
 // resolves to the end of the line. When chr falls on the second code unit
 // of a surrogate pair, the point stays in front of that rune.
 //
 // An error is returned when p has no offset, when p's offset is at or past
 // the end of content (p is returned unchanged alongside the error), or when
 // content runs out before chr is reached.
 func FromUTF16Column(p Point, chr int, content []byte) (Point, error) {
 	if !p.HasOffset() {
 		return Point{}, fmt.Errorf("FromUTF16Column: point is missing offset")
 	}
 	// Column 1 is the point itself, so there is nothing to skip.
 	if chr <= 1 {
 		return p, nil
 	}
 	begin := p.Offset()
 	if begin >= len(content) {
 		return p, fmt.Errorf("FromUTF16Column: offset (%v) greater than length of content (%v)", begin, len(content))
 	}

 	pos := begin       // byte cursor into content
 	pending := chr - 1 // UTF-16 code units still to step over
 	for pending > 0 {
 		if pos >= len(content) {
 			return Point{}, fmt.Errorf("FromUTF16Column: chr goes beyond the content")
 		}
 		r, size := utf8.DecodeRune(content[pos:])
 		if r == '\n' {
 			// Per the LSP spec:
 			//
 			// > If the character value is greater than the line length it
 			// > defaults back to the line length.
 			break
 		}
 		units := 1
 		if r >= 0x10000 {
 			// Encoded as a surrogate pair in UTF-16.
 			units = 2
 		}
 		if units > pending {
 			// chr points into the middle of a surrogate pair; stop before
 			// the rune rather than jumping past it.
 			break
 		}
 		pos += size
 		pending -= units
 	}

 	moved := pos - begin
 	p.v.Column += moved
 	p.v.Offset += moved
 	return p, nil
 }
	// Copyright 2019 The Go Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style
	// license that can be found in the LICENSE file.

	package span

	import (
	"fmt"
	"unicode/utf16"
	"unicode/utf8"
	)

	// ToUTF16Column calculates the utf16 column expressed by the point given the
	// supplied file contents.
	// This is used to convert from the native (always in bytes) column
	// representation and the utf16 counts used by some editors.
	func ToUTF16Column(p Point, content []byte) (int, error) {
	if content == nil {
	return -1, fmt.Errorf("ToUTF16Column: missing content")
	}
	if !p.HasPosition() {
	return -1, fmt.Errorf("ToUTF16Column: point is missing position")
	}
	if !p.HasOffset() {
	return -1, fmt.Errorf("ToUTF16Column: point is missing offset")
	}
	offset := p.Offset() // 0-based
	colZero := p.Column() - 1 // 0-based
	if colZero == 0 {
	// 0-based column 0, so it must be chr 1
	return 1, nil
	} else if colZero < 0 {
	return -1, fmt.Errorf("ToUTF16Column: column is invalid (%v)", colZero)
	}
	// work out the offset at the start of the line using the column
	lineOffset := offset - colZero
	if lineOffset < 0 \|\| offset > len(content) {
	return -1, fmt.Errorf("ToUTF16Column: offsets %v-%v outside file contents (%v)", lineOffset, offset, len(content))
	}
	// Use the offset to pick out the line start.
	// This cannot panic: offset > len(content) and lineOffset < offset.
	start := content[lineOffset:]

	// Now, truncate down to the supplied column.
	start = start[:colZero]

	// and count the number of utf16 characters
	// in theory we could do this by hand more efficiently...
	return len(utf16.Encode([]rune(string(start)))) + 1, nil
	}

	// FromUTF16Column returns p moved forward along its line so that it sits at
	// the 1-based UTF-16 column chr, counted from p itself. content is the
	// buffer that p's offset indexes into.
	//
	// Editors that count in UTF-16 code units use this to obtain the byte based
	// column and offset stored in a Point. A chr of 1 or less returns p
	// unchanged. Scanning stops at a newline, so a chr past the end of the line
	// resolves to the end of the line. When chr falls on the second code unit
	// of a surrogate pair, the point stays in front of that rune.
	//
	// An error is returned when p has no offset, when p's offset is at or past
	// the end of content (p is returned unchanged alongside the error), or when
	// content runs out before chr is reached.
	func FromUTF16Column(p Point, chr int, content []byte) (Point, error) {
		if !p.HasOffset() {
			return Point{}, fmt.Errorf("FromUTF16Column: point is missing offset")
		}
		// Column 1 is the point itself, so there is nothing to skip.
		if chr <= 1 {
			return p, nil
		}
		begin := p.Offset()
		if begin >= len(content) {
			return p, fmt.Errorf("FromUTF16Column: offset (%v) greater than length of content (%v)", begin, len(content))
		}

		pos := begin       // byte cursor into content
		pending := chr - 1 // UTF-16 code units still to step over
		for pending > 0 {
			if pos >= len(content) {
				return Point{}, fmt.Errorf("FromUTF16Column: chr goes beyond the content")
			}
			r, size := utf8.DecodeRune(content[pos:])
			if r == '\n' {
				// Per the LSP spec:
				//
				// > If the character value is greater than the line length it
				// > defaults back to the line length.
				break
			}
			units := 1
			if r >= 0x10000 {
				// Encoded as a surrogate pair in UTF-16.
				units = 2
			}
			if units > pending {
				// chr points into the middle of a surrogate pair; stop before
				// the rune rather than jumping past it.
				break
			}
			pos += size
			pending -= units
		}

		moved := pos - begin
		p.v.Column += moved
		p.v.Offset += moved
		return p, nil
	}