blob: 4439acb6ff6978056f9a1850380276ddd461b52f [file] [log] [blame]
Ian Cottrell5c2858a2019-03-08 13:22:06 -05001// Copyright 2019 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package span
6
7import (
Ian Cottrell2f43c6d2019-03-15 13:19:43 -04008 "fmt"
Ian Cottrell5c2858a2019-03-08 13:22:06 -05009 "unicode/utf16"
10 "unicode/utf8"
11)
12
13// ToUTF16Column calculates the utf16 column expressed by the point given the
14// supplied file contents.
15// This is used to convert from the native (always in bytes) column
16// representation and the utf16 counts used by some editors.
Ian Cottrell2f43c6d2019-03-15 13:19:43 -040017func ToUTF16Column(p Point, content []byte) (int, error) {
18 if content == nil {
19 return -1, fmt.Errorf("ToUTF16Column: missing content")
Ian Cottrell5c2858a2019-03-08 13:22:06 -050020 }
Ian Cottrell2f43c6d2019-03-15 13:19:43 -040021 if !p.HasPosition() {
22 return -1, fmt.Errorf("ToUTF16Column: point is missing position")
Ian Cottrell48d47c42019-03-13 18:00:13 -040023 }
Ian Cottrell2f43c6d2019-03-15 13:19:43 -040024 if !p.HasOffset() {
25 return -1, fmt.Errorf("ToUTF16Column: point is missing offset")
26 }
27 offset := p.Offset()
28 col := p.Column()
29 if col == 1 {
30 // column 1, so it must be chr 1
31 return 1, nil
32 }
33 // work out the offset at the start of the line using the column
34 lineOffset := offset - (col - 1)
35 if lineOffset < 0 || offset > len(content) {
36 return -1, fmt.Errorf("ToUTF16Column: offsets %v-%v outside file contents (%v)", lineOffset, offset, len(content))
Ian Cottrell5c2858a2019-03-08 13:22:06 -050037 }
Rebecca Stamblerf9398172019-04-18 16:55:24 -040038 // Use the offset to pick out the line start.
39 // This cannot panic: offset > len(content) and lineOffset < offset.
Ian Cottrell5c2858a2019-03-08 13:22:06 -050040 start := content[lineOffset:]
Rebecca Stamblerf9398172019-04-18 16:55:24 -040041
42 // Now, truncate down to the supplied column.
43 if col >= len(start) {
44 return -1, fmt.Errorf("ToUTF16Column: line (%v) is shorter than column (%v)", len(start), col)
45 }
Ian Cottrell2f43c6d2019-03-15 13:19:43 -040046 start = start[:col]
Ian Cottrell5c2858a2019-03-08 13:22:06 -050047 // and count the number of utf16 characters
48 // in theory we could do this by hand more efficiently...
Ian Cottrell2f43c6d2019-03-15 13:19:43 -040049 return len(utf16.Encode([]rune(string(start)))), nil
Ian Cottrell5c2858a2019-03-08 13:22:06 -050050}
51
Ian Cottrell2f43c6d2019-03-15 13:19:43 -040052// FromUTF16Column advances the point by the utf16 character offset given the
53// supplied line contents.
Ian Cottrell5c2858a2019-03-08 13:22:06 -050054// This is used to convert from the utf16 counts used by some editors to the
55// native (always in bytes) column representation.
Ian Cottrell2f43c6d2019-03-15 13:19:43 -040056func FromUTF16Column(p Point, chr int, content []byte) (Point, error) {
57 if !p.HasOffset() {
58 return Point{}, fmt.Errorf("FromUTF16Column: point is missing offset")
Ian Cottrell5c2858a2019-03-08 13:22:06 -050059 }
Ian Cottrell2f43c6d2019-03-15 13:19:43 -040060 // if chr is 1 then no adjustment needed
61 if chr <= 1 {
62 return p, nil
63 }
Rebecca Stambleraa385af2019-04-16 16:19:03 -040064 if p.Offset() >= len(content) {
65 return p, fmt.Errorf("offset (%v) greater than length of content (%v)", p.Offset(), len(content))
66 }
Ian Cottrell2f43c6d2019-03-15 13:19:43 -040067 remains := content[p.Offset():]
68 // scan forward the specified number of characters
Ian Cottrell5c2858a2019-03-08 13:22:06 -050069 for count := 1; count < chr; count++ {
70 if len(remains) <= 0 {
Ian Cottrell2f43c6d2019-03-15 13:19:43 -040071 return Point{}, fmt.Errorf("FromUTF16Column: chr goes beyond the content")
Ian Cottrell5c2858a2019-03-08 13:22:06 -050072 }
73 r, w := utf8.DecodeRune(remains)
74 if r == '\n' {
Ian Cottrell2f43c6d2019-03-15 13:19:43 -040075 return Point{}, fmt.Errorf("FromUTF16Column: chr goes beyond the line")
Ian Cottrell5c2858a2019-03-08 13:22:06 -050076 }
77 remains = remains[w:]
78 if r >= 0x10000 {
79 // a two point rune
80 count++
81 // if we finished in a two point rune, do not advance past the first
82 if count >= chr {
83 break
84 }
85 }
Ian Cottrell2f43c6d2019-03-15 13:19:43 -040086 p.v.Column += w
87 p.v.Offset += w
Ian Cottrell5c2858a2019-03-08 13:22:06 -050088 }
Ian Cottrell2f43c6d2019-03-15 13:19:43 -040089 return p, nil
Ian Cottrell5c2858a2019-03-08 13:22:06 -050090}