blob: 561b3fa50a835d7174597f21d09c9f19e037922d [file] [log] [blame]
Ian Cottrell5c2858a2019-03-08 13:22:06 -05001// Copyright 2019 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package span
6
7import (
Ian Cottrell2f43c6d2019-03-15 13:19:43 -04008 "fmt"
Ian Cottrell5c2858a2019-03-08 13:22:06 -05009 "unicode/utf16"
10 "unicode/utf8"
11)
12
13// ToUTF16Column calculates the utf16 column expressed by the point given the
14// supplied file contents.
15// This is used to convert from the native (always in bytes) column
16// representation and the utf16 counts used by some editors.
Ian Cottrell2f43c6d2019-03-15 13:19:43 -040017func ToUTF16Column(p Point, content []byte) (int, error) {
18 if content == nil {
19 return -1, fmt.Errorf("ToUTF16Column: missing content")
Ian Cottrell5c2858a2019-03-08 13:22:06 -050020 }
Ian Cottrell2f43c6d2019-03-15 13:19:43 -040021 if !p.HasPosition() {
22 return -1, fmt.Errorf("ToUTF16Column: point is missing position")
Ian Cottrell48d47c42019-03-13 18:00:13 -040023 }
Ian Cottrell2f43c6d2019-03-15 13:19:43 -040024 if !p.HasOffset() {
25 return -1, fmt.Errorf("ToUTF16Column: point is missing offset")
26 }
Paul Jolly7af74662019-04-29 18:36:30 +010027 offset := p.Offset() // 0-based
28 colZero := p.Column() - 1 // 0-based
29 if colZero == 0 {
30 // 0-based column 0, so it must be chr 1
Ian Cottrell2f43c6d2019-03-15 13:19:43 -040031 return 1, nil
Rebecca Stamblerc85f9fa2019-09-20 14:05:10 -040032 } else if colZero < 0 {
33 return -1, fmt.Errorf("ToUTF16Column: column is invalid (%v)", colZero)
Ian Cottrell2f43c6d2019-03-15 13:19:43 -040034 }
35 // work out the offset at the start of the line using the column
Paul Jolly7af74662019-04-29 18:36:30 +010036 lineOffset := offset - colZero
Ian Cottrell2f43c6d2019-03-15 13:19:43 -040037 if lineOffset < 0 || offset > len(content) {
38 return -1, fmt.Errorf("ToUTF16Column: offsets %v-%v outside file contents (%v)", lineOffset, offset, len(content))
Ian Cottrell5c2858a2019-03-08 13:22:06 -050039 }
Rebecca Stamblerf9398172019-04-18 16:55:24 -040040 // Use the offset to pick out the line start.
Rebecca Stambler4bf14f72019-04-23 15:08:15 -040041 // This cannot panic: offset > len(content) and lineOffset < offset.
Ian Cottrell5c2858a2019-03-08 13:22:06 -050042 start := content[lineOffset:]
Rebecca Stamblerf9398172019-04-18 16:55:24 -040043
44 // Now, truncate down to the supplied column.
Paul Jolly7af74662019-04-29 18:36:30 +010045 start = start[:colZero]
Rebecca Stamblerc85f9fa2019-09-20 14:05:10 -040046
Ian Cottrell5c2858a2019-03-08 13:22:06 -050047 // and count the number of utf16 characters
48 // in theory we could do this by hand more efficiently...
Paul Jolly7af74662019-04-29 18:36:30 +010049 return len(utf16.Encode([]rune(string(start)))) + 1, nil
Ian Cottrell5c2858a2019-03-08 13:22:06 -050050}
51
Ian Cottrell2f43c6d2019-03-15 13:19:43 -040052// FromUTF16Column advances the point by the utf16 character offset given the
53// supplied line contents.
Ian Cottrell5c2858a2019-03-08 13:22:06 -050054// This is used to convert from the utf16 counts used by some editors to the
55// native (always in bytes) column representation.
Ian Cottrell2f43c6d2019-03-15 13:19:43 -040056func FromUTF16Column(p Point, chr int, content []byte) (Point, error) {
57 if !p.HasOffset() {
58 return Point{}, fmt.Errorf("FromUTF16Column: point is missing offset")
Ian Cottrell5c2858a2019-03-08 13:22:06 -050059 }
Ian Cottrell2f43c6d2019-03-15 13:19:43 -040060 // if chr is 1 then no adjustment needed
61 if chr <= 1 {
62 return p, nil
63 }
Rebecca Stambleraa385af2019-04-16 16:19:03 -040064 if p.Offset() >= len(content) {
Ian Cottrellb5495a52019-04-25 17:07:38 -040065 return p, fmt.Errorf("FromUTF16Column: offset (%v) greater than length of content (%v)", p.Offset(), len(content))
Rebecca Stambleraa385af2019-04-16 16:19:03 -040066 }
Ian Cottrell2f43c6d2019-03-15 13:19:43 -040067 remains := content[p.Offset():]
68 // scan forward the specified number of characters
Ian Cottrell5c2858a2019-03-08 13:22:06 -050069 for count := 1; count < chr; count++ {
70 if len(remains) <= 0 {
Ian Cottrell2f43c6d2019-03-15 13:19:43 -040071 return Point{}, fmt.Errorf("FromUTF16Column: chr goes beyond the content")
Ian Cottrell5c2858a2019-03-08 13:22:06 -050072 }
73 r, w := utf8.DecodeRune(remains)
74 if r == '\n' {
Paul Jolly72ffa072019-07-04 09:55:57 +010075 // Per the LSP spec:
76 //
77 // > If the character value is greater than the line length it
78 // > defaults back to the line length.
79 break
Ian Cottrell5c2858a2019-03-08 13:22:06 -050080 }
81 remains = remains[w:]
82 if r >= 0x10000 {
83 // a two point rune
84 count++
85 // if we finished in a two point rune, do not advance past the first
86 if count >= chr {
87 break
88 }
89 }
Ian Cottrell2f43c6d2019-03-15 13:19:43 -040090 p.v.Column += w
91 p.v.Offset += w
Ian Cottrell5c2858a2019-03-08 13:22:06 -050092 }
Ian Cottrell2f43c6d2019-03-15 13:19:43 -040093 return p, nil
Ian Cottrell5c2858a2019-03-08 13:22:06 -050094}