Ian Cottrell | 5c2858a | 2019-03-08 13:22:06 -0500 | [diff] [blame] | 1 | // Copyright 2019 The Go Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style |
| 3 | // license that can be found in the LICENSE file. |
| 4 | |
| 5 | package span |
| 6 | |
| 7 | import ( |
Ian Cottrell | 2f43c6d | 2019-03-15 13:19:43 -0400 | [diff] [blame] | 8 | "fmt" |
Ian Cottrell | 5c2858a | 2019-03-08 13:22:06 -0500 | [diff] [blame] | 9 | "unicode/utf16" |
| 10 | "unicode/utf8" |
| 11 | ) |
| 12 | |
| 13 | // ToUTF16Column calculates the utf16 column expressed by the point given the |
| 14 | // supplied file contents. |
| 15 | // This is used to convert from the native (always in bytes) column |
| 16 | // representation and the utf16 counts used by some editors. |
Ian Cottrell | 2f43c6d | 2019-03-15 13:19:43 -0400 | [diff] [blame] | 17 | func ToUTF16Column(p Point, content []byte) (int, error) { |
| 18 | if content == nil { |
| 19 | return -1, fmt.Errorf("ToUTF16Column: missing content") |
Ian Cottrell | 5c2858a | 2019-03-08 13:22:06 -0500 | [diff] [blame] | 20 | } |
Ian Cottrell | 2f43c6d | 2019-03-15 13:19:43 -0400 | [diff] [blame] | 21 | if !p.HasPosition() { |
| 22 | return -1, fmt.Errorf("ToUTF16Column: point is missing position") |
Ian Cottrell | 48d47c4 | 2019-03-13 18:00:13 -0400 | [diff] [blame] | 23 | } |
Ian Cottrell | 2f43c6d | 2019-03-15 13:19:43 -0400 | [diff] [blame] | 24 | if !p.HasOffset() { |
| 25 | return -1, fmt.Errorf("ToUTF16Column: point is missing offset") |
| 26 | } |
Paul Jolly | 7af7466 | 2019-04-29 18:36:30 +0100 | [diff] [blame] | 27 | offset := p.Offset() // 0-based |
| 28 | colZero := p.Column() - 1 // 0-based |
| 29 | if colZero == 0 { |
| 30 | // 0-based column 0, so it must be chr 1 |
Ian Cottrell | 2f43c6d | 2019-03-15 13:19:43 -0400 | [diff] [blame] | 31 | return 1, nil |
Rebecca Stambler | c85f9fa | 2019-09-20 14:05:10 -0400 | [diff] [blame] | 32 | } else if colZero < 0 { |
| 33 | return -1, fmt.Errorf("ToUTF16Column: column is invalid (%v)", colZero) |
Ian Cottrell | 2f43c6d | 2019-03-15 13:19:43 -0400 | [diff] [blame] | 34 | } |
| 35 | // work out the offset at the start of the line using the column |
Paul Jolly | 7af7466 | 2019-04-29 18:36:30 +0100 | [diff] [blame] | 36 | lineOffset := offset - colZero |
Ian Cottrell | 2f43c6d | 2019-03-15 13:19:43 -0400 | [diff] [blame] | 37 | if lineOffset < 0 || offset > len(content) { |
| 38 | return -1, fmt.Errorf("ToUTF16Column: offsets %v-%v outside file contents (%v)", lineOffset, offset, len(content)) |
Ian Cottrell | 5c2858a | 2019-03-08 13:22:06 -0500 | [diff] [blame] | 39 | } |
Rebecca Stambler | f939817 | 2019-04-18 16:55:24 -0400 | [diff] [blame] | 40 | // Use the offset to pick out the line start. |
Rebecca Stambler | 4bf14f7 | 2019-04-23 15:08:15 -0400 | [diff] [blame] | 41 | // This cannot panic: offset > len(content) and lineOffset < offset. |
Ian Cottrell | 5c2858a | 2019-03-08 13:22:06 -0500 | [diff] [blame] | 42 | start := content[lineOffset:] |
Rebecca Stambler | f939817 | 2019-04-18 16:55:24 -0400 | [diff] [blame] | 43 | |
| 44 | // Now, truncate down to the supplied column. |
Paul Jolly | 7af7466 | 2019-04-29 18:36:30 +0100 | [diff] [blame] | 45 | start = start[:colZero] |
Rebecca Stambler | c85f9fa | 2019-09-20 14:05:10 -0400 | [diff] [blame] | 46 | |
Ian Cottrell | 5c2858a | 2019-03-08 13:22:06 -0500 | [diff] [blame] | 47 | // and count the number of utf16 characters |
| 48 | // in theory we could do this by hand more efficiently... |
Paul Jolly | 7af7466 | 2019-04-29 18:36:30 +0100 | [diff] [blame] | 49 | return len(utf16.Encode([]rune(string(start)))) + 1, nil |
Ian Cottrell | 5c2858a | 2019-03-08 13:22:06 -0500 | [diff] [blame] | 50 | } |
| 51 | |
Ian Cottrell | 2f43c6d | 2019-03-15 13:19:43 -0400 | [diff] [blame] | 52 | // FromUTF16Column advances the point by the utf16 character offset given the |
| 53 | // supplied line contents. |
Ian Cottrell | 5c2858a | 2019-03-08 13:22:06 -0500 | [diff] [blame] | 54 | // This is used to convert from the utf16 counts used by some editors to the |
| 55 | // native (always in bytes) column representation. |
Ian Cottrell | 2f43c6d | 2019-03-15 13:19:43 -0400 | [diff] [blame] | 56 | func FromUTF16Column(p Point, chr int, content []byte) (Point, error) { |
| 57 | if !p.HasOffset() { |
| 58 | return Point{}, fmt.Errorf("FromUTF16Column: point is missing offset") |
Ian Cottrell | 5c2858a | 2019-03-08 13:22:06 -0500 | [diff] [blame] | 59 | } |
Ian Cottrell | 2f43c6d | 2019-03-15 13:19:43 -0400 | [diff] [blame] | 60 | // if chr is 1 then no adjustment needed |
| 61 | if chr <= 1 { |
| 62 | return p, nil |
| 63 | } |
Rebecca Stambler | aa385af | 2019-04-16 16:19:03 -0400 | [diff] [blame] | 64 | if p.Offset() >= len(content) { |
Ian Cottrell | b5495a5 | 2019-04-25 17:07:38 -0400 | [diff] [blame] | 65 | return p, fmt.Errorf("FromUTF16Column: offset (%v) greater than length of content (%v)", p.Offset(), len(content)) |
Rebecca Stambler | aa385af | 2019-04-16 16:19:03 -0400 | [diff] [blame] | 66 | } |
Ian Cottrell | 2f43c6d | 2019-03-15 13:19:43 -0400 | [diff] [blame] | 67 | remains := content[p.Offset():] |
| 68 | // scan forward the specified number of characters |
Ian Cottrell | 5c2858a | 2019-03-08 13:22:06 -0500 | [diff] [blame] | 69 | for count := 1; count < chr; count++ { |
| 70 | if len(remains) <= 0 { |
Ian Cottrell | 2f43c6d | 2019-03-15 13:19:43 -0400 | [diff] [blame] | 71 | return Point{}, fmt.Errorf("FromUTF16Column: chr goes beyond the content") |
Ian Cottrell | 5c2858a | 2019-03-08 13:22:06 -0500 | [diff] [blame] | 72 | } |
| 73 | r, w := utf8.DecodeRune(remains) |
| 74 | if r == '\n' { |
Paul Jolly | 72ffa07 | 2019-07-04 09:55:57 +0100 | [diff] [blame] | 75 | // Per the LSP spec: |
| 76 | // |
| 77 | // > If the character value is greater than the line length it |
| 78 | // > defaults back to the line length. |
| 79 | break |
Ian Cottrell | 5c2858a | 2019-03-08 13:22:06 -0500 | [diff] [blame] | 80 | } |
| 81 | remains = remains[w:] |
| 82 | if r >= 0x10000 { |
| 83 | // a two point rune |
| 84 | count++ |
| 85 | // if we finished in a two point rune, do not advance past the first |
| 86 | if count >= chr { |
| 87 | break |
| 88 | } |
| 89 | } |
Ian Cottrell | 2f43c6d | 2019-03-15 13:19:43 -0400 | [diff] [blame] | 90 | p.v.Column += w |
| 91 | p.v.Offset += w |
Ian Cottrell | 5c2858a | 2019-03-08 13:22:06 -0500 | [diff] [blame] | 92 | } |
Ian Cottrell | 2f43c6d | 2019-03-15 13:19:43 -0400 | [diff] [blame] | 93 | return p, nil |
Ian Cottrell | 5c2858a | 2019-03-08 13:22:06 -0500 | [diff] [blame] | 94 | } |