// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
| 4 | |
| 5 | package span |
| 6 | |
| 7 | import ( |
Ian Cottrell | 2f43c6d | 2019-03-15 13:19:43 -0400 | [diff] [blame] | 8 | "fmt" |
Ian Cottrell | 5c2858a | 2019-03-08 13:22:06 -0500 | [diff] [blame] | 9 | "unicode/utf16" |
| 10 | "unicode/utf8" |
| 11 | ) |
| 12 | |
| 13 | // ToUTF16Column calculates the utf16 column expressed by the point given the |
| 14 | // supplied file contents. |
| 15 | // This is used to convert from the native (always in bytes) column |
| 16 | // representation and the utf16 counts used by some editors. |
Ian Cottrell | 2f43c6d | 2019-03-15 13:19:43 -0400 | [diff] [blame] | 17 | func ToUTF16Column(p Point, content []byte) (int, error) { |
| 18 | if content == nil { |
| 19 | return -1, fmt.Errorf("ToUTF16Column: missing content") |
Ian Cottrell | 5c2858a | 2019-03-08 13:22:06 -0500 | [diff] [blame] | 20 | } |
Ian Cottrell | 2f43c6d | 2019-03-15 13:19:43 -0400 | [diff] [blame] | 21 | if !p.HasPosition() { |
| 22 | return -1, fmt.Errorf("ToUTF16Column: point is missing position") |
Ian Cottrell | 48d47c4 | 2019-03-13 18:00:13 -0400 | [diff] [blame] | 23 | } |
Ian Cottrell | 2f43c6d | 2019-03-15 13:19:43 -0400 | [diff] [blame] | 24 | if !p.HasOffset() { |
| 25 | return -1, fmt.Errorf("ToUTF16Column: point is missing offset") |
| 26 | } |
| 27 | offset := p.Offset() |
| 28 | col := p.Column() |
| 29 | if col == 1 { |
| 30 | // column 1, so it must be chr 1 |
| 31 | return 1, nil |
| 32 | } |
| 33 | // work out the offset at the start of the line using the column |
| 34 | lineOffset := offset - (col - 1) |
| 35 | if lineOffset < 0 || offset > len(content) { |
| 36 | return -1, fmt.Errorf("ToUTF16Column: offsets %v-%v outside file contents (%v)", lineOffset, offset, len(content)) |
Ian Cottrell | 5c2858a | 2019-03-08 13:22:06 -0500 | [diff] [blame] | 37 | } |
Rebecca Stambler | f939817 | 2019-04-18 16:55:24 -0400 | [diff] [blame^] | 38 | // Use the offset to pick out the line start. |
| 39 | // This cannot panic: offset > len(content) and lineOffset < offset. |
Ian Cottrell | 5c2858a | 2019-03-08 13:22:06 -0500 | [diff] [blame] | 40 | start := content[lineOffset:] |
Rebecca Stambler | f939817 | 2019-04-18 16:55:24 -0400 | [diff] [blame^] | 41 | |
| 42 | // Now, truncate down to the supplied column. |
| 43 | if col >= len(start) { |
| 44 | return -1, fmt.Errorf("ToUTF16Column: line (%v) is shorter than column (%v)", len(start), col) |
| 45 | } |
Ian Cottrell | 2f43c6d | 2019-03-15 13:19:43 -0400 | [diff] [blame] | 46 | start = start[:col] |
Ian Cottrell | 5c2858a | 2019-03-08 13:22:06 -0500 | [diff] [blame] | 47 | // and count the number of utf16 characters |
| 48 | // in theory we could do this by hand more efficiently... |
Ian Cottrell | 2f43c6d | 2019-03-15 13:19:43 -0400 | [diff] [blame] | 49 | return len(utf16.Encode([]rune(string(start)))), nil |
Ian Cottrell | 5c2858a | 2019-03-08 13:22:06 -0500 | [diff] [blame] | 50 | } |
| 51 | |
Ian Cottrell | 2f43c6d | 2019-03-15 13:19:43 -0400 | [diff] [blame] | 52 | // FromUTF16Column advances the point by the utf16 character offset given the |
| 53 | // supplied line contents. |
Ian Cottrell | 5c2858a | 2019-03-08 13:22:06 -0500 | [diff] [blame] | 54 | // This is used to convert from the utf16 counts used by some editors to the |
| 55 | // native (always in bytes) column representation. |
Ian Cottrell | 2f43c6d | 2019-03-15 13:19:43 -0400 | [diff] [blame] | 56 | func FromUTF16Column(p Point, chr int, content []byte) (Point, error) { |
| 57 | if !p.HasOffset() { |
| 58 | return Point{}, fmt.Errorf("FromUTF16Column: point is missing offset") |
Ian Cottrell | 5c2858a | 2019-03-08 13:22:06 -0500 | [diff] [blame] | 59 | } |
Ian Cottrell | 2f43c6d | 2019-03-15 13:19:43 -0400 | [diff] [blame] | 60 | // if chr is 1 then no adjustment needed |
| 61 | if chr <= 1 { |
| 62 | return p, nil |
| 63 | } |
Rebecca Stambler | aa385af | 2019-04-16 16:19:03 -0400 | [diff] [blame] | 64 | if p.Offset() >= len(content) { |
| 65 | return p, fmt.Errorf("offset (%v) greater than length of content (%v)", p.Offset(), len(content)) |
| 66 | } |
Ian Cottrell | 2f43c6d | 2019-03-15 13:19:43 -0400 | [diff] [blame] | 67 | remains := content[p.Offset():] |
| 68 | // scan forward the specified number of characters |
Ian Cottrell | 5c2858a | 2019-03-08 13:22:06 -0500 | [diff] [blame] | 69 | for count := 1; count < chr; count++ { |
| 70 | if len(remains) <= 0 { |
Ian Cottrell | 2f43c6d | 2019-03-15 13:19:43 -0400 | [diff] [blame] | 71 | return Point{}, fmt.Errorf("FromUTF16Column: chr goes beyond the content") |
Ian Cottrell | 5c2858a | 2019-03-08 13:22:06 -0500 | [diff] [blame] | 72 | } |
| 73 | r, w := utf8.DecodeRune(remains) |
| 74 | if r == '\n' { |
Ian Cottrell | 2f43c6d | 2019-03-15 13:19:43 -0400 | [diff] [blame] | 75 | return Point{}, fmt.Errorf("FromUTF16Column: chr goes beyond the line") |
Ian Cottrell | 5c2858a | 2019-03-08 13:22:06 -0500 | [diff] [blame] | 76 | } |
| 77 | remains = remains[w:] |
| 78 | if r >= 0x10000 { |
| 79 | // a two point rune |
| 80 | count++ |
| 81 | // if we finished in a two point rune, do not advance past the first |
| 82 | if count >= chr { |
| 83 | break |
| 84 | } |
| 85 | } |
Ian Cottrell | 2f43c6d | 2019-03-15 13:19:43 -0400 | [diff] [blame] | 86 | p.v.Column += w |
| 87 | p.v.Offset += w |
Ian Cottrell | 5c2858a | 2019-03-08 13:22:06 -0500 | [diff] [blame] | 88 | } |
Ian Cottrell | 2f43c6d | 2019-03-15 13:19:43 -0400 | [diff] [blame] | 89 | return p, nil |
Ian Cottrell | 5c2858a | 2019-03-08 13:22:06 -0500 | [diff] [blame] | 90 | } |