internal/span: fix another off-by-one in ToUTF16Column
The current tests contain a bug in the priming of funnyString; the
subslicing leaves the resulting content slice with a capacity greater
than its length. This allowed a bug in ToUTF16Column to sneak through where
we were not using the 0-based column as the offset within the line.
Fix the priming of funnyString, and fix the implementation of
ToUTF16Column.
Change-Id: I2618878d85bba26f52f99a3fc136ad21fe198dfc
Reviewed-on: https://go-review.googlesource.com/c/tools/+/174357
Reviewed-by: Ian Cottrell <iancottrell@google.com>
Run-TryBot: Ian Cottrell <iancottrell@google.com>
diff --git a/internal/span/utf16.go b/internal/span/utf16.go
index 97c06c9..5f90a51 100644
--- a/internal/span/utf16.go
+++ b/internal/span/utf16.go
@@ -24,14 +24,14 @@
if !p.HasOffset() {
return -1, fmt.Errorf("ToUTF16Column: point is missing offset")
}
- offset := p.Offset()
- col := p.Column()
- if col == 1 {
- // column 1, so it must be chr 1
+ offset := p.Offset() // 0-based
+ colZero := p.Column() - 1 // 0-based
+ if colZero == 0 {
+ // 0-based column 0, so it must be chr 1
return 1, nil
}
// work out the offset at the start of the line using the column
- lineOffset := offset - (col - 1)
+ lineOffset := offset - colZero
if lineOffset < 0 || offset > len(content) {
return -1, fmt.Errorf("ToUTF16Column: offsets %v-%v outside file contents (%v)", lineOffset, offset, len(content))
}
@@ -40,10 +40,10 @@
start := content[lineOffset:]
// Now, truncate down to the supplied column.
- start = start[:col]
+ start = start[:colZero]
// and count the number of utf16 characters
// in theory we could do this by hand more efficiently...
- return len(utf16.Encode([]rune(string(start)))), nil
+ return len(utf16.Encode([]rune(string(start)))) + 1, nil
}
// FromUTF16Column advances the point by the utf16 character offset given the
diff --git a/internal/span/utf16_test.go b/internal/span/utf16_test.go
index 32e9568..2762772 100644
--- a/internal/span/utf16_test.go
+++ b/internal/span/utf16_test.go
@@ -12,9 +12,7 @@
)
// The funny character below is 4 bytes long in UTF-8; two UTF-16 code points
-var funnyString = []byte(`
-š23
-š45`[1:])
+var funnyString = []byte("š23\nš45")
var toUTF16Tests = []struct {
scenario string