internal/span: fix another off-by-one in ToUTF16Column

The current tests contain a bug in the priming of funnyString; the
subslicing leaves the resulting content slice with a capacity greater
than its length. This allowed a bug in ToUTF16Column to sneak through,
where we were not using the 0-based column as the offset within the line.

Fix the priming of funnyString, and fix the implementation of
ToUTF16Column.

Change-Id: I2618878d85bba26f52f99a3fc136ad21fe198dfc
Reviewed-on: https://go-review.googlesource.com/c/tools/+/174357
Reviewed-by: Ian Cottrell <iancottrell@google.com>
Run-TryBot: Ian Cottrell <iancottrell@google.com>

commit: 7af746645d5165109de0b5cb499980c22812dfc2 [log] [tgz]
author: Paul Jolly <paul@myitcv.io> Mon Apr 29 18:36:30 2019 +0100
committer: Paul Jolly <paul@myitcv.org.uk> Mon Apr 29 18:16:56 2019 +0000
tree: 016c5b6304a247840d7bfc8daa0471ed58d2b0cd
parent: c6e1543aba625eb8e00a24270b07e67eb90c4108 [diff]
diff --git a/internal/span/utf16.go b/internal/span/utf16.go
index 97c06c9..5f90a51 100644
--- a/internal/span/utf16.go
+++ b/internal/span/utf16.go

@@ -24,14 +24,14 @@
 	if !p.HasOffset() {
 		return -1, fmt.Errorf("ToUTF16Column: point is missing offset")
 	}
-	offset := p.Offset()
-	col := p.Column()
-	if col == 1 {
-		// column 1, so it must be chr 1
+	offset := p.Offset()      // 0-based
+	colZero := p.Column() - 1 // 0-based
+	if colZero == 0 {
+		// 0-based column 0, so it must be chr 1
 		return 1, nil
 	}
 	// work out the offset at the start of the line using the column
-	lineOffset := offset - (col - 1)
+	lineOffset := offset - colZero
 	if lineOffset < 0 || offset > len(content) {
 		return -1, fmt.Errorf("ToUTF16Column: offsets %v-%v outside file contents (%v)", lineOffset, offset, len(content))
 	}
@@ -40,10 +40,10 @@
 	start := content[lineOffset:]
 
 	// Now, truncate down to the supplied column.
-	start = start[:col]
+	start = start[:colZero]
 	// and count the number of utf16 characters
 	// in theory we could do this by hand more efficiently...
-	return len(utf16.Encode([]rune(string(start)))), nil
+	return len(utf16.Encode([]rune(string(start)))) + 1, nil
 }
 
 // FromUTF16Column advances the point by the utf16 character offset given the

diff --git a/internal/span/utf16_test.go b/internal/span/utf16_test.go
index 32e9568..2762772 100644
--- a/internal/span/utf16_test.go
+++ b/internal/span/utf16_test.go

@@ -12,9 +12,7 @@
 )
 
 // The funny character below is 4 bytes long in UTF-8; two UTF-16 code points
-var funnyString = []byte(`
-𐐀23
-𐐀45`[1:])
+var funnyString = []byte("𐐀23\n𐐀45")
 
 var toUTF16Tests = []struct {
 	scenario    string
commit	7af746645d5165109de0b5cb499980c22812dfc2	[log] [tgz]
author	Paul Jolly <paul@myitcv.io>	Mon Apr 29 18:36:30 2019 +0100
committer	Paul Jolly <paul@myitcv.org.uk>	Mon Apr 29 18:16:56 2019 +0000
tree	016c5b6304a247840d7bfc8daa0471ed58d2b0cd
parent	c6e1543aba625eb8e00a24270b07e67eb90c4108 [diff]