internal/gcimporter: discard position info for unneeded lines

This change implements an optimization in the size of the
shallow export data: instead of saving the entire line
start offset table of each file, it saves a sparse representation
of each file capable of restoring accurate position information
for the entirety of each line that contains an encoded token.Pos,
but no others.

This reduces the total size of export data for the standard library
to 665KB, down 31% from 957KB, and only 18% more than the original
baseline of 564KB.

The shallow_test asserts (for a couple of specific objects) that
the end positions may be derived from the start positions either
before or after converting token.Pos to token.Position.

Change-Id: I5c2a1648398d0bee979dc36ba6224072ea183bec
Reviewed-on: https://go-review.googlesource.com/c/tools/+/462096
gopls-CI: kokoro <noreply+kokoro@google.com>
Run-TryBot: Alan Donovan <adonovan@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Robert Findley <rfindley@google.com>
diff --git a/internal/gcimporter/iimport.go b/internal/gcimporter/iimport.go
index df381d3..448f903 100644
--- a/internal/gcimporter/iimport.go
+++ b/internal/gcimporter/iimport.go
@@ -373,26 +373,51 @@
 	file := p.fileCache[index]
 	if file == nil {
 		off := p.fileOffset[index]
-		rd := intReader{bytes.NewReader(p.fileData[off:]), p.ipath}
-		filename := p.stringAt(rd.uint64())
-		size := int(rd.uint64())
-		file = p.fake.fset.AddFile(filename, -1, size)
-
-		if n := int(rd.uint64()); n > 0 {
-			lines := make([]int, n) // initial element always implicitly zero
-			for i := 1; i < n; i++ {
-				lines[i] = lines[i-1] + int(rd.uint64())
-			}
-			if !file.SetLines(lines) {
-				errorf("SetLines failed") // can't happen
-			}
-		}
-
+		file = p.decodeFile(intReader{bytes.NewReader(p.fileData[off:]), p.ipath})
 		p.fileCache[index] = file
 	}
 	return file
 }
 
+func (p *iimporter) decodeFile(rd intReader) *token.File {
+	filename := p.stringAt(rd.uint64())
+	size := int(rd.uint64())
+	file := p.fake.fset.AddFile(filename, -1, size)
+
+	// SetLines requires a nondecreasing sequence.
+	// Because it is common for clients to derive the interval
+	// [start, start+len(name)] from a start position, and we
+	// want to ensure that the end offset is on the same line,
+	// we fill in the gaps of the sparse encoding with values
+	// that strictly increase by the largest possible amount.
+	// This allows us to avoid having to record the actual end
+	// offset of each needed line.
+
+	lines := make([]int, int(rd.uint64()))
+	var index, offset int
+	for i, n := 0, int(rd.uint64()); i < n; i++ {
+		index += int(rd.uint64())
+		offset += int(rd.uint64())
+		lines[index] = offset
+
+		// Ensure monotonicity between points.
+		for j := index - 1; j > 0 && lines[j] == 0; j-- {
+			lines[j] = lines[j+1] - 1
+		}
+	}
+
+	// Ensure monotonicity after last point.
+	for j := len(lines) - 1; j > 0 && lines[j] == 0; j-- {
+		size--
+		lines[j] = size
+	}
+
+	if !file.SetLines(lines) {
+		errorf("SetLines failed: %d", lines) // can't happen
+	}
+	return file
+}
+
 func (p *iimporter) pkgAt(off uint64) *types.Package {
 	if pkg, ok := p.pkgCache[off]; ok {
 		return pkg