internal/gcimporter: discard position info for unneeded lines This change implements an optimization in the size of the shallow export data: instead of saving the entire line start offset table of each file, it saves a sparse representation of each file capable of restoring accurate position information for the entirety of each line that contains an encoded token.Pos, but no others. This reduces the total size of export data for the standard library to 665KB, down 31% from 957KB, and only 18% more than the original baseline of 564KB. The shallow_test asserts (for a couple of specific objects) that the end positions may be derived from the start positions either before or after converting token.Pos to token.Position. Change-Id: I5c2a1648398d0bee979dc36ba6224072ea183bec Reviewed-on: https://go-review.googlesource.com/c/tools/+/462096 gopls-CI: kokoro <noreply+kokoro@google.com> Run-TryBot: Alan Donovan <adonovan@google.com> TryBot-Result: Gopher Robot <gobot@golang.org> Reviewed-by: Robert Findley <rfindley@google.com>
diff --git a/internal/gcimporter/iimport.go b/internal/gcimporter/iimport.go index df381d3..448f903 100644 --- a/internal/gcimporter/iimport.go +++ b/internal/gcimporter/iimport.go
@@ -373,26 +373,51 @@ file := p.fileCache[index] if file == nil { off := p.fileOffset[index] - rd := intReader{bytes.NewReader(p.fileData[off:]), p.ipath} - filename := p.stringAt(rd.uint64()) - size := int(rd.uint64()) - file = p.fake.fset.AddFile(filename, -1, size) - - if n := int(rd.uint64()); n > 0 { - lines := make([]int, n) // initial element always implicitly zero - for i := 1; i < n; i++ { - lines[i] = lines[i-1] + int(rd.uint64()) - } - if !file.SetLines(lines) { - errorf("SetLines failed") // can't happen - } - } - + file = p.decodeFile(intReader{bytes.NewReader(p.fileData[off:]), p.ipath}) p.fileCache[index] = file } return file } +func (p *iimporter) decodeFile(rd intReader) *token.File { + filename := p.stringAt(rd.uint64()) + size := int(rd.uint64()) + file := p.fake.fset.AddFile(filename, -1, size) + + // SetLines requires a nondecreasing sequence. + // Because it is common for clients to derive the interval + // [start, start+len(name)] from a start position, and we + // want to ensure that the end offset is on the same line, + // we fill in the gaps of the sparse encoding with values + // that strictly increase by the largest possible amount. + // This allows us to avoid having to record the actual end + // offset of each needed line. + + lines := make([]int, int(rd.uint64())) + var index, offset int + for i, n := 0, int(rd.uint64()); i < n; i++ { + index += int(rd.uint64()) + offset += int(rd.uint64()) + lines[index] = offset + + // Ensure monotonicity between points. + for j := index - 1; j > 0 && lines[j] == 0; j-- { + lines[j] = lines[j+1] - 1 + } + } + + // Ensure monotonicity after last point. + for j := len(lines) - 1; j > 0 && lines[j] == 0; j-- { + size-- + lines[j] = size + } + + if !file.SetLines(lines) { + errorf("SetLines failed: %d", lines) // can't happen + } + return file +} + func (p *iimporter) pkgAt(off uint64) *types.Package { if pkg, ok := p.pkgCache[off]; ok { return pkg