ogle/program/server: do less I/O when printing maps
The keys and values in each bucket are stored in a contiguous block of
memory, preceded by a header. Introduce a cache so the entire bucket can
be read in one remote access in the typical case, with printValueAt using
the cache to print the values in the bucket.
If the structure is a map of maps, the code will still work but
only the innermost map will exploit the cache well.
That's a rare case anyway, and no worse than the current situation.
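
For reference, here is a rough sketch of the bucket layout being walked
(based on the Go 1.3-era runtime hashmap; the field names are illustrative,
not the runtime's own declarations):

	type bucket struct {
		tophash  [8]uint8 // occupancy markers; a slot is in use if its byte >= minTopHash
		overflow uintptr  // address of the next overflow bucket, or 0
		// ...followed by 8 keys, then 8 values, packed contiguously
	}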

LGTM=nigeltao
R=nigeltao
https://golang.org/cl/114050043
diff --git a/program/server/print.go b/program/server/print.go
index 7a11262..8f4235f 100644
--- a/program/server/print.go
+++ b/program/server/print.go
@@ -27,6 +27,11 @@
 	printBuf bytes.Buffer     // Accumulates the output.
 	tmp      []byte           // Temporary used for I/O.
 	visited  map[uintptr]bool // Prevents looping on cyclic data.
+	// The cache stores a local copy of part of the address space.
+	// It saves I/O overhead when scanning map buckets by letting
+	// printValueAt use the contents of already-read bucket data.
+	cache    []byte  // Already-read data.
+	cacheOff uintptr // Starting address of cache.
 }
 
 // printf prints to printBuf, unless there has been an error.
@@ -56,11 +61,37 @@
 }
 
-// peek reads len bytes at offset, leaving p.tmp with the data and sized appropriately.
-func (p *Printer) peek(offset uintptr, len int64) bool {
-	p.tmp = p.tmp[:len]
+// peek reads length bytes at offset, leaving p.tmp with the data and sized appropriately.
+// It uses the cache if the request is within it.
+func (p *Printer) peek(offset uintptr, length int64) bool {
+	p.tmp = p.tmp[:length]
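+	// Serve the request from the cache when [offset, offset+length) lies entirely within it.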
+	if p.cacheOff <= offset && offset+uintptr(length) <= p.cacheOff+uintptr(len(p.cache)) {
+		copy(p.tmp, p.cache[offset-p.cacheOff:])
+		return true
+	}
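+	// Otherwise fall back to a read from the remote address space.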
 	return p.ok(p.peeker.peek(offset, p.tmp))
 }
 
+// setCache initializes the cache to contain length bytes of the
+// address space starting at the specified offset.
+func (p *Printer) setCache(offset, length uintptr) bool {
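+	// Reuse the existing buffer if it has enough capacity; otherwise allocate a new one.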
+	if uintptr(cap(p.cache)) >= length {
+		p.cache = p.cache[:length]
+	} else {
+		p.cache = make([]byte, length)
+	}
+	p.cacheOff = offset
+	ok := p.ok(p.peeker.peek(offset, p.cache))
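+	// Don't keep stale or partial data around if the read failed.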
+	if !ok {
+		p.resetCache()
+	}
+	return ok
+}
+
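+// resetCache empties the cache, forcing subsequent peeks to go to the remote address space.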
+func (p *Printer) resetCache() {
+	p.cache = p.cache[0:0]
+	p.cacheOff = 0
+}
+
 // Peeker is like a read that probes the remote address space.
 type Peeker interface {
 	peek(offset uintptr, buf []byte) error
@@ -83,6 +114,7 @@
 func (p *Printer) reset() {
 	p.err = nil
 	p.printBuf.Reset()
+	p.resetCache()
 	// Just wipe the map rather than reallocating. It's almost always tiny.
 	for k := range p.visited {
 		delete(p.visited, k)
@@ -290,7 +322,6 @@
 	keySize    uintptr
 	elemSize   uintptr
 	bucketSize uintptr
-	buf        []byte
 }
 
 func (p *Printer) printMapAt(typ *dwarf.MapType, addr uintptr) {
@@ -305,7 +336,7 @@
 	if !p.peek(addr, structType.ByteSize) {
 		return
 	}
-	// From runtime/hashmap.*; We need to walk the map data structure.
+	// From runtime/hashmap.go; we need to walk the map data structure.
 	// Load the struct, then iterate over the buckets.
 	// uintgo count (occupancy).
 	offset := int(structType.Field[0].ByteOffset)
@@ -333,16 +364,15 @@
 		keySize:    keySize,
 		elemSize:   elemSize,
 		bucketSize: bucketSize,
-		buf:        make([]byte, bucketSize),
 	}
 	p.printMapBucketsAt(desc, bucketPtr)
 	p.printMapBucketsAt(desc, oldBucketPtr)
 	p.printf("}")
 }
 
-// Map bucket layout from runtime/hashmap.*
+// Map bucket layout from runtime/hashmap.go
 const (
-	bucketCnt  = 8 // BUCKETSIZE (poor name) in C code.
+	bucketCnt  = 8
 	minTopHash = 4
 )
 
@@ -351,19 +381,23 @@
 		return
 	}
 	for i := 0; desc.count > 0 && i < desc.numBuckets; i++ {
-		// Load this bucket struct into memory and initialize "pointers" to the key and value slices.
 		// After the header, the bucket struct has an array of keys followed by an array of elements.
-		if !p.ok(p.peeker.peek(addr, desc.buf)) {
+		// Load this bucket struct into p's cache and initialize "pointers" to the key and value slices.
+		if !p.setCache(addr, desc.bucketSize) {
 			return
 		}
-		// TODO: We have loaded the data but printValueAt loads from remote addresses
-		// so we need to keep track of addresses and read memory twice. It would
-		// be nice to avoid this overhead.
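+		// The keys begin after the bucket header: the tophash array plus the overflow pointer.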
 		keyAddr := addr + bucketCnt + uintptr(p.arch.PointerSize)
 		elemAddr := keyAddr + bucketCnt*desc.keySize
-		addr += desc.bucketSize // Advance to next bucket; buf, keyAddr and elemAddr are all we need now.
+		addr += desc.bucketSize // Advance to next bucket; keyAddr and elemAddr are all we need now.
 		// tophash uint8 [bucketCnt] tells us which buckets are occupied.
-		tophash := desc.buf[:bucketCnt]
+		// p.cache has the data but calls to printValueAt below may overwrite the
+		// cache, so grab a copy of the relevant data.
+		var tophash [bucketCnt]byte
+		if copy(tophash[:], p.cache) != bucketCnt {
+			p.errorf("bad count copying hash flags")
+			return
+		}
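+		// Likewise extract the overflow pointer now, before the cache can be overwritten.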
+		overflow := uintptr(p.arch.Uintptr(p.cache[bucketCnt : bucketCnt+p.arch.PointerSize]))
 		for j := 0; desc.count > 0 && j < bucketCnt; j++ {
 			if tophash[j] >= minTopHash {
 				p.printValueAt(desc.typ.KeyType, keyAddr)
@@ -378,7 +412,6 @@
 			elemAddr += desc.elemSize
 		}
 		// pointer to overflow bucket, if any.
-		overflow := uintptr(p.arch.Uintptr(desc.buf[bucketCnt : bucketCnt+p.arch.PointerSize]))
 		p.printMapBucketsAt(desc, overflow)
 	}
 }