go/buildutil, cmd/guru: extract overlay context into buildutil

This extracts the overlay context and archive parsing from guru into
buildutil.

At least one tool (gogetdoc) has a vendored copy of this code already,
and more tools implementing the same functionality will follow.

The new code in buildutil is an almost identical copy of the code in
guru (names aside), except for the following changes:

- Instead of reading into a bytes.Buffer, we read directly into a []byte
  of appropriate size

- sameFile first attempts a simple comparison of path.Clean'ed paths.

Change-Id: I97cd978ccc10722e3648e5e10625fa7f1407f202
Reviewed-on: https://go-review.googlesource.com/21805
Reviewed-by: Alan Donovan <adonovan@google.com>
diff --git a/cmd/guru/main.go b/cmd/guru/main.go
index b704ef5..28ea2b3 100644
--- a/cmd/guru/main.go
+++ b/cmd/guru/main.go
@@ -13,18 +13,14 @@
 
 import (
 	"bufio"
-	"bytes"
 	"flag"
 	"fmt"
 	"go/build"
 	"go/token"
 	"io"
-	"io/ioutil"
 	"log"
 	"os"
-	"path/filepath"
 	"runtime/pprof"
-	"strconv"
 	"strings"
 	"sync"
 
@@ -168,7 +164,7 @@
 	// read them from the standard input and
 	// overlay them on the build context.
 	if *modifiedFlag {
-		modified, err := parseArchive(os.Stdin)
+		modified, err := buildutil.ParseOverlayArchive(os.Stdin)
 		if err != nil {
 			log.Fatal(err)
 		}
@@ -178,7 +174,7 @@
 		// but the loader's cgo preprocessing currently does not.
 
 		if len(modified) > 0 {
-			ctxt = useModifiedFiles(ctxt, modified)
+			ctxt = buildutil.OverlayContext(ctxt, modified)
 		}
 	}
 
@@ -212,68 +208,3 @@
 		log.Fatal(err)
 	}
 }
-
-func parseArchive(archive io.Reader) (map[string][]byte, error) {
-	modified := make(map[string][]byte)
-	r := bufio.NewReader(archive)
-	for {
-		// Read file name.
-		filename, err := r.ReadString('\n')
-		if err != nil {
-			if err == io.EOF {
-				break // OK
-			}
-			return nil, fmt.Errorf("reading modified file name: %v", err)
-		}
-		filename = filepath.Clean(strings.TrimSpace(filename))
-
-		// Read file size.
-		sz, err := r.ReadString('\n')
-		if err != nil {
-			return nil, fmt.Errorf("reading size of modified file %s: %v", filename, err)
-		}
-		sz = strings.TrimSpace(sz)
-		size, err := strconv.ParseInt(sz, 10, 32)
-		if err != nil {
-			return nil, fmt.Errorf("parsing size of modified file %s: %v", filename, err)
-		}
-
-		// Read file content.
-		var content bytes.Buffer
-		content.Grow(int(size))
-		if _, err := io.CopyN(&content, r, size); err != nil {
-			return nil, fmt.Errorf("reading modified file %s: %v", filename, err)
-		}
-		modified[filename] = content.Bytes()
-	}
-
-	return modified, nil
-}
-
-// useModifiedFiles augments the provided build.Context by the
-// mapping from file names to alternative contents.
-func useModifiedFiles(orig *build.Context, modified map[string][]byte) *build.Context {
-	rc := func(data []byte) (io.ReadCloser, error) {
-		return ioutil.NopCloser(bytes.NewBuffer(data)), nil
-	}
-
-	copy := *orig // make a copy
-	ctxt := &copy
-	ctxt.OpenFile = func(path string) (io.ReadCloser, error) {
-		// Fast path: names match exactly.
-		if content, ok := modified[path]; ok {
-			return rc(content)
-		}
-
-		// Slow path: check for same file under a different
-		// alias, perhaps due to a symbolic link.
-		for filename, content := range modified {
-			if sameFile(path, filename) {
-				return rc(content)
-			}
-		}
-
-		return buildutil.OpenFile(orig, path)
-	}
-	return ctxt
-}
diff --git a/go/buildutil/overlay.go b/go/buildutil/overlay.go
new file mode 100644
index 0000000..3c4911f
--- /dev/null
+++ b/go/buildutil/overlay.go
@@ -0,0 +1,103 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package buildutil
+
+import (
+	"bufio"
+	"bytes"
+	"fmt"
+	"go/build"
+	"io"
+	"io/ioutil"
+	"path/filepath"
+	"strconv"
+	"strings"
+)
+
+// OverlayContext overlays a build.Context with additional files from
+// a map. Files in the map take precedence over other files.
+//
+// In addition to plain string comparison, two file names are
+// considered equal if their base names match and their directory
+// components point at the same directory on the file system. That is,
+// symbolic links are followed for directories, but not files.
+//
+// A common use case for OverlayContext is to allow editors to pass in
+// a set of unsaved, modified files.
+//
+// Currently, only the Context.OpenFile function will respect the
+// overlay. This may change in the future.
+func OverlayContext(orig *build.Context, overlay map[string][]byte) *build.Context {
+	// TODO(dominikh): Implement IsDir, HasSubdir and ReadDir
+
+	rc := func(data []byte) (io.ReadCloser, error) {
+		return ioutil.NopCloser(bytes.NewBuffer(data)), nil
+	}
+
+	copy := *orig // make a copy
+	ctxt := &copy
+	ctxt.OpenFile = func(path string) (io.ReadCloser, error) {
+		// Fast path: names match exactly.
+		if content, ok := overlay[path]; ok {
+			return rc(content)
+		}
+
+		// Slow path: check for same file under a different
+		// alias, perhaps due to a symbolic link.
+		for filename, content := range overlay {
+			if sameFile(path, filename) {
+				return rc(content)
+			}
+		}
+
+		return OpenFile(orig, path)
+	}
+	return ctxt
+}
+
+// ParseOverlayArchive parses an archive containing Go files and their
+// contents. The result is intended to be used with OverlayContext.
+//
+//
+// Archive format
+//
+// The archive consists of a series of files. Each file consists of a
+// name, a decimal file size and the file contents, separated by
+// newlines. No newline follows after the file contents.
+func ParseOverlayArchive(archive io.Reader) (map[string][]byte, error) {
+	overlay := make(map[string][]byte)
+	r := bufio.NewReader(archive)
+	for {
+		// Read file name.
+		filename, err := r.ReadString('\n')
+		if err != nil {
+			if err == io.EOF {
+				break // OK
+			}
+			return nil, fmt.Errorf("reading archive file name: %v", err)
+		}
+		filename = filepath.Clean(strings.TrimSpace(filename))
+
+		// Read file size.
+		sz, err := r.ReadString('\n')
+		if err != nil {
+			return nil, fmt.Errorf("reading size of archive file %s: %v", filename, err)
+		}
+		sz = strings.TrimSpace(sz)
+		size, err := strconv.ParseUint(sz, 10, 32)
+		if err != nil {
+			return nil, fmt.Errorf("parsing size of archive file %s: %v", filename, err)
+		}
+
+		// Read file content.
+		content := make([]byte, size)
+		if _, err := io.ReadFull(r, content); err != nil {
+			return nil, fmt.Errorf("reading archive file %s: %v", filename, err)
+		}
+		overlay[filename] = content
+	}
+
+	return overlay, nil
+}
diff --git a/go/buildutil/overlay_test.go b/go/buildutil/overlay_test.go
new file mode 100644
index 0000000..92e2258
--- /dev/null
+++ b/go/buildutil/overlay_test.go
@@ -0,0 +1,70 @@
+package buildutil_test
+
+import (
+	"go/build"
+	"io/ioutil"
+	"reflect"
+	"strings"
+	"testing"
+
+	"golang.org/x/tools/go/buildutil"
+)
+
+func TestParseOverlayArchive(t *testing.T) {
+	var tt = []struct {
+		in     string
+		out    map[string][]byte
+		hasErr bool
+	}{
+		{
+			"a.go\n5\n12345",
+			map[string][]byte{"a.go": []byte("12345")},
+			false,
+		},
+		{
+			"a.go\n5\n1234",
+			nil,
+			true,
+		},
+		{
+			"a.go\n5\n12345b.go\n4\n1234",
+			map[string][]byte{"a.go": []byte("12345"), "b.go": []byte("1234")},
+			false,
+		},
+	}
+
+	for _, test := range tt {
+		got, err := buildutil.ParseOverlayArchive(strings.NewReader(test.in))
+		if err == nil && test.hasErr {
+			t.Errorf("expected error for %q", test.in)
+		}
+		if err != nil && !test.hasErr {
+			t.Errorf("unexpected error %v for %q", err, test.in)
+		}
+		if !reflect.DeepEqual(got, test.out) {
+			t.Errorf("got %#v, want %#v", got, test.out)
+		}
+	}
+}
+
+func TestOverlay(t *testing.T) {
+	ctx := &build.Default
+	ov := map[string][]byte{
+		"/somewhere/a.go": []byte("file contents"),
+	}
+	names := []string{"/somewhere/a.go", "/somewhere//a.go"}
+	ctx = buildutil.OverlayContext(ctx, ov)
+	for _, name := range names {
+		f, err := buildutil.OpenFile(ctx, name)
+		if err != nil {
+			t.Errorf("unexpected error %v", err)
+		}
+		b, err := ioutil.ReadAll(f)
+		if err != nil {
+			t.Errorf("unexpected error %v", err)
+		}
+		if got, expected := string(b), string(ov["/somewhere/a.go"]); got != expected {
+			t.Errorf("read %q, expected %q", got, expected)
+		}
+	}
+}
diff --git a/go/buildutil/util.go b/go/buildutil/util.go
index 0e093fc..eefe1f2 100644
--- a/go/buildutil/util.go
+++ b/go/buildutil/util.go
@@ -165,3 +165,20 @@
 	}
 	return filepath.SplitList(s)
 }
+
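+// sameFile reports whether x and y denote the same file: either their
+// path.Clean'ed forms are equal, or they share a base name and os.Stat
+// succeeds on both with os.SameFile reporting the same underlying file.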
+func sameFile(x, y string) bool {
+	if path.Clean(x) == path.Clean(y) {
+		return true
+	}
+	if filepath.Base(x) == filepath.Base(y) { // (optimisation)
+		if xi, err := os.Stat(x); err == nil {
+			if yi, err := os.Stat(y); err == nil {
+				return os.SameFile(xi, yi)
+			}
+		}
+	}
+	return false
+}