sweet: avoid a temporary file by streaming GCS file contents in "get"

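Currently `sweet get` is very inefficient: it downloads the entire
compressed assets archive into a temporary file, then hashes that file,
then extracts it. There's no reason this can't all be done in a single
streaming pass, and doing so saves a ton of temporary disk space
(currently, the assets are around 4 GiB in size). Because the hash is
computed as the data streams through the extractor, the checksum is now
verified after extraction completes rather than before it starts.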

Change-Id: Id47423e0ddeac5f7084a5ca02038f3061ea726dd
Reviewed-on: https://go-review.googlesource.com/c/benchmarks/+/382096
Trust: Michael Knyszek <mknyszek@google.com>
Run-TryBot: Michael Knyszek <mknyszek@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Michael Pratt <mpratt@google.com>
diff --git a/sweet/cli/bootstrap/gcs.go b/sweet/cli/bootstrap/gcs.go
index 9eab0bb..b107655 100644
--- a/sweet/cli/bootstrap/gcs.go
+++ b/sweet/cli/bootstrap/gcs.go
@@ -86,31 +86,24 @@
 	return nil
 }
 
-func DownloadArchive(w io.Writer, bucket, version string, auth AuthOption) error {
+func NewStorageReader(bucket, version string, auth AuthOption) (*storage.Reader, error) {
 	ctx := context.Background()
 	opts := []option.ClientOption{option.WithScopes(storage.ScopeReadOnly)}
 	switch auth {
 	case AuthAppDefault:
 		creds, err := google.FindDefaultCredentials(ctx, storage.ScopeReadOnly)
 		if err != nil {
-			return err
+			return nil, err
 		}
 		opts = append(opts, option.WithCredentials(creds))
 	case AuthNone:
 		opts = append(opts, option.WithoutAuthentication())
 	default:
-		return fmt.Errorf("unknown authentication method")
+		return nil, fmt.Errorf("unknown authentication method")
 	}
 	client, err := storage.NewClient(ctx, opts...)
 	if err != nil {
-		return err
+		return nil, err
 	}
-	rc, err := client.Bucket(bucket).Object(VersionArchiveName(version)).NewReader(ctx)
-	if err != nil {
-		return err
-	}
-	if _, err = io.Copy(w, rc); err != nil {
-		return err
-	}
-	return rc.Close()
+	return client.Bucket(bucket).Object(VersionArchiveName(version)).NewReader(ctx)
 }
diff --git a/sweet/cli/bootstrap/hash.go b/sweet/cli/bootstrap/hash.go
index 980b1d3..6677d88 100644
--- a/sweet/cli/bootstrap/hash.go
+++ b/sweet/cli/bootstrap/hash.go
@@ -50,14 +50,18 @@
 	return json.NewEncoder(f).Encode(&h)
 }
 
-func canonicalizeHash(h hash.Hash) string {
+func CanonicalizeHash(h hash.Hash) string {
 	return fmt.Sprintf("%x", h.Sum(nil))
 }
 
+func Hash() hash.Hash {
+	return sha256.New()
+}
+
 func HashStream(r io.Reader) (string, error) {
-	hash := sha256.New()
+	hash := Hash()
 	if _, err := io.Copy(hash, r); err != nil {
 		return "", err
 	}
-	return canonicalizeHash(hash), nil
+	return CanonicalizeHash(hash), nil
 }
diff --git a/sweet/cmd/sweet/get.go b/sweet/cmd/sweet/get.go
index 2ab645a..bc605cd 100644
--- a/sweet/cmd/sweet/get.go
+++ b/sweet/cmd/sweet/get.go
@@ -10,7 +10,6 @@
 	"flag"
 	"fmt"
 	"io"
-	"io/ioutil"
 	"os"
 	"path/filepath"
 	"strings"
@@ -115,39 +114,33 @@
 }
 
 func downloadAndExtract(todir, bucket, hashfile, version string, auth bootstrap.AuthOption, readonly bool) error {
-	tf, err := ioutil.TempFile("", "go-sweet-assets")
+	log.Printf("Downloading assets archive for version %s to %s", version, todir)
+
+	// Create storage reader for streaming.
+	rc, err := bootstrap.NewStorageReader(bucket, version, auth)
 	if err != nil {
 		return err
 	}
-	defer tf.Close()
-	log.Printf("Downloading assets archive for version %s", version)
-	if err := bootstrap.DownloadArchive(tf, bucket, version, auth); err != nil {
-		tf.Close()
+	defer rc.Close()
+
+	// Pass everything we read through a hash.
+	hash := bootstrap.Hash()
+	r := io.TeeReader(rc, hash)
+
+	// Stream and extract the results.
+	if err := extractAssets(r, todir, readonly); err != nil {
 		return err
 	}
-	if _, err := tf.Seek(0, 0); err != nil {
-		return err
-	}
-	log.Printf("Verifying archive checksum...")
-	if err := checkAssetsHash(tf, hashfile, version); err != nil {
-		return err
-	}
-	if _, err := tf.Seek(0, 0); err != nil {
-		return err
-	}
-	log.Printf("Installing assets to %s", todir)
-	return extractAssets(tf, todir, readonly)
+
+	// Check the hash.
+	return checkAssetsHash(bootstrap.CanonicalizeHash(hash), hashfile, version)
 }
 
-func checkAssetsHash(tf io.Reader, hashfile, version string) error {
+func checkAssetsHash(hash, hashfile, version string) error {
 	vals, err := bootstrap.ReadHashesFile(hashfile)
 	if err != nil {
 		return err
 	}
-	hash, err := bootstrap.HashStream(tf)
-	if err != nil {
-		return err
-	}
 	check, ok := vals.Get(version)
 	if !ok {
 		return fmt.Errorf("hash for version %s not found", version)
@@ -158,11 +151,11 @@
 	return nil
 }
 
-func extractAssets(tf io.Reader, outdir string, readonly bool) error {
+func extractAssets(r io.Reader, outdir string, readonly bool) error {
 	if err := os.MkdirAll(outdir, os.ModePerm); err != nil {
 		return fmt.Errorf("create assets directory: %v", err)
 	}
-	gr, err := gzip.NewReader(tf)
+	gr, err := gzip.NewReader(r)
 	if err != nil {
 		return err
 	}