sweet: avoid a temporary file by streaming GCS file contents in "get"
Currently `sweet get` is very inefficient: it downloads the entire
compressed assets archive into a temporary file, then hashes that file,
then extracts it. There's no reason all of this can't be done in a
single streaming pass over the download, and doing so saves a lot of
temporary disk space (the assets are currently around 4 GiB in size).
As a consequence, the archive checksum is now verified after extraction
completes rather than before it begins.
Change-Id: Id47423e0ddeac5f7084a5ca02038f3061ea726dd
Reviewed-on: https://go-review.googlesource.com/c/benchmarks/+/382096
Trust: Michael Knyszek <mknyszek@google.com>
Run-TryBot: Michael Knyszek <mknyszek@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Michael Pratt <mpratt@google.com>
diff --git a/sweet/cli/bootstrap/gcs.go b/sweet/cli/bootstrap/gcs.go
index 9eab0bb..b107655 100644
--- a/sweet/cli/bootstrap/gcs.go
+++ b/sweet/cli/bootstrap/gcs.go
@@ -86,31 +86,24 @@
return nil
}
-func DownloadArchive(w io.Writer, bucket, version string, auth AuthOption) error {
+func NewStorageReader(bucket, version string, auth AuthOption) (*storage.Reader, error) {
ctx := context.Background()
opts := []option.ClientOption{option.WithScopes(storage.ScopeReadOnly)}
switch auth {
case AuthAppDefault:
creds, err := google.FindDefaultCredentials(ctx, storage.ScopeReadOnly)
if err != nil {
- return err
+ return nil, err
}
opts = append(opts, option.WithCredentials(creds))
case AuthNone:
opts = append(opts, option.WithoutAuthentication())
default:
- return fmt.Errorf("unknown authentication method")
+ return nil, fmt.Errorf("unknown authentication method")
}
client, err := storage.NewClient(ctx, opts...)
if err != nil {
- return err
+ return nil, err
}
- rc, err := client.Bucket(bucket).Object(VersionArchiveName(version)).NewReader(ctx)
- if err != nil {
- return err
- }
- if _, err = io.Copy(w, rc); err != nil {
- return err
- }
- return rc.Close()
+ return client.Bucket(bucket).Object(VersionArchiveName(version)).NewReader(ctx)
}
diff --git a/sweet/cli/bootstrap/hash.go b/sweet/cli/bootstrap/hash.go
index 980b1d3..6677d88 100644
--- a/sweet/cli/bootstrap/hash.go
+++ b/sweet/cli/bootstrap/hash.go
@@ -50,14 +50,18 @@
return json.NewEncoder(f).Encode(&h)
}
-func canonicalizeHash(h hash.Hash) string {
+func CanonicalizeHash(h hash.Hash) string {
return fmt.Sprintf("%x", h.Sum(nil))
}
+func Hash() hash.Hash {
+ return sha256.New()
+}
+
func HashStream(r io.Reader) (string, error) {
- hash := sha256.New()
+ hash := Hash()
if _, err := io.Copy(hash, r); err != nil {
return "", err
}
- return canonicalizeHash(hash), nil
+ return CanonicalizeHash(hash), nil
}
diff --git a/sweet/cmd/sweet/get.go b/sweet/cmd/sweet/get.go
index 2ab645a..bc605cd 100644
--- a/sweet/cmd/sweet/get.go
+++ b/sweet/cmd/sweet/get.go
@@ -10,7 +10,6 @@
"flag"
"fmt"
"io"
- "io/ioutil"
"os"
"path/filepath"
"strings"
@@ -115,39 +114,33 @@
}
func downloadAndExtract(todir, bucket, hashfile, version string, auth bootstrap.AuthOption, readonly bool) error {
- tf, err := ioutil.TempFile("", "go-sweet-assets")
+ log.Printf("Downloading assets archive for version %s to %s", version, todir)
+
+ // Create storage reader for streaming.
+ rc, err := bootstrap.NewStorageReader(bucket, version, auth)
if err != nil {
return err
}
- defer tf.Close()
- log.Printf("Downloading assets archive for version %s", version)
- if err := bootstrap.DownloadArchive(tf, bucket, version, auth); err != nil {
- tf.Close()
+ defer rc.Close()
+
+ // Pass everything we read through a hash.
+ hash := bootstrap.Hash()
+ r := io.TeeReader(rc, hash)
+
+ // Stream and extract the results.
+ if err := extractAssets(r, todir, readonly); err != nil {
return err
}
- if _, err := tf.Seek(0, 0); err != nil {
- return err
- }
- log.Printf("Verifying archive checksum...")
- if err := checkAssetsHash(tf, hashfile, version); err != nil {
- return err
- }
- if _, err := tf.Seek(0, 0); err != nil {
- return err
- }
- log.Printf("Installing assets to %s", todir)
- return extractAssets(tf, todir, readonly)
+
+ // Check the hash.
+ return checkAssetsHash(bootstrap.CanonicalizeHash(hash), hashfile, version)
}
-func checkAssetsHash(tf io.Reader, hashfile, version string) error {
+func checkAssetsHash(hash, hashfile, version string) error {
vals, err := bootstrap.ReadHashesFile(hashfile)
if err != nil {
return err
}
- hash, err := bootstrap.HashStream(tf)
- if err != nil {
- return err
- }
check, ok := vals.Get(version)
if !ok {
return fmt.Errorf("hash for version %s not found", version)
@@ -158,11 +151,11 @@
return nil
}
-func extractAssets(tf io.Reader, outdir string, readonly bool) error {
+func extractAssets(r io.Reader, outdir string, readonly bool) error {
if err := os.MkdirAll(outdir, os.ModePerm); err != nil {
return fmt.Errorf("create assets directory: %v", err)
}
- gr, err := gzip.NewReader(tf)
+ gr, err := gzip.NewReader(r)
if err != nil {
return err
}