sweet: avoid a temporary file by streaming file contents in "put"
Currently `sweet put` is very inefficient: it archives and compresses
the assets into a temporary file, uploads that file, and then reads it
a second time to compute its hash.
There's no reason this can't all be done while streaming from the start,
and it saves a ton of temporary disk space (currently, assets are around
4.7 GiB in size).
While we're here, this change also removes the -public flag from the put
subcommand. The GCS bucket's access granularity is bucket-wide, so a
per-archive public flag doesn't make sense.
Change-Id: I78b6720407b0b922c349a9569bf3d1d0df88047c
Reviewed-on: https://go-review.googlesource.com/c/benchmarks/+/382654
Trust: Michael Knyszek <mknyszek@google.com>
Run-TryBot: Michael Knyszek <mknyszek@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Michael Pratt <mpratt@google.com>
diff --git a/sweet/cli/bootstrap/gcs.go b/sweet/cli/bootstrap/gcs.go
index b107655..35fd72a 100644
--- a/sweet/cli/bootstrap/gcs.go
+++ b/sweet/cli/bootstrap/gcs.go
@@ -7,7 +7,6 @@
import (
"context"
"fmt"
- "io"
"cloud.google.com/go/storage"
@@ -43,47 +42,32 @@
return fmt.Errorf("unrecognized authentication option: %s", input)
}
-func UploadArchive(r io.Reader, bucket, version string, auth AuthOption, force, public bool) error {
+func NewStorageWriter(bucket, version string, auth AuthOption, force bool) (*storage.Writer, error) {
ctx := context.Background()
var opts []option.ClientOption
switch auth {
case AuthAppDefault:
creds, err := google.FindDefaultCredentials(ctx, storage.ScopeReadWrite)
if err != nil {
- return err
+ return nil, err
}
opts = append(opts, option.WithCredentials(creds))
case AuthNone:
- return fmt.Errorf("authentication required for upload")
+ return nil, fmt.Errorf("authentication required for upload")
default:
- return fmt.Errorf("unknown authentication method")
+ return nil, fmt.Errorf("unknown authentication method")
}
client, err := storage.NewClient(ctx, opts...)
if err != nil {
- return err
+ return nil, err
}
o := client.Bucket(bucket).Object(VersionArchiveName(version))
if _, err := o.Attrs(ctx); err != nil && err != storage.ErrObjectNotExist {
- return fmt.Errorf("checking if object exists: %v", err)
+ return nil, fmt.Errorf("checking if object exists: %v", err)
} else if err == nil && !force {
- return fmt.Errorf("assets object already exists for version %s", version)
+ return nil, fmt.Errorf("assets object already exists for version %s", version)
}
-
- // Write the archive to GCS.
- wc := o.NewWriter(ctx)
- if _, err = io.Copy(wc, r); err != nil {
- return err
- }
- if err := wc.Close(); err != nil {
- return err
- }
-
- if public {
- // Make the archive public.
- acl := o.ACL()
- return acl.Set(ctx, storage.AllUsers, storage.RoleReader)
- }
- return nil
+ return o.NewWriter(ctx), nil
}
func NewStorageReader(bucket, version string, auth AuthOption) (*storage.Reader, error) {
diff --git a/sweet/cli/bootstrap/hash.go b/sweet/cli/bootstrap/hash.go
index 6677d88..cf82707 100644
--- a/sweet/cli/bootstrap/hash.go
+++ b/sweet/cli/bootstrap/hash.go
@@ -9,7 +9,6 @@
"encoding/json"
"fmt"
"hash"
- "io"
"os"
)
@@ -57,11 +56,3 @@
func Hash() hash.Hash {
return sha256.New()
}
-
-func HashStream(r io.Reader) (string, error) {
- hash := Hash()
- if _, err := io.Copy(hash, r); err != nil {
- return "", err
- }
- return CanonicalizeHash(hash), nil
-}
diff --git a/sweet/cmd/sweet/put.go b/sweet/cmd/sweet/put.go
index 5b75910..bce707f 100644
--- a/sweet/cmd/sweet/put.go
+++ b/sweet/cmd/sweet/put.go
@@ -10,7 +10,6 @@
"flag"
"fmt"
"io"
- "io/ioutil"
"os"
"path/filepath"
@@ -29,7 +28,6 @@
type putCmd struct {
auth bootstrap.AuthOption
force bool
- public bool
cache string
bucket string
assetsDir string
@@ -49,7 +47,6 @@
c.auth = bootstrap.AuthAppDefault
f.Var(&c.auth, "auth", fmt.Sprintf("authentication method (options: %s)", authOpts(false)))
f.BoolVar(&c.force, "force", false, "force upload even if assets for this version exist")
- f.BoolVar(&c.public, "public", false, "make the new assets archive public")
f.StringVar(&c.version, "version", common.Version, "the version to upload assets for")
f.StringVar(&c.bucket, "bucket", "go-sweet-assets", "GCS bucket to upload assets to")
f.StringVar(&c.assetsDir, "assets-dir", "./assets", "assets directory to tar, compress, and upload")
@@ -62,28 +59,28 @@
if err := bootstrap.ValidateVersion(c.version); err != nil {
return err
}
- tf, err := ioutil.TempFile("", "go-sweet-assets")
+
+ log.Printf("Archiving, compressing, and uploading: %s", c.assetsDir)
+
+ // Create storage writer for streaming.
+ wc, err := bootstrap.NewStorageWriter(c.bucket, c.version, c.auth, c.force)
if err != nil {
return err
}
- defer tf.Close()
+ defer wc.Close()
- log.Printf("Archiving and compressing: %s", c.assetsDir)
- if err := createAssetsArchive(tf, c.assetsDir, c.version); err != nil {
+ // Pass everything we write through a hash.
+ hash := bootstrap.Hash()
+ w := io.MultiWriter(wc, hash)
+
+ // Write the archive.
+ if err := createAssetsArchive(w, c.assetsDir, c.version); err != nil {
return err
}
- if _, err := tf.Seek(0, 0); err != nil {
- return err
- }
- log.Printf("Uploading archive to %s", c.bucket)
- if err := bootstrap.UploadArchive(tf, c.bucket, c.version, c.auth, c.force, c.public); err != nil {
- return err
- }
- if _, err := tf.Seek(0, 0); err != nil {
- return err
- }
+
+ // Update hash file.
log.Printf("Updating hash file...")
- return hashAssetsArchive(tf, c.assetsHashFile, c.version, c.force)
+ return updateAssetsHash(bootstrap.CanonicalizeHash(hash), c.assetsHashFile, c.version, c.force)
}
func createAssetsArchive(w io.Writer, assetsDir, version string) error {
@@ -134,15 +131,11 @@
})
}
-func hashAssetsArchive(tf io.Reader, hashfile, version string, force bool) error {
+func updateAssetsHash(hash, hashfile, version string, force bool) error {
vals, err := bootstrap.ReadHashesFile(hashfile)
if err != nil {
return err
}
- hash, err := bootstrap.HashStream(tf)
- if err != nil {
- return err
- }
if ok := vals.Put(version, hash, force); !ok {
return fmt.Errorf("hash for this version already exists")
}