internal/gcsfs: create

gcsfs is an io.FS implementation on top of GCS, with a bit of write
support added. I anticipate this being useful for writing relui tasks.

For testing purposes, also includes a copy of os.DirFS with write
support added.

For golang/go#51797.

Change-Id: I294aac481857126dad9071158b2329f6d9a3805a
Reviewed-on: https://go-review.googlesource.com/c/build/+/394360
Run-TryBot: Heschi Kreinick <heschi@google.com>
Reviewed-by: Alex Rakoczy <alex@golang.org>
Reviewed-by: Dmitri Shuralyov <dmitshur@golang.org>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Peter Weinberger <pjw@google.com>
diff --git a/internal/gcsfs/gcsfs.go b/internal/gcsfs/gcsfs.go
new file mode 100644
index 0000000..88234f9
--- /dev/null
+++ b/internal/gcsfs/gcsfs.go
@@ -0,0 +1,281 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package gcsfs implements io/fs for GCS, adding writability.
+package gcsfs
+
+import (
+	"context"
+	"errors"
+	"io"
+	"io/fs"
+	"path"
+	"strings"
+	"time"
+
+	"cloud.google.com/go/storage"
+	"google.golang.org/api/iterator"
+)
+
+// Create makes a new writable file called name on fsys. If fsys is not a
+// CreateFS, it returns a *fs.PathError whose Err is "not implemented".
+func Create(fsys fs.FS, name string) (WriteFile, error) {
+	if creator, ok := fsys.(CreateFS); ok {
+		return creator.Create(name)
+	}
+	return nil, &fs.PathError{Op: "create", Path: name, Err: errors.New("not implemented")}
+}
+
+// CreateFS is an fs.FS that supports creating writable files.
+type CreateFS interface {
+	fs.FS
+	Create(string) (WriteFile, error) // argument is the name of the file to create
+}
+
+// WriteFile is an fs.File that can be written to.
+// The behavior of writing and reading the same file is undefined.
+type WriteFile interface {
+	fs.File   // Stat, Read and Close
+	io.Writer // Write
+}
+
+// gcsFS implements fs.FS and CreateFS for GCS.
+type gcsFS struct {
+	ctx    context.Context       // used for every GCS call made through this FS
+	client *storage.Client       // client the bucket handle was obtained from
+	bucket *storage.BucketHandle // handle for the bucket holding the objects
+	prefix string                // joined in front of names to form object names
+}
+
+// CreateFS embeds fs.FS, so this single assertion covers both interfaces.
+var _ CreateFS = (*gcsFS)(nil)
+
+// NewFS returns an fs.FS for the named bucket that uses ctx for all of its
+// operations. No network access happens here, so an FS can be created and
+// destroyed once per context.
+//
+// Once ctx has finished, treat every object obtained from this FS as invalid.
+// In particular, Writers and Readers will be canceled.
+func NewFS(ctx context.Context, client *storage.Client, bucket string) fs.FS {
+	fsys := new(gcsFS)
+	fsys.ctx = ctx
+	fsys.client = client
+	fsys.bucket = client.Bucket(bucket)
+	return fsys
+}
+
+func (fsys *gcsFS) object(name string) *storage.ObjectHandle {
+	return fsys.bucket.Object(path.Join(fsys.prefix, name)) // object name is the FS prefix joined with name
+}
+
+// Open returns a handle for the named file. It validates the name but makes no GCS calls.
+func (fsys *gcsFS) Open(name string) (fs.File, error) {
+	switch {
+	case !validPath(name):
+		return nil, &fs.PathError{Op: "open", Path: name, Err: fs.ErrInvalid}
+	case name == ".":
+		// The root directory is represented by the empty name.
+		name = ""
+	}
+	file := &GCSFile{fs: fsys}
+	file.name = strings.TrimSuffix(name, "/")
+	return file, nil
+}
+
+// Create returns a writable handle for the named file, validated as in Open.
+func (fsys *gcsFS) Create(name string) (WriteFile, error) {
+	opened, err := fsys.Open(name)
+	if err == nil {
+		return opened.(*GCSFile), nil
+	}
+	return nil, err
+}
+
+// validPath is fs.ValidPath plus a ban on backslashes, which fstest likes to send us.
+func validPath(name string) bool {
+	return !strings.ContainsRune(name, '\\') && fs.ValidPath(name)
+}
+
+// GCSFile implements fs.File for GCS. It is also a WriteFile.
+// The reader, writer and iterator are each created on first use.
+type GCSFile struct {
+	fs   *gcsFS // file system this file was opened from
+	name string // slash-separated name without a trailing "/"; "" is the root
+
+	reader   io.ReadCloser           // set by the first Read
+	writer   io.WriteCloser          // set by the first Write
+	iterator *storage.ObjectIterator // set by the first ReadDir
+}
+
+var _ fs.ReadDirFile = (*GCSFile)(nil) // fs.ReadDirFile embeds fs.File
+var _ WriteFile = (*GCSFile)(nil)      // fs.File plus io.Writer
+
+func (f *GCSFile) Close() error {
+	if f.reader != nil {
+		defer f.reader.Close()
+	}
+	if f.writer != nil {
+		defer f.writer.Close()
+	}
+
+	if f.reader != nil {
+		err := f.reader.Close()
+		if err != nil {
+			return f.translateError("close", err)
+		}
+	}
+	if f.writer != nil {
+		err := f.writer.Close()
+		if err != nil {
+			return f.translateError("close", err)
+		}
+	}
+	return nil
+}
+
+func (f *GCSFile) Read(b []byte) (int, error) {
+	if f.reader == nil {
+		var err error
+		f.reader, err = f.fs.object(f.name).NewReader(f.fs.ctx)
+		if err != nil {
+			return 0, f.translateError("read", err)
+		}
+	}
+	n, err := f.reader.Read(b)
+	return n, f.translateError("read", err)
+}
+
+// Write writes to the GCS object associated with this File.
+//
+// Any existing object with the same name is replaced; otherwise a new one is
+// created. The object will not be available (and any previous object will
+// remain) until Close has been called. Errors are returned as *fs.PathError.
+func (f *GCSFile) Write(b []byte) (int, error) {
+	if f.writer == nil {
+		f.writer = f.fs.object(f.name).NewWriter(f.fs.ctx)
+	}
+	n, err := f.writer.Write(b)
+	return n, f.translateError("write", err)
+}
+
+// ReadDir implements io/fs.ReadDirFile.
+//
+// With n > 0 it returns at most n entries, and io.EOF is reported only by a
+// call that has no entries left to return, so a final partial batch is not
+// paired with an error. With n <= 0 it returns all remaining entries and
+// reaching the end of the listing is not an error.
+func (f *GCSFile) ReadDir(n int) ([]fs.DirEntry, error) {
+	if f.iterator == nil {
+		f.iterator = f.fs.iterator(f.name)
+	}
+	var result []fs.DirEntry
+	for n <= 0 || len(result) < n {
+		info, err := f.iterator.Next()
+		if err == iterator.Done {
+			if n > 0 && len(result) == 0 {
+				return nil, io.EOF
+			}
+			// With entries in hand, leave io.EOF for the next call.
+			return result, nil
+		}
+		if err != nil {
+			return result, f.translateError("readdir", err)
+		}
+		result = append(result, &gcsFileInfo{attrs: info})
+	}
+	return result, nil
+}
+
+// Stat returns information about the file.
+// The returned FileInfo exposes *storage.ObjectAttrs as its Sys() result.
+func (f *GCSFile) Stat() (fs.FileInfo, error) {
+	// Check for a real file. errors.Is also matches a wrapped sentinel.
+	attrs, err := f.fs.object(f.name).Attrs(f.fs.ctx)
+	if err == nil {
+		return &gcsFileInfo{attrs: attrs}, nil
+	}
+	if !errors.Is(err, storage.ErrObjectNotExist) {
+		return nil, f.translateError("stat", err)
+	}
+	// Check for a "directory": anything listed under the name counts.
+	switch _, err := f.fs.iterator(f.name).Next(); err {
+	case nil:
+		return &gcsFileInfo{attrs: &storage.ObjectAttrs{Prefix: f.name + "/"}}, nil
+	case iterator.Done:
+		return nil, f.translateError("stat", storage.ErrObjectNotExist)
+	default:
+		// A failed listing says nothing about whether the name exists.
+		return nil, f.translateError("stat", err)
+	}
+}
+
+// translateError wraps err in a *fs.PathError for op; nil and io.EOF pass through.
+func (f *GCSFile) translateError(op string, err error) error {
+	if err == nil || err == io.EOF {
+		return err
+	}
+	if errors.Is(err, storage.ErrBucketNotExist) || errors.Is(err, storage.ErrObjectNotExist) {
+		err = fs.ErrNotExist // errors.Is also catches the sentinels when they are wrapped
+	} else if pe, ok := err.(*fs.PathError); ok {
+		err = pe.Err // avoid nesting one PathError inside another
+	}
+	return &fs.PathError{Op: op, Path: strings.TrimPrefix(f.name, f.fs.prefix), Err: err}
+}
+
+// gcsFileInfo adapts *storage.ObjectAttrs to both fs.FileInfo and fs.DirEntry.
+type gcsFileInfo struct {
+	attrs *storage.ObjectAttrs // a non-empty Prefix marks a "directory"
+}
+
+var _ fs.FileInfo = (*gcsFileInfo)(nil)
+var _ fs.DirEntry = (*gcsFileInfo)(nil)
+
+func (fi *gcsFileInfo) Name() string {
+	full := fi.attrs.Name
+	if fi.IsDir() {
+		full = fi.attrs.Prefix
+	}
+	return path.Base(full) // last path element only
+}
+
+func (fi *gcsFileInfo) Size() int64 {
+	return fi.attrs.Size
+}
+
+func (fi *gcsFileInfo) Mode() fs.FileMode {
+	mode := fs.FileMode(0666) // check fi.attrs.ACL?
+	if fi.IsDir() {
+		mode = fs.ModeDir | 0777
+	}
+	return mode
+}
+
+func (fi *gcsFileInfo) ModTime() time.Time {
+	return fi.attrs.Updated
+}
+
+func (fi *gcsFileInfo) IsDir() bool {
+	return len(fi.attrs.Prefix) > 0
+}
+
+func (fi *gcsFileInfo) Sys() interface{} {
+	return fi.attrs // the *storage.ObjectAttrs this info was built from
+}
+
+func (fi *gcsFileInfo) Info() (fs.FileInfo, error) {
+	return fi, nil // already a FileInfo; no extra lookup is needed
+}
+
+func (fi *gcsFileInfo) Type() fs.FileMode {
+	return fi.Mode().Type()
+}
+
+// iterator starts a listing for name. The query is delimited by "/" and
+// restricted to the prefix formed from the FS prefix and name.
+func (fsys *gcsFS) iterator(name string) *storage.ObjectIterator {
+	query := &storage.Query{Delimiter: "/"}
+	// An empty joined path keeps an empty Prefix instead of becoming "/".
+	if dir := path.Join(fsys.prefix, name); dir != "" {
+		query.Prefix = dir + "/"
+	}
+	return fsys.bucket.Objects(fsys.ctx, query)
+}
diff --git a/internal/gcsfs/gcsfs_test.go b/internal/gcsfs/gcsfs_test.go
new file mode 100644
index 0000000..d66e939
--- /dev/null
+++ b/internal/gcsfs/gcsfs_test.go
@@ -0,0 +1,77 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gcsfs
+
+import (
+	"context"
+	"flag"
+	"io/fs"
+	"io/ioutil"
+	"path/filepath"
+	"testing"
+	"testing/fstest"
+	"time"
+
+	"cloud.google.com/go/storage"
+	"google.golang.org/api/option"
+)
+
+var slowTest = flag.Bool("slow", false, "run slow tests that access GCS") // checked by TestGCSFS, which skips itself when unset
+
+// TestGCSFS runs the fstest conformance suite against the vcs-test bucket and
+// against its "auth" subtree. It is skipped unless the -slow flag is set.
+func TestGCSFS(t *testing.T) {
+	if !*slowTest {
+		t.Skip("reads a largeish GCS bucket")
+	}
+	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
+	defer cancel()
+	client, err := storage.NewClient(context.Background(), option.WithScopes(storage.ScopeReadOnly))
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer client.Close() // release the client's resources when the test ends
+	fsys := NewFS(ctx, client, "vcs-test")
+	if err := fstest.TestFS(fsys, "auth/or401.zip", "bzr/hello.zip"); err != nil {
+		t.Error(err)
+	}
+
+	// The same checks must hold for a subtree obtained through fs.Sub.
+	sub, err := fs.Sub(fsys, "auth")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if err := fstest.TestFS(sub, "or401.zip"); err != nil {
+		t.Error(err)
+	}
+}
+
+func TestDirFS(t *testing.T) {
+	if err := fstest.TestFS(DirFS("./testdata/dirfs"), "a", "b", "dir/x"); err != nil { // expects these files under testdata/dirfs
+		t.Fatal(err)
+	}
+}
+
+// TestDirFSWrite checks that data written through Create lands on disk.
+func TestDirFSWrite(t *testing.T) {
+	temp, want := t.TempDir(), "hey\n"
+	f, err := Create(DirFS(temp), "fsystest.txt")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if _, err := f.Write([]byte(want)); err != nil {
+		t.Fatal(err)
+	}
+	if err := f.Close(); err != nil {
+		t.Fatal(err)
+	}
+	got, err := ioutil.ReadFile(filepath.Join(temp, "fsystest.txt"))
+	if err != nil {
+		t.Fatal(err)
+	}
+	if string(got) != want {
+		t.Fatalf("unexpected file contents %q, want %q", string(got), want)
+	}
+}
diff --git a/internal/gcsfs/osfs.go b/internal/gcsfs/osfs.go
new file mode 100644
index 0000000..f9da94e
--- /dev/null
+++ b/internal/gcsfs/osfs.go
@@ -0,0 +1,65 @@
+package gcsfs
+
+import (
+	"io/fs"
+	"os"
+	"path"
+	"runtime"
+)
+
+var _ CreateFS = (*dirFS)(nil) // CreateFS embeds fs.FS, so both are covered
+
+// DirFS returns a file system rooted at dir; unlike os.DirFS it supports Create.
+func DirFS(dir string) fs.FS {
+	return dirFS(dir)
+}
+
+// containsAny reports whether s contains any of the bytes in chars.
+func containsAny(s, chars string) bool {
+	found := false
+	for i := 0; i < len(s) && !found; i++ {
+		for j := 0; j < len(chars) && !found; j++ {
+			found = s[i] == chars[j]
+		}
+	}
+	return found
+}
+
+type dirFS string // directory that every name passed to Open, Stat and Create is appended to
+
+func (dir dirFS) Open(name string) (fs.File, error) {
+	if runtime.GOOS == "windows" && containsAny(name, `\:`) || !fs.ValidPath(name) {
+		return nil, &fs.PathError{Op: "open", Path: name, Err: fs.ErrInvalid}
+	}
+	file, err := os.Open(string(dir) + "/" + name)
+	if err == nil {
+		return file, nil
+	}
+	return nil, err // a bare nil keeps the fs.File interface itself nil
+}
+
+// Stat returns the FileInfo for name beneath dir.
+//
+// Names that fail fs.ValidPath, or that on Windows contain a backslash or a
+// colon, are rejected with a *fs.PathError wrapping fs.ErrInvalid.
+func (dir dirFS) Stat(name string) (fs.FileInfo, error) {
+	if !fs.ValidPath(name) || runtime.GOOS == "windows" && containsAny(name, `\:`) {
+		return nil, &fs.PathError{Op: "stat", Path: name, Err: fs.ErrInvalid}
+	}
+	return os.Stat(string(dir) + "/" + name)
+}
+
+func (dir dirFS) Create(name string) (WriteFile, error) {
+	if !fs.ValidPath(name) || runtime.GOOS == "windows" && containsAny(name, `\:`) {
+		return nil, &fs.PathError{Op: "open", Path: name, Err: fs.ErrInvalid}
+	}
+	created, err := os.Create(string(dir) + "/" + name)
+	if err == nil {
+		return created, nil
+	}
+	return nil, err // a bare nil keeps the WriteFile interface itself nil
+}
+
+func (dir dirFS) Sub(subDir string) (fs.FS, error) {
+	return dirFS(path.Join(string(dir), subDir)), nil // NOTE(review): unlike Open, subDir is not checked with fs.ValidPath — confirm ".." cannot escape dir
+}
diff --git a/internal/gcsfs/testdata/dirfs/a b/internal/gcsfs/testdata/dirfs/a
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/internal/gcsfs/testdata/dirfs/a
diff --git a/internal/gcsfs/testdata/dirfs/b b/internal/gcsfs/testdata/dirfs/b
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/internal/gcsfs/testdata/dirfs/b
diff --git a/internal/gcsfs/testdata/dirfs/dir/x b/internal/gcsfs/testdata/dirfs/dir/x
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/internal/gcsfs/testdata/dirfs/dir/x