blob: f4e99858baf9a268c69a73b9b0d404d4862e1298 [file] [log] [blame]
// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package firestore
import (
"context"
"fmt"
"testing"
"cloud.google.com/go/firestore"
"golang.org/x/oscar/internal/gcp/grpcerrors"
"golang.org/x/oscar/internal/llm"
"golang.org/x/oscar/internal/storage"
"golang.org/x/oscar/internal/testutil"
"google.golang.org/api/iterator"
)
func TestVectorDB(t *testing.T) {
rr, project := openRR(t, "testdata/vectordb.grpcrr")
ctx := context.Background()
if rr.Recording() {
deleteVectorDBs(t, project, firestoreTestDatabase)
}
storage.TestVectorDB(t, func() storage.VectorDB {
vdb, err := NewVectorDB(ctx, testutil.Slogger(t), project, firestoreTestDatabase, "test", rr.ClientOptions()...)
if err != nil {
t.Fatal(err)
}
return vdb
})
}
// TestDBLimit checks that [VectorDB.All] properly restarts from query limits (see docLimit).
func TestVectorDBLimit(t *testing.T) {
// Re-record with
// OSCAR_PROJECT=oscar-go-1 go test -v -run=TestVectorDBLimit -grpcrecord=/vectordblimit
rr, project := openRR(t, "testdata/vectordblimit.grpcrr")
ctx := context.Background()
if rr.Recording() {
deleteVectorDBs(t, project, firestoreTestDatabase)
}
vdb, err := NewVectorDB(ctx, testutil.Slogger(t), project, firestoreTestDatabase, "test", rr.ClientOptions()...)
if err != nil {
t.Fatal(err)
}
defer vdb.Close()
vdb.fs.docQueryLimit = 5
N := (2 * vdb.fs.docQueryLimit) + 1 // ensure we make a few iterations
b := vdb.Batch()
limitKey := func(n int) string {
return fmt.Sprintf("limit.%09d", n)
}
const vecSize = 16 // matches storage.TestVectorDB and test database index config
for i := range N {
b.Set(limitKey(i), make(llm.Vector, vecSize))
b.MaybeApply()
}
b.Apply()
n := 0
for k := range vdb.All() {
if string(k) != limitKey(n) {
t.Fatalf("limit read #%d: have %q want %q", n, k, limitKey(n))
}
n++
}
if n != N {
t.Errorf("limit reads: scanned %d keys, want %d", n, N)
}
}
// deleteVectorDBs deletes all the vectors and their related collections from the
// given Firestore DB, specified as a project and database name.
func deleteVectorDBs(t *testing.T, project, database string) {
// Delete all documents in all collections named "vectors".
// Although these all live under the "vectorDBs" collection, it isn't possible
// to delete that collection. In Firestore, only documents can be deleted,
// and they can only be iterated over from their immediate parent collection.
// The CollectionGroup call selects all collections named "vectors", regardless
// of their parents. (There is a way to recursively walk the hierarchy, but using
// a collection group is simpler.)
ctx := context.Background()
client, err := firestore.NewClientWithDatabase(ctx, project, database)
if err != nil {
t.Fatal(err)
}
defer client.Close()
bw := client.BulkWriter(ctx)
// Iterate over all "vectors" collections, regardless of namespace.
// These collections are the immediate parents of the vector documents,
// which are what we want to delete.
Restart:
iter := client.CollectionGroup("vectors").Documents(ctx)
last := ""
for {
ds, err := iter.Next()
if err == iterator.Done {
break
}
if grpcerrors.IsUnavailable(err) && last != "" {
t.Logf("vector delete timeout; restarting; last=%s", decodeKey(last))
goto Restart
}
if err != nil {
t.Fatal(err)
}
bw.Delete(ds.Ref)
last = ds.Ref.ID
}
bw.Flush()
}