blob: 3fbd5bf51de75b04f6a3beaf8673b4ff01cdd833 [file] [log] [blame]
// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package storage defines the storage abstractions needed for Oscar:
// [DB], a basic key-value store, and [VectorDB], a vector database.
// The storage needs are intentionally minimal (avoiding, for example,
// a requirement on SQL), to admit as many implementations as possible.
package storage
import (
"encoding/json"
"fmt"
"iter"
"log/slog"
"strconv"
"strings"
"rsc.io/ordered"
)
// A DB is a key-value database.
//
// DB operations are assumed not to fail.
// They panic, intending to take down the program,
// if there is an error accessing the database.
// The assumption is that the program cannot possibly
// continue without the database, since that's where all the state is stored.
// Similarly, clients of DB conventionally panic
// using [DB.Panic] if the database returns corrupted data.
// Code using multiple parallel database operations can recover
// at the outermost calls.
type DB interface {
// Lock acquires a lock on the given name, which need not exist in the database.
// After a successful Lock(name),
// any other call to Lock(name) from any other client of the database
// (including in another process, for shared databases)
// must block until Unlock(name) has been called.
// In a shared database, a lock may also unlock
// when the client disconnects or times out.
Lock(name string)
// Unlock releases the lock with the given name,
// which the caller must have locked.
Unlock(name string)
// Set sets the value associated with key to val.
// The key must not be of length zero.
Set(key, val []byte)
// Get looks up the value associated with key.
// If there is no entry for key in the database, Get returns nil, false.
// Otherwise it returns val, true.
Get(key []byte) (val []byte, ok bool)
// Scan returns an iterator over all key-value pairs with start ≤ key ≤ end.
// The second value in each iteration pair is a function returning the value,
// not the value itself:
//
// for key, getVal := range db.Scan([]byte("aaa"), []byte("zzz")) {
// val := getVal()
// fmt.Printf("%q: %q\n", key, val)
// }
//
// In iterations that only need the keys or only need the values for a subset of keys,
// some DB implementations may avoid work when the value function is not called.
//
// A Scan may or may not observe concurrent modifications made
// using Set, Delete, and DeleteRange.
Scan(start, end []byte) iter.Seq2[[]byte, func() []byte]
// Delete deletes any value associated with key.
// Delete of an unset key is a no-op.
Delete(key []byte)
// DeleteRange deletes all key-value pairs with start ≤ key ≤ end.
DeleteRange(start, end []byte)
// Batch returns a new [Batch] that accumulates database mutations
// to apply in an atomic operation. In addition to the atomicity, using a
// Batch for bulk operations is more efficient than making each
// change using repeated calls to DB's Set, Delete, and DeleteRange methods.
Batch() Batch
// Flush flushes DB changes to permanent storage.
// Flush must be called before the process crashes or exits,
// or else any changes since the previous Flush may be lost.
Flush()
// Close flushes and then closes the database.
// Like the other routines, it panics if an error happens,
// so there is no error result.
Close()
// Panic logs the error message and args using the database's slog.Logger
// and then panics with the text formatting of its arguments.
// It is meant to be called when database corruption or other
// database-related “can't happen” conditions have been detected.
Panic(msg string, args ...any)
}
// A Batch accumulates database mutations that are applied to a [DB]
// as a single atomic operation. Applying bulk operations in a batch
// is also more efficient than making individual [DB] method calls.
// The batched operations apply in the order they are made.
// For example, Set("a", "b") followed by Delete("a") is the same as
// Delete("a"), while Delete("a") followed by Set("a", "b") is the same
// as Set("a", "b").
type Batch interface {
// Delete deletes any value associated with key.
// Delete of an unset key is a no-op.
//
// Delete does not retain any reference to key after returning.
Delete(key []byte)
// DeleteRange deletes all key-value pairs with start ≤ key ≤ end.
//
// DeleteRange does not retain any reference to start or end after returning.
DeleteRange(start, end []byte)
// Set sets the value associated with key to val.
//
// Set does not retain any reference to key or val after returning.
Set(key, val []byte)
// MaybeApply calls Apply if the batch is getting close to full.
// Every Batch has a limit to how many operations can be batched,
// so in a bulk operation where atomicity of the entire batch is not a concern,
// calling MaybeApply gives the Batch implementation
// permission to flush the batch at specific “safe points”.
// A typical limit for a batch is about 100MB worth of logged operations.
// MaybeApply reports whether it called Apply.
MaybeApply() bool
// Apply applies all the batched operations to the underlying DB
// as a single atomic unit.
// When Apply returns, the Batch is an empty batch ready for
// more operations.
Apply()
}
// Panic panics with the text formatting of its arguments.
// It is meant to be called for database errors or corruption,
// which have been defined to be impossible.
// (See the [DB] documentation.)
//
// Panic is expected to be used by DB implementations.
// DB clients should use the [DB.Panic] method instead.
func Panic(msg string, args ...any) {
var b strings.Builder
slog.New(slog.NewTextHandler(&b, nil)).Error(msg, args...)
s := b.String()
if _, rest, ok := strings.Cut(s, " level=ERROR msg="); ok {
s = rest
}
panic(strings.TrimSpace(s))
}
// JSON converts x to JSON and returns the result.
// It panics if there is any error converting x to JSON.
// Since whether x can be converted to JSON depends
// almost entirely on its type, a marshaling error indicates a
// bug at the call site.
//
// (The exception is certain malformed UTF-8 and floating-point
// infinity and NaN. Code must be careful not to use JSON with those.)
func JSON(x any) []byte {
js, err := json.Marshal(x)
if err != nil {
panic(fmt.Sprintf("json.Marshal: %v", err))
}
return js
}
// Fmt formats data for printing,
// first trying [ordered.DecodeFmt] in case data is an [ordered encoding],
// then trying a backquoted string if possible
// (handling simple JSON data),
// and finally resorting to [strconv.QuoteToASCII].
func Fmt(data []byte) string {
if s, err := ordered.DecodeFmt(data); err == nil {
return s
}
s := string(data)
if strconv.CanBackquote(s) {
return "`" + s + "`"
}
return strconv.QuoteToASCII(s)
}