blob: 6106b3de45cfc333c87c6a727c41bde38273f338 [file] [log] [blame]
// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package docs
import (
"iter"
"golang.org/x/oscar/internal/storage/timed"
)
// Source is a data source to pull into a [Corpus].
// The type parameter T is the type of the timed entries the source yields.
type Source[T Entry] interface {
	// DocWatcher returns the watcher to use to keep track
	// of the last [Sync] for this data source.
	// [Sync] ranges over the watcher's recent entries and marks
	// an entry old once its docs have been added to the corpus.
	DocWatcher() *timed.Watcher[T]
	// ToDocs converts the data to an iterator of [*Doc] values
	// that can be stored in a [Corpus].
	// It returns (nil, false) if the data should not be stored
	// in the [Corpus]; [Sync] skips such entries.
	ToDocs(T) (iter.Seq[*Doc], bool)
}
// Entry is a timed entry in a [Source].
// It is the constraint on the entry type handled by [Source] and [Sync].
type Entry interface {
	// LastWritten returns the DBTime this piece of data was last written
	// to its data source.
	// [Sync] passes this value to the watcher's MarkOld after the
	// entry's docs have been added to the corpus.
	LastWritten() timed.DBTime
}
// Sync walks the entries that src's watcher reports as recent,
// converts each one to documents with [Source.ToDocs], and adds
// those documents to the corpus dc.
//
// Entries for which ToDocs reports false are skipped and are not
// themselves marked old.
//
// Sync uses [Source.DocWatcher] to save its position across multiple calls.
//
// Sync logs each converted entry at debug level through dc's logger.
func Sync[T Entry, S Source[T]](dc *Corpus, src S) {
	watcher := src.DocWatcher()
	for entry := range watcher.Recent() {
		docSeq, embeddable := src.ToDocs(entry)
		if !embeddable {
			// Nothing to store for this entry; move on to the next one.
			continue
		}
		dc.slog.Debug("docs.Sync", "event", entry, "dbtime", entry.LastWritten())
		for doc := range docSeq {
			dc.Add(doc.ID, doc.Title, doc.Text)
		}
		// Record progress only after every doc of the entry has been added.
		watcher.MarkOld(entry.LastWritten())
	}
}
// Restart resets the position saved by src's DocWatcher, so that the
// next call to [Sync] behaves as if no data had ever been synced from src.
// As a result, all data will be converted to doc form again and re-added.
// Docs that have not changed since they were last added to the corpus
// will appear unmodified; others will be marked new in the corpus.
func Restart[T Entry](src Source[T]) {
	watcher := src.DocWatcher()
	watcher.Restart()
}
// Latest reports the latest known DBTime that src's DocWatcher
// has marked old.
func Latest[T Entry](src Source[T]) timed.DBTime {
	watcher := src.DocWatcher()
	return watcher.Latest()
}
// LatestFunc returns a function that returns the latest known DBTime marked
// old by the source's DocWatcher.
//
// The returned function calls [Latest] on src each time it is invoked,
// so it reports the value current at call time rather than the value
// at the time LatestFunc was called.
func LatestFunc[T Entry](src Source[T]) func() timed.DBTime {
	// T is inferred from src, so no explicit instantiation is needed.
	return func() timed.DBTime { return Latest(src) }
}