blob: 4c7d90a4c91dca64ce241cbce967ab87ce62c69e [file] [log] [blame]
// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package search
import (
"context"
"fmt"
"golang.org/x/oscar/internal/docs"
"golang.org/x/oscar/internal/llmapp"
"golang.org/x/oscar/internal/storage"
)
// OverviewResult is the result of [Overview].
//
// It embeds a pointer to the [llmapp.OverviewResult] produced by the
// LLM client, so the fields and methods of that type are promoted
// onto OverviewResult.
type OverviewResult struct {
*llmapp.OverviewResult // the LLM-generated overview
}
// Overview returns an LLM-generated overview of a document and its related documents.
//
// id is the ID of the main document, which must be present in both the
// docs corpus dc and the vector db vdb. Overview finds related documents
// using vector search (see [Vector]) with fixed options, converts the main
// document and each related document to an [*llmapp.Doc], and passes them
// to lc.RelatedOverview.
//
// Overview returns an error if the main document is missing from dc or vdb,
// if any related document found by the search is missing from dc, or if the
// LLM call fails.
func Overview(ctx context.Context, lc *llmapp.Client, vdb storage.VectorDB, dc *docs.Corpus, id string) (*OverviewResult, error) {
	doc, ok := llmDoc(dc, "main", id)
	if !ok {
		return nil, fmt.Errorf("search.Overview: main doc %q not in docs corpus", id)
	}
	rs, err := searchRelated(vdb, dc, id)
	if err != nil {
		return nil, err
	}
	var related []*llmapp.Doc
	for _, r := range rs {
		d, ok := llmDoc(dc, "related", r.ID)
		if !ok {
			// Report the related document's own ID (not the main document's),
			// since that is the one missing from the corpus.
			return nil, fmt.Errorf("search.Overview: related doc %q not in docs corpus", r.ID)
		}
		related = append(related, d)
	}
	overview, err := lc.RelatedOverview(ctx, doc, related)
	if err != nil {
		return nil, err
	}
	return &OverviewResult{overview}, nil
}
// maxResults is the maximum number of related documents that
// searchRelated returns. The main document itself does not count
// toward this limit.
// NOTE(review): declared as a var rather than a const, presumably so
// that it can be overridden (e.g. in tests) — confirm.
var maxResults = 5
// searchRelated returns at most maxResults documents related to the
// document identified by id.
//
// It looks up the vector stored for id in vdb and runs [Vector] with it.
// If the first result is the document itself, that result is dropped.
// searchRelated returns an error if vdb holds no vector for id.
func searchRelated(vdb storage.VectorDB, dc *docs.Corpus, id string) ([]Result, error) {
	vec, found := vdb.Get(id)
	if !found {
		return nil, fmt.Errorf("search: main doc %q not in vector db", id)
	}

	// Ask for one result more than needed, leaving room for the
	// main document to show up in its own search results.
	req := &VectorRequest{
		Options: Options{Limit: maxResults + 1},
		Vector:  vec,
	}
	results := Vector(vdb, dc, req)

	// Skip the leading result when it is the main document.
	if len(results) != 0 && results[0].ID == id {
		results = results[1:]
	}
	// Cap the number of results at maxResults.
	if len(results) > maxResults {
		results = results[:maxResults]
	}
	return results, nil
}
// llmDoc looks up the document identified by id in dc and builds an
// [*llmapp.Doc] from it, using t as the Type and copying the document's
// Title and Text. The URL field is set to the document's ID only when
// isURL reports that the ID is a URL.
//
// The boolean result is false (and the doc nil) if dc has no document
// with the given id.
func llmDoc(dc *docs.Corpus, t string, id string) (*llmapp.Doc, bool) {
	src, found := dc.Get(id)
	if !found {
		return nil, false
	}

	out := new(llmapp.Doc)
	out.Type = t
	out.Title = src.Title
	out.Text = src.Text
	// Only IDs that are URLs are exposed as the doc's URL.
	if isURL(src.ID) {
		out.URL = src.ID
	}
	return out, true
}