blob: 779562a7f2efa880f8829a2801ee83ae3ae2e3da [file] [log] [blame] [edit]
// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package llmapp
import (
"context"
"encoding/json"
"errors"
"fmt"
"testing"
"golang.org/x/oscar/internal/llm"
"golang.org/x/oscar/internal/storage"
)
// RelatedAnalysis is the output of [AnalyzeRelated].
// It pairs the embedded [Result] with the LLM's response decoded
// into a [Related] value.
type RelatedAnalysis struct {
// Embedded; AnalyzeRelated fills it with a copy of the result returned
// by the underlying overview call.
Result
// The LLM's response, unmarshaled into a Go struct.
Output Related
}
// Related represents the desired JSON structure of the LLM output
// requested by [AnalyzeRelated].
// See [relatedSchema] for a description of the fields.
//
// IMPORTANT: If you add, remove or edit the types or JSON names of
// fields in this struct, edit [relatedSchema] and
// [relatedTestOutput] accordingly.
type Related struct {
// Summary is decoded from the "original_summary" property.
// NOTE(review): presumably a summary of the original document; the
// schema provides no description for this property - confirm.
Summary string `json:"original_summary"`
// Related holds one entry per related document. AnalyzeRelated rejects
// LLM output whose entry count differs from the number of related
// documents it was given.
Related []RelatedDoc `json:"related"`
}
// RelatedDoc represents the desired JSON structure of the
// LLM output for a single related document.
// The field descriptions below mirror the ones given to the LLM
// in [relatedSchema].
type RelatedDoc struct {
// Title is the title of the document.
Title string `json:"title"`
// URL is the URL of the document.
URL string `json:"url"`
// Summary is a one-sentence summary of the document.
Summary string `json:"summary"`
// Relationship explains how the document is related to the original document.
Relationship string `json:"relationship"`
// Relevance is how relevant the document is. The schema restricts it to
// "HIGH", "MEDIUM", "LOW" or "NONE"; it is not validated when decoding.
Relevance string `json:"relevance"`
// RelevanceReason explains the reasoning for the relevance score.
RelevanceReason string `json:"relevance_reason"`
}
// The [*llm.Schema] corresponding to the [Related] type.
// It describes a JSON object with a required "original_summary" string
// and a required "related" array. Each array item is an object whose six
// string properties (mirroring [RelatedDoc]) are all required.
//
// IMPORTANT: If you add, remove, or edit the names or types of objects
// in this schema, edit [Related] and [relatedTestOutput] accordingly.
var relatedSchema = &llm.Schema{
Type: llm.TypeObject,
Properties: map[string]*llm.Schema{
// Matches the JSON name of Related.Summary; no description is supplied
// for this property.
"original_summary": {
Type: llm.TypeString,
},
// Matches the JSON name of Related.Related; each item has the shape of
// a RelatedDoc.
"related": {
Type: llm.TypeArray,
Items: &llm.Schema{
Type: llm.TypeObject,
Properties: map[string]*llm.Schema{
"title": {
Type: llm.TypeString,
Description: "The title of the document.",
},
"url": {
Type: llm.TypeString,
Description: "The URL of the document.",
},
"summary": {
Type: llm.TypeString,
Description: "Summarize the document in one sentence.",
},
"relationship": {
Type: llm.TypeString,
Description: "Explain how the document is related to the original document.",
},
// The only property constrained to a fixed set of values.
"relevance": {
Type: llm.TypeString,
Enum: []string{
"HIGH",
"MEDIUM",
"LOW",
"NONE",
},
Description: "How relevant is the document?",
},
"relevance_reason": {
Type: llm.TypeString,
Description: "Explain reasoning for relevance score.",
},
},
// Every per-document property is required.
Required: []string{"title", "url", "summary", "relationship", "relevance", "relevance_reason"},
},
},
},
// Both top-level properties are required.
Required: []string{"original_summary", "related"},
}
// AnalyzeRelated asks the LLM for a structured analysis of doc together
// with the given related documents and returns it as a [RelatedAnalysis].
//
// It returns an error if doc is nil, if related is empty, if the
// underlying overview call fails, if the response cannot be unmarshaled
// into a [Related], or if the response does not contain exactly one
// entry per related document.
//
// TODO(tatianabradley): Return an error if the LLM generates JSON with missing or unexpected elements.
func (c *Client) AnalyzeRelated(ctx context.Context, doc *Doc, related []*Doc) (*RelatedAnalysis, error) {
	// Validate the inputs before doing any work.
	switch {
	case doc == nil:
		return nil, errors.New("llmapp AnalyzeRelated: no doc")
	case len(related) == 0:
		return nil, errors.New("llmapp AnalyzeRelated: no related docs")
	}

	// The original document and the related documents are passed as two
	// separately labeled groups.
	original := &docGroup{label: "original", docs: []*Doc{doc}}
	others := &docGroup{label: "related", docs: related}
	result, err := c.overview(ctx, docAndRelated, original, others)
	if err != nil {
		return nil, fmt.Errorf("llmapp AnalyzeRelated: cannot generate response: %w", err)
	}

	// Decode the raw response text into its typed form.
	var parsed Related
	if err := json.Unmarshal([]byte(result.Response), &parsed); err != nil {
		return nil, fmt.Errorf("llmapp AnalyzeRelated: cannot unmarshal response: %w\nresponse: %s", err, result.Response)
	}

	// The response must describe every related document exactly once.
	if want, got := len(related), len(parsed.Related); got != want {
		return nil, fmt.Errorf("llmapp AnalyzeRelated: malformed LLM output (unexpected number of related docs: want %d, got %d)", want, got)
	}

	analysis := &RelatedAnalysis{
		Result: *result,
		Output: parsed,
	}
	return analysis, nil
}
// RelatedTestGenerator returns an [llm.ContentGenerator] for use in tests
// of the [Client.AnalyzeRelated] method. The generator ignores its inputs
// and always answers with the JSON produced by relatedTestOutput.
// numRelated is the number of related documents that will be provided to
// the [Client.AnalyzeRelated] call.
//
// For testing.
func RelatedTestGenerator(t *testing.T, numRelated int) llm.ContentGenerator {
	t.Helper()

	// Only the raw JSON is needed here; the typed value is discarded.
	response, _ := relatedTestOutput(t, numRelated)

	// Return the same canned response regardless of schema or prompt.
	generate := func(context.Context, *llm.Schema, []llm.Part) (string, error) {
		return response, nil
	}
	return llm.TestContentGenerator("related-test-generator", generate)
}
// relatedTestOutput builds a canned response for the LLM call in
// [AnalyzeRelated]: a [Related] value with numRelated identical placeholder
// entries, returned both as a JSON string (raw) and as the struct itself
// (typed).
// numRelated is the number of related documents that will be provided to
// the [AnalyzeRelated] call.
//
// For testing.
func relatedTestOutput(t *testing.T, numRelated int) (raw string, typed Related) {
	t.Helper()

	// Every related document gets the same placeholder content.
	// NOTE(review): "related relevance" is not one of the enum values listed
	// in relatedSchema (HIGH, MEDIUM, LOW, NONE); AnalyzeRelated does not
	// check this, but confirm whether the placeholder is intentional.
	placeholder := RelatedDoc{
		Title:           "title",
		URL:             "URL",
		Summary:         "related summary",
		Relationship:    "related relationship",
		Relevance:       "related relevance",
		RelevanceReason: "related relevance reason",
	}

	docs := make([]RelatedDoc, numRelated)
	for i := range docs {
		docs[i] = placeholder
	}

	typed = Related{
		Summary: "original summary",
		Related: docs,
	}
	raw = string(storage.JSON(typed))
	return raw, typed
}