blob: 741d374cf4f00976ed98e90ad8f6d5a370c89d32 [file] [log] [blame]
// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package main
import (
"bytes"
"context"
"encoding/json"
"fmt"
"io"
"math"
"net/http"
"net/url"
"path"
"strings"
"github.com/google/safehtml/template"
"golang.org/x/oscar/internal/llm"
"golang.org/x/oscar/internal/storage"
)
// searchPage holds the data passed to searchPageTmpl when rendering the
// search page.
type searchPage struct {
// Query is the search text, taken from the "q" form value of the request.
Query string
// Results holds the search results for Query.
// It is left unset when Query is empty.
Results []searchResult
}
// handleSearch serves the HTML search page.
// It writes the page produced by doSearch, or replies with a
// 500 Internal Server Error carrying the error text if doSearch fails.
func (g *Gaby) handleSearch(w http.ResponseWriter, r *http.Request) {
	page, err := g.doSearch(r)
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}
	// The result of the write is deliberately ignored.
	_, _ = w.Write(page)
}
// doSearch renders the vector search page for the request r and returns
// the resulting HTML.
//
// The query is read from the "q" form value. If it is non-empty, a search is
// run for it and each result's score is rounded for display; otherwise the
// page is rendered with no results. An error from the search or from
// executing the template is returned as is.
func (g *Gaby) doSearch(r *http.Request) ([]byte, error) {
	query := r.FormValue("q")
	page := searchPage{Query: query}
	if query != "" {
		results, err := g.search(r.Context(), &searchRequest{EmbedDoc: llm.EmbedDoc{Text: query}})
		if err != nil {
			return nil, err
		}
		for i := range results {
			results[i].round()
		}
		page.Results = results
	}

	var out bytes.Buffer
	if err := searchPageTmpl.Execute(&out, page); err != nil {
		return nil, err
	}
	return out.Bytes(), nil
}
// searchRequest describes a vector search: the document to search for
// (the embedded llm.EmbedDoc) together with optional bounds on the results.
// It is the type decoded from the JSON body of a search API request.
type searchRequest struct {
Threshold float64 // lowest score to keep; default 0. Max is 1.
Limit int // max results (fewer if Threshold is set); 0 means use a fixed default
llm.EmbedDoc
}
// searchResult is a single result of a vector search: the underlying
// storage.VectorResult annotated with the kind and title of the document.
type searchResult struct {
Kind string // kind of document: issue, doc page, etc.
// Title is the document's title, or empty if the document was not found
// in the document store.
Title string
storage.VectorResult
}
// round rounds r.Score to three decimal places, in place.
func (r *searchResult) round() {
	const scale = 1e3 // three decimal places
	r.Score = math.Round(r.Score*scale) / scale
}
// defaultLimit is the maximum number of search results returned when the
// request does not specify a positive Limit.
const defaultLimit = 20
// search does a search over Gaby's vector database for the document
// described by sreq.
//
// The document is embedded with g.embed and the resulting vector is looked
// up in g.vector. At most sreq.Limit results are requested (defaultLimit if
// sreq.Limit is not positive), and results scoring below sreq.Threshold are
// dropped (no threshold is applied if sreq.Threshold is not positive).
// Each result is annotated with its kind (see docIDKind) and, if the
// document is present in g.docs, its title.
//
// It returns an error if embedding fails or yields no vector.
func (g *Gaby) search(ctx context.Context, sreq *searchRequest) ([]searchResult, error) {
	vecs, err := g.embed.EmbedDocs(ctx, []llm.EmbedDoc{sreq.EmbedDoc})
	if err != nil {
		return nil, fmt.Errorf("EmbedDocs: %w", err)
	}
	if len(vecs) == 0 {
		// Guard the index below: an embedder that returns an empty slice
		// without an error would otherwise cause a panic.
		return nil, fmt.Errorf("EmbedDocs: no embedding returned")
	}
	vec := vecs[0]

	limit := defaultLimit
	if sreq.Limit > 0 {
		limit = sreq.Limit
	}
	// Search uses normalized dot product, so higher numbers are better.
	// Max is 1, min is 0.
	threshold := 0.0
	if sreq.Threshold > 0 {
		threshold = sreq.Threshold
	}

	var srs []searchResult
	for _, r := range g.vector.Search(vec, limit) {
		if r.Score < threshold {
			// NOTE(review): stopping here assumes Search returns results in
			// decreasing score order, so all later results are below the
			// threshold too. Confirm against the vector DB's contract.
			break
		}
		title := ""
		if d, ok := g.docs.Get(r.ID); ok {
			title = d.Title
		}
		srs = append(srs, searchResult{
			Kind:         docIDKind(r.ID),
			Title:        title,
			VectorResult: r,
		})
	}
	return srs, nil
}
// docIDKind reports the kind of document identified by id, based on the
// host and path of id parsed as a URL.
// It returns the empty string if id cannot be parsed or does not match any
// known kind.
func docIDKind(id string) string {
	u, err := url.Parse(id)
	if err != nil {
		return ""
	}
	// Checked in order, so the catch-all "go.dev/" entry must stay after
	// the more specific go.dev prefixes.
	kinds := []struct{ prefix, kind string }{
		{"github.com/golang/go/issues/", "GitHubIssue"},
		{"go.dev/wiki/", "GoWiki"},
		{"go.dev/doc/", "GoDocumentation"},
		{"go.dev/ref/", "GoReference"},
		{"go.dev/blog/", "GoBlog"},
		{"go.dev/", "GoDevPage"},
	}
	hostPath := path.Join(u.Host, u.Path)
	for _, k := range kinds {
		if strings.HasPrefix(hostPath, k.prefix) {
			return k.kind
		}
	}
	return ""
}
// searchPageTmpl is the HTML template for the search page.
// It is executed with a searchPage value.
//
// The template assumes that a result's ID is a URL if and only if the
// result's Kind is non-empty: results with a Kind are rendered as links to
// their ID, and all others are rendered as plain text.
var searchPageTmpl = template.Must(template.New("").Parse(`
<!doctype html>
<html>
<head>
<title>Oscar Search</title>
</head>
<body>
<h1>Gaby search</h1>
<p>Search Gaby's database of GitHub issues and Go documentation.</p>
<form id="form" action="/search" method="GET">
<input type="text" name="q" value="{{.Query}}" required autofocus />
<input type="submit" value="Search"/>
</form>
<div id="working"></div>
<script>
const form = document.getElementById("form");
form.addEventListener("submit", (event) => {
document.getElementById("working").innerHTML = "<p style='margin-top:1rem'>Working...</p>"
})
</script>
{{with .Results -}}
{{- range . -}}
<p>{{with .Title}}{{.}}: {{end -}}
{{if .Kind -}}
<a href="{{.ID}}">{{.ID}}</a>
{{else -}}
{{.ID -}}
{{end -}}
{{" "}}({{.Score}})</p>
{{end}}
{{- else -}}
{{if .Query}}No results.{{end}}
{{- end}}
</body>
</html>
`))
// handleSearchAPI serves the JSON search API.
//
// The request body must be a JSON-encoded searchRequest. On success the
// response body is the JSON encoding of the search results, sent with a
// Content-Type of application/json. A body that cannot be read or decoded
// yields 400 Bad Request; a failure in the search itself or in encoding
// the results yields 500 Internal Server Error.
func (g *Gaby) handleSearchAPI(w http.ResponseWriter, r *http.Request) {
	sreq, err := readJSONBody[searchRequest](r)
	if err != nil {
		// The error could also come from failing to read the body, but then the
		// connection is probably broken so it doesn't matter what status we send.
		http.Error(w, err.Error(), http.StatusBadRequest)
		return
	}
	sres, err := g.search(r.Context(), sreq)
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}
	data, err := json.Marshal(sres)
	if err != nil {
		http.Error(w, "json.Marshal: "+err.Error(), http.StatusInternalServerError)
		return
	}
	// Declare the content type explicitly. Without it, net/http sniffs the
	// body and labels JSON as text/plain.
	w.Header().Set("Content-Type", "application/json")
	_, _ = w.Write(data)
}
// readJSONBody reads the entire body of r and decodes it as JSON into a
// new value of type T, returning a pointer to that value.
// The body is closed before readJSONBody returns.
// It returns an error if the body cannot be read or is not valid JSON for T.
func readJSONBody[T any](r *http.Request) (*T, error) {
	defer r.Body.Close()

	raw, err := io.ReadAll(r.Body)
	if err != nil {
		return nil, err
	}
	var v T
	if err := json.Unmarshal(raw, &v); err != nil {
		return nil, err
	}
	return &v, nil
}