blob: 6ef3007e4147c7e1c46b22211af80943a1d69d56 [file] [log] [blame]
// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package githubdocs implements converting GitHub issues into text docs
// for [golang.org/x/oscar/internal/docs].
package githubdocs
import (
"context"
"fmt"
"log/slog"
"golang.org/x/oscar/internal/docs"
"golang.org/x/oscar/internal/github"
"golang.org/x/oscar/internal/storage/timed"
)
// Sync writes to dc docs corresponding to each issue in gh that is
// new since the last call to Sync.
//
// If an issue is edited on GitHub, it will appear new in gh and
// the new text will be written to dc, replacing the old issue text.
// Only the issue body (what looks like the top comment in the UI)
// is saved as a document.
// The document ID for each issue is its GitHub URL: "https://github.com/<org>/<repo>/issues/<n>".
func Sync(ctx context.Context, lg *slog.Logger, dc *docs.Corpus, gh *github.Client) {
w := gh.EventWatcher("githubdocs")
for e := range w.Recent() {
if e.API != "/issues" {
continue
}
lg.Debug("githubdocs sync", "issue", e.Issue, "dbtime", e.DBTime)
issue := e.Typed.(*github.Issue)
title := cleanTitle(issue.Title)
text := cleanBody(issue.Body)
dc.Add(fmt.Sprintf("https://github.com/%s/issues/%d", e.Project, e.Issue), title, text)
w.MarkOld(e.DBTime)
}
}
// Restart causes the next call to Sync to behave as if
// it has never sync'ed any issues before.
// The result is that all issues will be reconverted to doc form
// and re-added.
// Docs that have not changed since the last addition to the corpus
// will appear unmodified; others will be marked new in the corpus.
func Restart(lg *slog.Logger, gh *github.Client) {
gh.EventWatcher("githubdocs").Restart()
}
// Latest returns the latest known DBTime marked old by the client's Watcher.
func Latest(gh *github.Client) timed.DBTime {
return gh.EventWatcher("githubdocs").Latest()
}
// cleanTitle should clean the title for indexing.
// For now we assume the LLM is good enough at Markdown not to bother.
func cleanTitle(title string) string {
// TODO
return title
}
// cleanBody should clean the body for indexing.
// For now we assume the LLM is good enough at Markdown not to bother.
// In the future we may want to make various changes like inlining
// the programs associated with playground URLs,
// and we may also want to remove any HTML tags from the Markdown.
func cleanBody(body string) string {
// TODO
return body
}