blob: 44d6ed1ec83e50c609b9378ce8306443eb78a81c [file] [log] [blame]
// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package discussiondocs implements converting GitHub discussions into text docs
// for [golang.org/x/oscar/internal/docs].
package discussiondocs
import (
"context"
"log/slog"
"golang.org/x/oscar/internal/discussion"
"golang.org/x/oscar/internal/docs"
"golang.org/x/oscar/internal/storage/timed"
)
// Sync writes to dc docs corresponding to each discussion in gh that is
// new since the last call to Sync.
//
// If a discussion is edited on GitHub, it will appear new in gh and
// the new text will be written to dc, replacing the old discussion text.
// Only the discussion body is saved as a document.
//
// The document ID for each discussion is its GitHub URL: "https://github.com/<org>/<repo>/discussions/<n>".
//
// Sync checks ctx before handling each event and returns ctx.Err() as soon
// as the context is done. Events handled before that point have already been
// marked old on the watcher. In all other cases Sync returns nil.
func Sync(ctx context.Context, lg *slog.Logger, dc *docs.Corpus, gh *discussion.Client) error {
	w := gh.EventWatcher(watcherID)
	for e := range w.Recent() {
		// Stop promptly on cancellation instead of draining every recent event.
		if err := ctx.Err(); err != nil {
			return err
		}
		if e.API != discussion.DiscussionAPI {
			// Only discussion events are converted to documents.
			continue
		}
		lg.Debug("discussiondocs sync", "discussion", e.Discussion, "dbtime", e.DBTime)
		// NOTE(review): unchecked assertion; this panics if a DiscussionAPI
		// event ever carries a different type. Presumably the discussion
		// package guarantees the pairing; confirm.
		d := e.Typed.(*discussion.Discussion)
		title := cleanTitle(d.Title)
		text := cleanBody(d.Body)
		dc.Add(d.URL, title, text)
		// Record progress only after the document has been added.
		w.MarkOld(e.DBTime)
	}
	return nil
}
// watcherID is the event watcher name this package passes to
// gh.EventWatcher. Sync, Restart, and Latest all use this same ID.
const watcherID = "discussiondocs"
// Restart resets this package's event watcher on gh, so that the next
// call to [Sync] behaves as if no discussions had ever been synced.
// As a result, every discussion is converted to doc form again and re-added.
// Docs that have not changed since the last addition to the corpus
// will appear unmodified; others will be marked new in the corpus.
//
// The lg parameter is currently unused.
func Restart(lg *slog.Logger, gh *discussion.Client) {
	w := gh.EventWatcher(watcherID)
	w.Restart()
}
// Latest reports the most recent DBTime that this package's
// event watcher on gh has marked old.
func Latest(gh *discussion.Client) timed.DBTime {
	w := gh.EventWatcher(watcherID)
	return w.Latest()
}
// cleanTitle is the hook for preparing a discussion title for indexing.
// It is currently a pass-through that returns title unchanged, on the
// assumption that the LLM copes with Markdown well enough as is.
func cleanTitle(title string) string {
	// TODO: add real cleaning if the raw title proves insufficient.
	cleaned := title
	return cleaned
}
// cleanBody is the hook for preparing a discussion body for indexing.
// It is currently a pass-through that returns body unchanged, on the
// assumption that the LLM copes with Markdown well enough as is.
// Possible future work includes inlining the programs behind
// playground URLs and stripping HTML tags out of the Markdown.
func cleanBody(body string) string {
	// TODO: add real cleaning if the raw body proves insufficient.
	cleaned := body
	return cleaned
}