blob: 2304d60bf3f0cdedf877b0a92ced50df44575496 [file] [log] [blame]
// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package overview
import (
"context"
"encoding/json"
"fmt"
"log/slog"
"strings"
"time"
"golang.org/x/oscar/internal/actions"
"golang.org/x/oscar/internal/github"
"golang.org/x/oscar/internal/github/wrap"
"golang.org/x/oscar/internal/storage"
"golang.org/x/oscar/internal/storage/timed"
"rsc.io/ordered"
)
// poster is the state needed to modify GitHub.
type poster struct {
slog *slog.Logger
gh *github.Client // the GitHub client to use to read/write to GitHub
db storage.DB // the database to use to store state
watcher *timed.Watcher[*github.Event] // a watcher over GitHub events, used to remember which events we have already processed
minComments int // the minimum number of (non-skipped) comments an issue must have to get an overview (default: [defaultMinComments])
maxIssueAge time.Duration // the maximum age (time since creation) of an issue to get an overview (default: [defaultMaxAge])
skipIssueAuthors map[string]bool // skip issues authored by these GitHub users (default: none)
skipCommentAuthors map[string]bool // skip comments authored by these GitHub users when determining whether an issue meets the threshold to get an overview (default: none)
name string
bot string // the login name of GitHub user that will post overviews, e.g. "gabyhelp"
projects map[string]bool // the GitHub projects this poster will post to (default: none)
w *wrap.Wrapper // used to wrap edits made to GitHub with tags. Allows the poster to identify its own edits
// For the action log.
requireApproval bool // whether to require approval for actions (default: true)
logAction actions.BeforeFunc
// if true, attempt to find actions by the bot that are missing from the action log (using tags)
findUnloggedActions bool
// in-memory map used to speed up issueState lookups for issues
// that have already been processed by a given call to [poster.run].
// map keys are [poster.issueStateKey]s.
// must only be modified inside [poster.run], while holding a lock
// on runKey.
runState map[string]*issueState
}
// newPoster returns a new Overviews poster.
func newPoster(lg *slog.Logger, db storage.DB, gh *github.Client, name, bot string) *poster {
p := &poster{
slog: lg,
name: name,
bot: bot,
gh: gh,
db: db,
watcher: gh.EventWatcher(actionKind + name + bot),
projects: make(map[string]bool),
minComments: defaultMinComments,
w: wrap.New(bot, name),
requireApproval: true,
maxIssueAge: defaultMaxAge,
}
p.logAction = actions.Register(actionKind, &actioner{p})
return p
}
// run logs post or update actions to the action log for all issue events that need one.
// getOverview is the function to generate an overview of a GitHub issue.
// now is the current time (used to determine if an issue is too old).
//
// The general strategy is as follows:
//
// For each new issue comment event in an enabled project:
// - If a higher numbered issue comment for the issue has already been processed, skip the event.
// - Otherwise, find the corresponding issue and all its comments.
// - Check if the issue needs an overview (see [poster.skip]). If not, mark the issue processed
// at its highest numbered comment and continue to the next event.
// - If so, log an action for the event (see [poster.logPostOrUpdate]) and mark the issue
// processed at its highest numbered comment.
//
// This strategy relies on consulting the database multiple times to find an issue
// and its comments. It assumes that the comments are not changing during its run.
// Therefore, this function must not be run in parallel with a GitHub sync.
//
// Note: Unlike some other functions in this repo, [poster.run] does not have
// a "dry run" mode which does not advance the watcher or create any database entries.
// This is because the algorithm depends on the presence of database entries to determine which
// issues have already been processed, so it is not clear how to create a dry run mode
// without introducing added complexity. The function can instead be tested without permanent
// side effects by using the memory overlay functionality of gaby.
//
// run is not intended to be used concurrently.
// a database lock (see [Client.runKey]) should be held to ensure calls to [poster.run]
// with the same poster (identified by name and bot) do not run simultaneously.
func (p *poster) run(ctx context.Context, getOverview overviewFunc, now time.Time) error {
p.slog.Info("run start", "kind", actionKind, "bot", p.bot, "latest", p.watcher.Latest())
defer func() {
p.slog.Info("run end", "kind", actionKind, "bot", p.bot, "latest", p.watcher.Latest())
}()
p.runState = make(map[string]*issueState)
defer func() {
p.runState = nil
}()
// filter reports whether the event with the given key should
// be processed.
// A filter on the key avoids the overhead of decoding events
// we know we don't care about.
filter := func(key []byte) bool {
var project, api string
var issue, id int64
if err := ordered.Decode(key, &project, &issue, &api, &id); err != nil {
p.db.Panic("github event decode", "key", storage.Fmt(key), "err", err)
}
if !p.projects[project] {
return false
}
if api != "/issues/comments" {
return false
}
if p.alreadyProcessedThisRun(project, issue, id) {
return false
}
return true
}
for e := range p.watcher.RecentFiltered(filter) {
p.maybeProcessIssueComment(ctx, e, getOverview, now)
}
return nil
}
// maybeProcessIssueComment determines whether the Issue corresponding to
// the given event (which must be an issue comment in an enabled project) should get a new
// or updated overview.
//
// Issues may be skipped if they have already been processed see [poster.alreadyProcessed],
// or due to their characteristics (see [poster.skip]).
// Non-skipped Issues get an overview (or update) via [poster.logPostOrUpdate].
//
// maybeProcessIssueComment must be run inside a watcher Recent* loop, as it
// marks processed events as old.
func (p *poster) maybeProcessIssueComment(ctx context.Context, e *github.Event,
getOverview overviewFunc, now time.Time) {
project, issue, id := e.Project, e.Issue, e.ID
p.slog.Info("process", "project", project, "issue", issue, "id", id)
markOld := func(e *github.Event) {
p.watcher.MarkOld(e.DBTime)
// Flush immediately to make sure we don't re-process if interrupted later.
p.watcher.Flush()
p.slog.Debug("overview: run advanced watcher", "kind", actionKind, "name", p.bot, "latest", p.watcher.Latest(), "event", e.ID)
}
if p.alreadyProcessed(project, issue, id) {
markOld(e)
return
}
lastComment, err := p.logPostOrUpdate(ctx, e, getOverview, now)
if err != nil {
p.slog.Error("run", "kind", actionKind, "bot", p.bot, "issue", e.Issue, "event", e, "error", err)
return
}
if lastComment > 0 {
p.slog.Debug("overview: marking issue as processed", "project", project, "issue", issue, "last comment", lastComment)
p.markProcessed(e.Project, e.Issue, lastComment)
markOld(e)
}
}
// alreadyProcessedThisRun reports whether the issue, as of the given commentID,
// has already been processed by this call to [poster.run].
// a lock on runKey should be held.
func (p *poster) alreadyProcessedThisRun(project string, issue, commentID int64) bool {
// We don't need to lock runState because [poster.run] handles each event
// sequentially.
is := p.runState[string(p.issueStateKey(project, issue))]
return is != nil && is.LastComment >= commentID
}
// alreadyProcessed reports whether the issue, as of the given commentID,
// has already been processed by this poster, or any poster with the same name
// and bot (by consulting the database).
// a lock on the runKey should be held.
func (p *poster) alreadyProcessed(project string, issue int64, commentID int64) bool {
return p.lastComment(project, issue) >= commentID
}
// lastComment returns the ID of the last comment processed for this issue.
// a lock on the runKey should be held.
func (p *poster) lastComment(project string, issue int64) int64 {
return p.getIssueState(project, issue).LastComment
}
// issueState holds state allowing a poster to pick up where it
// left off regarding a particular GitHub issue.
type issueState struct {
// The ID of the last comment (the comment with the highest ID)
// at which we have successfully processed this issue.
//
// (Meaning we have either decided that this issue
// does not need an overview given its current state, or
// we have successfully logged an action in the action log.)
LastComment int64 `json:"last_comment_id"`
}
// getIssueState returns the stored issue state for the given issue.
// a lock on the runKey should be held.
func (p *poster) getIssueState(project string, issue int64) issueState {
key := p.issueStateKey(project, issue)
b, ok := p.db.Get(key)
if !ok {
return issueState{LastComment: 0}
}
var st issueState
err := json.Unmarshal(b, &st)
if err != nil {
// Unreachable except bug in this package.
p.db.Panic("poster: could not unmarshal issueState: %s", err)
}
return st
}
// issueStateKey returns the key to look up the state of the
// given issue.
func (p *poster) issueStateKey(project string, issue int64) []byte {
return ordered.Encode(issueStateKind, p.bot, p.name, project, issue)
}
// markProcessed adds an entry to the [poster]'s database
// indicating that the given comment ID and all lower-numbered ones
// have been processed for this issue.
// (If the given comment ID is lower than the latest stored in the
// database, markProcessed is a no-op).
// a lock on runKey should be held.
func (p *poster) markProcessed(project string, issue int64, commentID int64) {
key := p.issueStateKey(project, issue)
st := p.getIssueState(project, issue)
if commentID > st.LastComment {
st.LastComment = commentID
p.runState[string(key)] = &st
p.db.Set(key, storage.JSON(st))
p.db.Flush()
}
}
// an overviewFunc returns the overview for the given issue.
type overviewFunc func(context.Context, *github.Issue) (*IssueResult, error)
// logPostOrUpdate logs the appropriate action (post or update) for the event to the action log
// (if an action is needed).
// The event must represent an issue comment in an enabled project.
//
// On success, logPostOrUpdate returns the highest numbered comment present in the Client's
// database for the corresponding issue (which may be higher than the given event's issue comment number).
func (p *poster) logPostOrUpdate(ctx context.Context, e *github.Event, getOverview overviewFunc, now time.Time) (lastComment int64, _ error) {
p.slog.Info("overview: handling event", "id", e.ID, "project", e.Project,
"issue", e.Issue, "api", e.API, "dbtime", e.DBTime)
ghIss, err := github.LookupIssue(p.db, e.Project, e.Issue)
if err != nil {
return 0, err
}
m, err := p.meta(ghIss)
if err != nil {
return 0, err
}
p.slog.Debug("overview: handling issue", "project", e.Project, "issue", e.Issue, "metadata", m)
if skip, reason := p.skip(ghIss, m, now); skip {
p.slog.Info("overview: skipping issue", "project", e.Project, "issue", e.Issue, "reason", reason)
// If the issue doesn't need an overview, it should be considered processed.
return m.LastComment, nil
}
p.slog.Debug("overview: getting action for event", "id", e.ID, "id", e.ID, "project", e.Project, "issue", e.Issue, "api", e.API)
act, err := p.getAction(ctx, ghIss, getOverview)
if err != nil {
return 0, err
}
p.slog.Info("overview: logging action for event", "action", act, "id", e.ID, "project", e.Project, "issue", e.Issue, "api", e.API)
if act.isPost() {
p.logAction(p.db, logPostKey(e.Project, e.Issue), act.encode(), p.requireApproval)
} else {
p.logAction(p.db, logUpdateKey(e.Project, e.Issue, m.LastComment), act.encode(), p.requireApproval)
}
return m.LastComment, nil
}
// meta returns metadata about an issue that cannot be determined
// based on the issue itself. It consults the poster's database.
func (p *poster) meta(iss *github.Issue) (*issueMeta, error) {
m := &issueMeta{ignore: func(ic *github.IssueComment) bool {
return p.skipCommentAuthors[ic.User.Login]
}}
for ic := range p.gh.Comments(iss) {
m.add(ic)
}
return m, nil
}
// logUpdateKey returns the key for the "update" action, which may happen
// many times for each issue. The lastComment is the highest numbered comment
// we had seen when this action was registered.
// This is only a portion of the database key; it is prefixed by the poster's action
// kind.
func logUpdateKey(project string, issue int64, lastComment int64) []byte {
return ordered.Encode(actionContextUpdate, project, issue, lastComment)
}
// logPostKey returns the key for the initial "post" action, which should only happen
// once per issue.
// This is only a portion of the database key; it is prefixed by the poster's action
// kind.
func logPostKey(project string, issue int64) []byte {
return ordered.Encode(actionContextPost, project, issue)
}
// skip reports whether the given issue should be skipped by this poster,
// and if so, the reason why.
func (p *poster) skip(iss *github.Issue, m *issueMeta, now time.Time) (skip bool, reason string) {
if iss.PullRequest != nil {
return true, "pull request"
}
if iss.State == "closed" {
return true, "issue closed"
}
tm, err := time.Parse(time.RFC3339, iss.CreatedAt)
if err != nil {
return true, fmt.Sprintf("parse CreatedAt failed: %s", err)
}
if now.Sub(tm) > p.maxIssueAge {
return true, fmt.Sprintf("issue too old CreatedAt=%s, maxAge=%s", tm, p.maxIssueAge)
}
if p.skipIssueAuthors[iss.User.Login] {
return true, fmt.Sprintf("issue author %s skipped", iss.User.Login)
}
if m.TotalComments-m.SkippedComments < p.minComments {
return true, fmt.Sprintf("not enough comments ((total(%d) - skipped(%d) < %d)", m.TotalComments, m.SkippedComments, p.minComments)
}
return false, ""
}
// comment returns the text of overview comment to post to GitHub,
// including hidden tags to help identify it later.
func comment(s string, w *wrap.Wrapper) (string, error) {
// These strings may be freely edited.
body := "\n" + s
footer := "<sub>(Generated by AI. Emoji vote if this was helpful or unhelpful; more detailed feedback welcome in [this discussion](https://github.com/golang/go/discussions/67901).)</sub>\n"
c := strings.Join([]string{body, footer}, "\n")
// Do not remove this wrapping call; it is used to identify the comment.
return w.Wrap(c, nil)
}
// isOverviewComment reports whether the given comment was authored
// by this poster, without relying on the action log.
func (p *poster) isOverviewComment(ic *github.IssueComment) bool {
if ic.User.Login == p.bot {
return false
}
if uw := wrap.Parse(ic.Body); uw != nil {
return uw.Bot == p.bot && uw.Kind == p.name
}
return false
}
// findOverviewComment returns the overview comment posted by this poster
// for the given issue, or (nil, nil) if there is none.
// It consults the action log to find the comment. If the comment should
// exist according to the action log, but cannot be found, or the action
// failed or never started, an error is returned.
func (p *poster) findOverviewComment(iss *github.Issue) (*github.IssueComment, error) {
if ae, ok := actions.Get(p.db, actionKind, logPostKey(iss.Project(), iss.Number)); ok {
// Post was successful.
if ae.IsDone() && ae.Error == "" {
var r result
if err := json.Unmarshal(ae.Result, &r); err != nil {
return nil, err
}
p.slog.Info("overview: found completed action", "result", r)
oc, err := p.gh.LookupIssueCommentURL(r.URL)
if err != nil {
return nil, err
}
return oc, nil
}
// The action finished with an error.
if ae.Error != "" {
return nil, fmt.Errorf("overview: cannot find existing issue overview (initial post action failed): %s", ae.Error)
}
// Post has been registered in the action log but isn't done yet.
// Perhaps we should try to recover from this, but for now, return an error.
return nil, fmt.Errorf("overview: cannot find existing issue overview (initial post action not complete)")
}
if p.findUnloggedActions {
// Attempt to find the comment even though it's not present in the action log.
for ic := range p.gh.Comments(iss) {
if p.isOverviewComment(ic) {
p.slog.Warn("overview: found unlogged existing overview comment", "comment", ic)
return ic, nil
}
}
}
// No comment action yet.
return nil, nil
}
// EnableProject enables actions for the given GitHub project.
func (p *poster) EnableProject(project string) {
p.projects[project] = true
}
// RequireApproval configures the poster to require approval
// for all logged actions.
func (p *poster) RequireApproval() {
p.requireApproval = true
}
// AutoApprove configures the poster to automatically approve
// all logged actions.
func (p *poster) AutoApprove() {
p.requireApproval = false
}
// SetMinComments configures the poster to ignore issues with
// fewer than n comments.
func (p *poster) SetMinComments(n int) {
p.minComments = n
}
// SetMaxIssueAge configures the poster to ignore issues
// that are older than the given age.
func (p *poster) SetMaxIssueAge(age time.Duration) {
p.maxIssueAge = age
}
// SkipIssueAuthor configures the poster to ignore issues
// authored by the given GitHub user.
func (p *poster) SkipIssueAuthor(author string) {
if p.skipIssueAuthors == nil {
p.skipIssueAuthors = map[string]bool{}
}
p.skipIssueAuthors[author] = true
}
// SkipCommentsBy configures the poster to ignore comments
// by the given author when determining whether an issue
// has enough comments to get an overview.
func (p *poster) SkipCommentsBy(author string) {
if p.skipCommentAuthors == nil {
p.skipCommentAuthors = map[string]bool{}
}
p.skipCommentAuthors[author] = true
}
const (
// The action kind (for the action log).
actionKind = "overview.PostOrUpdate"
// Additional context to distinguish a post vs. an update action.
actionContextPost = "overview.Post"
actionContextUpdate = "overview.Update"
// DB key context for issue state entries.
issueStateKind = "overview.IssueState"
)
// Default configurations.
var (
defaultMinComments = 50
defaultMaxAge time.Duration = 365 * 24 * time.Hour // 1 year
)