blob: 6e6d49156f65ebf13d96d16aa2663b6aba7443a6 [file] [log] [blame]
// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package labels
import (
"context"
"encoding/json"
"fmt"
"log/slog"
"maps"
"slices"
"strings"
"time"
"golang.org/x/oscar/internal/actions"
"golang.org/x/oscar/internal/github"
"golang.org/x/oscar/internal/llm"
"golang.org/x/oscar/internal/storage"
"golang.org/x/oscar/internal/storage/timed"
"rsc.io/ordered"
)
// A Labeler labels GitHub issues.
//
// A Labeler is created by [New], configured with its Enable*, Set*,
// Skip* and Require* methods, and driven by [Labeler.Run] or
// [Labeler.LabelIssue].
//
// It uses the following database keys:
//   - ["labels.Labeler"] for the action log.
//   - ["labels.Categories", Project, Issue] to record the categories assigned to an issue.
type Labeler struct {
	// Dependencies passed to New.
	slog *slog.Logger
	db storage.DB
	github *github.Client
	cgen llm.ContentGenerator
	// projects is the set of GitHub projects enabled by EnableProject.
	// Events for any other project are skipped.
	projects map[string]bool
	// watcher is the GitHub event watcher obtained in New under the
	// name "labels.Labeler:" + name. Run reads events from it and marks
	// handled events old.
	watcher *timed.Watcher[*github.Event]
	// name is the name passed to New.
	name string
	// timeLimit is the cutoff set by New or SetTimeLimit:
	// issues created before it are skipped.
	timeLimit time.Time
	// skipAuthors is the set of issue author logins registered with
	// SkipAuthor. It is nil until the first call to SkipAuthor.
	skipAuthors map[string]bool
	// label reports whether EnableLabels has been called. When false,
	// the chosen label is logged but no action is recorded.
	label bool
	// For the action log.
	requireApproval bool // passed to logAction with every logged action
	actionKind string // the kind under which actions are registered and looked up
	logAction actions.BeforeFunc // returned by actions.Register in New
}
// New returns a Labeler that logs to lg, keeps its state in db, reads and
// edits GitHub issues through gh, and classifies issues with cgen.
//
// The name identifies the Labeler's stored state: a later call to New
// with the same name will use the same state.
//
// A freshly created Labeler has no projects enabled, does not apply
// labels, and skips issues created more than defaultTooOld before the
// call to New. Configure it with the [Labeler] methods (especially
// [Labeler.EnableProject] and [Labeler.EnableLabels]) before calling
// [Labeler.Run].
func New(lg *slog.Logger, db storage.DB, gh *github.Client, cgen llm.ContentGenerator, name string) *Labeler {
	// TODO: Perhaps the action kind should include name, but perhaps not.
	// This makes sure we only ever label each issue once.
	const kind = "labels.Labeler"

	l := new(Labeler)
	l.slog = lg
	l.db = db
	l.github = gh
	l.cgen = cgen
	l.projects = make(map[string]bool)
	l.watcher = gh.EventWatcher("labels.Labeler:" + name)
	l.name = name
	l.timeLimit = time.Now().Add(-defaultTooOld)
	l.actionKind = kind
	// Registration needs the Labeler itself, so it happens last.
	l.logAction = actions.Register(kind, &actioner{l})
	return l
}
// SetTimeLimit sets the cutoff for labeling: an issue whose creation
// time is before t is skipped by the Labeler.
//
// If SetTimeLimit is never called, the cutoff is 48 hours before the
// call to [New], so older issues are not labeled.
func (l *Labeler) SetTimeLimit(t time.Time) {
	l.timeLimit = t
}
// defaultTooOld is the default maximum age of an issue that will be labeled.
// New sets the Labeler's time limit to this long before the time of the call.
const defaultTooOld = 48 * time.Hour
// EnableProject adds project (for example "golang/go") to the set of
// GitHub projects whose issues the Labeler considers. Events from
// projects that have not been enabled are skipped.
//
// Enabling a project is not enough to change anything on GitHub:
// [Labeler.EnableLabels] must be called as well.
func (l *Labeler) EnableProject(project string) {
	l.projects[project] = true
}
// EnableLabels turns labeling on.
//
// Until EnableLabels is called, [Labeler.Run] only logs the label it
// would have chosen for each issue; it records no labeling action.
// The projects to consider are selected separately with
// [Labeler.EnableProject].
func (l *Labeler) EnableLabels() {
	l.label = true
}
// RequireApproval makes every action the Labeler subsequently adds to
// the action log be marked as requiring approval.
func (l *Labeler) RequireApproval() {
	l.requireApproval = true
}
// SkipAuthor configures the Labeler to skip issues whose author has the
// given GitHub login. It may be called multiple times to skip several
// authors.
func (l *Labeler) SkipAuthor(author string) {
	// The set is allocated lazily; New leaves it nil.
	if l.skipAuthors == nil {
		l.skipAuthors = make(map[string]bool)
	}
	l.skipAuthors[author] = true
}
// An action has all the information needed to label a GitHub issue.
//
// Actions are serialized when they are added to the action log and
// decoded with encoding/json when they are run or displayed, so the
// exported field names are part of the stored format.
type action struct {
	Issue *github.Issue // the issue to label
	Categories []string // the names of the categories corresponding to the labels
	NewLabels []string // labels to add
	Explanations []string // an explanation for each category
}
// result is the result of applying an action.
// It is what runFromActionLog encodes and returns to the action log.
type result struct {
	URL string // URL of the issue the action was applied to, as downloaded from GitHub
}
// Run runs a single round of labeling.
//
// It first reconciles the labels of every enabled project with the
// configured categories (see syncLabels), returning an error if a
// project has no configuration or if syncing fails.
//
// It then processes the events that the Labeler's watcher reports as
// recent, that is, those not yet marked old by a previous Run of a
// Labeler with the same name (see [New]). Closed issues and pull
// requests are skipped. An error on an individual event is logged and
// does not stop the run; the watcher is advanced only past events that
// were handled.
func (l *Labeler) Run(ctx context.Context) error {
	l.slog.Info("labels.Labeler start", "name", l.name, "label", l.label, "latest", l.watcher.Latest())
	defer func() {
		l.slog.Info("labels.Labeler end", "name", l.name, "latest", l.watcher.Latest())
	}()

	// Ensure that labels in GH match our config.
	for project := range l.projects {
		categories, known := config.categories[project]
		if !known {
			return fmt.Errorf("Labeler.Run: unknown project %q", project)
		}
		if err := l.syncLabels(ctx, project, categories); err != nil {
			return err
		}
	}

	defer l.watcher.Flush()
	for event := range l.watcher.Recent() {
		handled, err := l.logLabelIssue(ctx, event)
		if err != nil {
			l.slog.Error("labels.Labeler", "issue", event.Issue, "event", event, "error", err)
			continue
		}
		eventAttrs := slog.Group("event",
			"dbtime", event.DBTime,
			"project", event.Project,
			"issue", event.Issue,
			"json", string(event.JSON))
		if !handled {
			l.slog.Info("labels.Labeler watcher not advanced", "latest", l.watcher.Latest(), eventAttrs)
			continue
		}
		l.watcher.MarkOld(event.DBTime)
		// Flush immediately to make sure we don't re-post if interrupted later in the loop.
		l.watcher.Flush()
		l.slog.Info("labels.Labeler advanced watcher", "latest", l.watcher.Latest(), eventAttrs)
	}
	return nil
}
// LabelIssue labels a single issue, identified by project and issue number.
//
// It applies the same logic that [Labeler.Run] applies to each event,
// but it neither consults nor advances the Labeler's watcher, so it can
// be called on any issue without affecting where future calls to
// [Labeler.Run] start.
//
// The issue must already have an event in the database; otherwise
// LabelIssue returns an error.
func (l *Labeler) LabelIssue(ctx context.Context, project string, issue int64) error {
	event := lookupIssueEvent(project, issue, l.github)
	if event == nil {
		return fmt.Errorf("labels.Labeler.LabelIssue(project=%s, issue=%d): event not found", project, issue)
	}
	// Whether the watcher could advance is irrelevant here.
	if _, err := l.logLabelIssue(ctx, event); err != nil {
		return err
	}
	return nil
}
// lookupIssueEvent scans the events gh has for the given issue of the
// given project and returns the first one whose API is "/issues".
// It returns nil if there is no such event.
func lookupIssueEvent(project string, issue int64, gh *github.Client) *github.Event {
	const issuesAPI = "/issues"
	for ev := range gh.Events(project, issue, issue) {
		if ev.API != issuesAPI {
			continue
		}
		return ev
	}
	return nil
}
// logLabelIssue chooses a label for the issue in event e and, if
// labeling is enabled, adds a labeling action for it to the action log.
//
// The advance result reports whether the event should be considered
// handled, by this or a previous call, so that the Labeler's watcher
// can be advanced past it:
//   - a skipped event is not handled;
//   - an event whose action is already in the log is handled only if
//     labeling is enabled;
//   - an event whose classification failed is not handled, and the
//     error is returned;
//   - with labeling disabled, the choice is only logged and the event
//     is not handled;
//   - otherwise the action is logged and the event is handled.
func (l *Labeler) logLabelIssue(ctx context.Context, e *github.Event) (advance bool, _ error) {
	shouldSkip, why := l.skip(e)
	if shouldSkip {
		l.slog.Info("labels.Labeler skip", "name", l.name, "project",
			e.Project, "issue", e.Issue, "reason", why, "event", e)
		return false, nil
	}

	// If an action has already been logged for this event, do nothing.
	// We don't need a lock. [actions.before] will lock to avoid multiple log entries.
	_, alreadyLogged := actions.Get(l.db, l.actionKind, logKey(e))
	if alreadyLogged {
		l.slog.Info("labels.Labeler already logged", "name", l.name, "project", e.Project, "issue", e.Issue, "event", e)
		// With labeling enabled the watcher may advance, because an
		// action has already been logged for this issue.
		return l.label, nil
	}

	// skip has verified that this is an "/issues" event, so Typed holds an issue.
	issue := e.Typed.(*github.Issue)
	l.slog.Debug("labels.Labeler consider", "url", issue.HTMLURL)

	category, explanation, err := IssueCategory(ctx, l.db, l.cgen, issue)
	if err != nil {
		return false, fmt.Errorf("IssueCategory(%s): %w", issue.HTMLURL, err)
	}
	l.slog.Info("labels.Labeler chose label", "name", l.name, "project", e.Project, "issue", e.Issue,
		"label", category.Label, "explanation", explanation)

	if !l.label {
		// Labeling is disabled so we did not handle this issue.
		return false, nil
	}

	act := action{
		Issue:        issue,
		Categories:   []string{category.Name},
		NewLabels:    []string{category.Label},
		Explanations: []string{explanation},
	}
	l.logAction(l.db, logKey(e), storage.JSON(&act), l.requireApproval)
	return true, nil
}
// skip reports whether the Labeler should ignore event e and, if so,
// a human-readable reason.
//
// An event is skipped if its project is not enabled, it is not an
// "/issues" event, the issue is closed, the issue's creation time
// cannot be parsed or is before the Labeler's time limit, the issue is
// a pull request, or the issue's author was registered with SkipAuthor.
// The checks are made in that order and the first match wins.
func (l *Labeler) skip(e *github.Event) (bool, string) {
	const issuesAPI = "/issues"
	switch {
	case !l.projects[e.Project]:
		return true, fmt.Sprintf("project %s not enabled for this Labeler", e.Project)
	case e.API != issuesAPI:
		return true, fmt.Sprintf("wrong API %s (expected %s)", e.API, issuesAPI)
	}

	// The API check above guards this type assertion.
	issue := e.Typed.(*github.Issue)
	if issue.State == "closed" {
		return true, "issue is closed"
	}

	created, err := time.Parse(time.RFC3339, issue.CreatedAt)
	if err != nil {
		l.slog.Error("labels.Labeler parse time", "CreatedAt", issue.CreatedAt, "err", err)
		return true, "could not parse CreatedAt"
	}

	switch {
	case created.Before(l.timeLimit):
		return true, fmt.Sprintf("created=%s before time limit=%s", created, l.timeLimit)
	case issue.PullRequest != nil:
		return true, "pull request"
	case l.skipAuthors[issue.User.Login]:
		return true, fmt.Sprintf("skipping author %q", issue.User.Login)
	}
	return false, ""
}
// syncLabels reconciles the labels configured in cats with the labels
// of project on the issue tracker, changing the tracker to match.
//
// For each category, matching label names case-insensitively:
//   - if the tracker has no such label, the label is created with the
//     category's description and the color labelColor;
//   - otherwise, if the tracker's label has an empty description, it
//     is set to the category's description;
//   - otherwise, if the two descriptions differ, a warning is logged
//     and the tracker is left alone.
//
// syncLabels makes no other changes; in particular it never deletes
// labels. It stops at, and returns, the first error from GitHub.
func (l *Labeler) syncLabels(ctx context.Context, project string, cats []Category) error {
	l.slog.Info("labels.Labeler syncing labels", "name", l.name, "project", project)
	trackerLabels, err := l.github.ListLabels(ctx, project)
	if err != nil {
		return err
	}

	// Labels are case-insensitive, so index the tracker's labels by lowercase name.
	byLowerName := map[string]github.Label{}
	for _, tl := range trackerLabels {
		byLowerName[strings.ToLower(tl.Name)] = tl
	}

	for _, cat := range cats {
		existing, found := byLowerName[strings.ToLower(cat.Label)]
		switch {
		case !found:
			l.slog.Info("labels.Labeler creating label", "label", cat.Label)
			created := github.Label{
				Name:        cat.Label,
				Description: cat.Description,
				Color:       labelColor,
			}
			if err := l.github.CreateLabel(ctx, project, created); err != nil {
				return err
			}
		case existing.Description == "":
			l.slog.Info("labels.Labeler setting empty label description", "label", existing.Name)
			changes := github.LabelChanges{Description: cat.Description}
			if err := l.github.EditLabel(ctx, project, existing.Name, changes); err != nil {
				return err
			}
		case existing.Description != cat.Description:
			l.slog.Warn("labels.Labeler descriptions disagree", "label", existing.Name)
		}
	}
	return nil
}
// labelColor is the color of labels created by syncLabels.
// It is passed unchanged as the Color field of the new label.
const labelColor = "4d0070"
// An actioner is the value a Labeler registers with the actions package
// (see New). Its methods run and display the Labeler's logged actions.
type actioner struct {
	l *Labeler // the Labeler on whose behalf actions are run
}
// Run executes the encoded action in data on behalf of the actioner's
// Labeler and returns the encoded result.
func (ar *actioner) Run(ctx context.Context, data []byte) ([]byte, error) {
	labeler := ar.l
	return labeler.runFromActionLog(ctx, data)
}
// ForDisplay returns a human-readable description of the JSON-encoded
// action in data: the issue's HTML URL, the labels to add, and the
// explanations, each on its own line.
//
// If data cannot be decoded, or decodes to an action with no issue,
// ForDisplay returns a string beginning with "ERROR: " instead.
func (ar *actioner) ForDisplay(data []byte) string {
	var a action
	if err := json.Unmarshal(data, &a); err != nil {
		return fmt.Sprintf("ERROR: %v", err)
	}
	// Unmarshal leaves Issue nil when the JSON has no "Issue" field
	// (for example "{}" or "null"); don't dereference it blindly.
	if a.Issue == nil {
		return "ERROR: action has no issue"
	}
	return a.Issue.HTMLURL + "\n" + strings.Join(a.NewLabels, ", ") + "\n" + strings.Join(a.Explanations, ", ")
}
// runFromActionLog is called by actions.Run to execute an action.
// It decodes the action from data, calls [Labeler.runAction], and
// returns the encoded result.
//
// It returns an error, without contacting GitHub, if data cannot be
// decoded or decodes to an action with no issue. Any error from
// runAction is returned unchanged.
func (l *Labeler) runFromActionLog(ctx context.Context, data []byte) ([]byte, error) {
	var a action
	if err := json.Unmarshal(data, &a); err != nil {
		return nil, fmt.Errorf("Labeler: decoding action: %w", err)
	}
	// runAction dereferences a.Issue, which Unmarshal leaves nil when
	// the JSON has no "Issue" field. Report that as an error rather
	// than panicking while running the action log.
	if a.Issue == nil {
		return nil, fmt.Errorf("Labeler: action has no issue: %q", data)
	}
	res, err := l.runAction(ctx, &a)
	if err != nil {
		return nil, err
	}
	return storage.JSON(res), nil
}
// runAction applies action a: it adds a.NewLabels to the labels already
// on the issue and records a.Categories for the issue in the database.
//
// If the issue already carries every label in a.NewLabels, runAction
// changes nothing on GitHub and records nothing. The returned result
// holds the URL of the issue as downloaded from GitHub.
func (l *Labeler) runAction(ctx context.Context, a *action) (*result, error) {
	// An issue edit must supply the complete label set, existing labels
	// included, so the current labels have to be fetched first.
	//
	// HTTP offers atomic test-and-set through the If-Match header and an
	// ETag, but GitHub does not support it: on seeing that header it
	// returns 412 Precondition Failed and then makes the change whether
	// or not the ETags match. The best available approach is to read the
	// existing labels and write the new set immediately afterwards.
	current, err := l.github.DownloadIssue(ctx, a.Issue.URL)
	if err != nil {
		return nil, fmt.Errorf("Labeler, download %s: %w", a.Issue.URL, err)
	}

	// Build the union of the existing and the new label names.
	existing := map[string]bool{}
	for _, lab := range current.Labels {
		existing[lab.Name] = true
	}
	merged := maps.Clone(existing)
	for _, name := range a.NewLabels {
		merged[name] = true
	}
	// merged is a superset of existing and every value is true, so the
	// two sets are equal exactly when they have the same size.
	if len(merged) == len(existing) {
		// Nothing to do.
		return &result{URL: current.URL}, nil
	}

	// Sort for determinism in tests.
	labelNames := slices.Sorted(maps.Keys(merged))
	// If GitHub returns an error, it ends up in the action log entry for this action.
	if err := l.github.EditIssue(ctx, a.Issue, &github.IssueChanges{Labels: &labelNames}); err != nil {
		return nil, fmt.Errorf("Labeler: edit %s: %w", a.Issue.URL, err)
	}
	l.setCategories(a.Issue, a.Categories)
	return &result{URL: current.URL}, nil
}
// logKey returns the action-log key for event e: the ordered encoding
// of the event's project and issue number. This is only part of the
// database key; the action log prefixes it with the Labeler's action kind.
func logKey(e *github.Event) []byte {
	project, issue := e.Project, e.Issue
	return ordered.Encode(project, issue)
}
// Latest returns the latest known DBTime marked old by the Labeler's watcher.
func (l *Labeler) Latest() timed.DBTime {
	latest := l.watcher.Latest()
	return latest
}
// setCategories records in the database the categories assigned to
// issue i. The stored value is the category names joined by commas,
// under the key returned by categoriesKey for the issue's project and number.
func (l *Labeler) setCategories(i *github.Issue, cats []string) {
	key := categoriesKey(i.Project(), i.Number)
	joined := strings.Join(cats, ",")
	l.db.Set(key, []byte(joined))
}
// categoriesPrefix is the first component of the database keys under
// which the categories assigned to issues are stored.
const categoriesPrefix = "labels.Categories"

// categoriesKey returns the database key for the categories of issue
// number num in the given project: the ordered encoding of
// [categoriesPrefix, project, num].
func categoriesKey(project string, num int64) []byte {
	key := ordered.Encode(categoriesPrefix, project, num)
	return key
}