blob: 1f57805b1e23f38f0c4f8579b053f9b6be36f882 [file] [log] [blame] [edit]
// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package rules
import (
"bytes"
"context"
"embed"
"encoding/json"
"errors"
"fmt"
"log"
"log/slog"
"regexp"
"strings"
"text/template"
"time"
"golang.org/x/oscar/internal/actions"
"golang.org/x/oscar/internal/github"
"golang.org/x/oscar/internal/labels"
"golang.org/x/oscar/internal/llm"
"golang.org/x/oscar/internal/storage"
"golang.org/x/oscar/internal/storage/timed"
"rsc.io/ordered"
)
// A Poster posts to GitHub about rule violations.
type Poster struct {
slog *slog.Logger
db storage.DB
llm llm.ContentGenerator
github *github.Client
projects map[string]bool
watcher *timed.Watcher[*github.Event]
name string
timeLimit time.Time
post bool
// For the action log.
requireApproval bool
actionKind string
logAction actions.BeforeFunc
}
// An action has all the information needed to post a comment to a GitHub issue.
type action struct {
Issue *github.Issue
Changes *github.IssueCommentChanges
}
func New(lg *slog.Logger, db storage.DB, gh *github.Client, llm llm.ContentGenerator, name string) *Poster {
p := &Poster{
slog: lg,
db: db,
llm: llm,
github: gh,
projects: make(map[string]bool),
watcher: gh.EventWatcher("rules.Poster:" + name),
name: name,
timeLimit: time.Now().Add(-defaultTooOld),
}
p.actionKind = "rules.Poster"
p.logAction = actions.Register(p.actionKind, &actioner{p})
p.requireApproval = true // TODO: remove. hardcoded for safety, just for now
return p
}
// EnableProject enables the Poster to post on issues in the given GitHub project (for example "golang/go").
// See also [Poster.EnablePosts], which must also be called to post anything to GitHub.
func (p *Poster) EnableProject(project string) {
p.projects[project] = true
}
// EnablePosts enables the Poster to post to GitHub.
// If EnablePosts has not been called, [Poster.Run] logs what it would post but does not post the messages.
// See also [Poster.EnableProject], which must also be called to set the projects being considered.
func (p *Poster) EnablePosts() {
p.post = true
}
// RequireApproval configures the Poster to log actions that require approval.
func (p *Poster) RequireApproval() {
p.requireApproval = true
}
const defaultTooOld = 48 * time.Hour
func (p *Poster) Run(ctx context.Context) error {
p.slog.Info("rules.Poster start", "name", p.name, "post", p.post, "latest", p.watcher.Latest())
defer func() {
p.slog.Info("rules.Poster end", "name", p.name, "latest", p.watcher.Latest())
}()
defer p.watcher.Flush()
for e := range p.watcher.Recent() {
advance, err := p.logPostIssue(ctx, e)
if err != nil {
p.slog.Error("rules.Poster", "issue", e.Issue, "event", e, "error", err)
continue
}
if advance {
p.watcher.MarkOld(e.DBTime)
// Flush immediately to make sure we don't re-post if interrupted later in the loop.
p.watcher.Flush()
}
}
return nil
}
// logKey returns the key for the event in the action log.
// This is only a portion of the database key; it is prefixed by the Poster's action
// kind.
func logKey(e *github.Event) []byte {
return ordered.Encode(e.Project, e.Issue)
}
func (p *Poster) logPostIssue(ctx context.Context, e *github.Event) (advance bool, _ error) {
if skip, reason := p.skip(e); skip {
p.slog.Info("rules.Poster skip", "name", p.name, "project",
e.Project, "issue", e.Issue, "reason", reason)
return false, nil
}
i, ok := e.Typed.(*github.Issue)
if !ok {
p.slog.Error("event does not have *github.Issue type", "typed", e.Typed)
return true, nil
}
// If an action has already been logged for this event, do nothing.
if _, ok := actions.Get(p.db, p.actionKind, logKey(e)); ok {
p.slog.Info("rules.Poster already logged", "name", p.name, "project", e.Project, "issue", e.Issue)
// If posting is enabled, we can advance the watcher because
// a comment has already been logged for this issue.
return p.post, nil
}
p.slog.Info("rules.Poster considering", "issue", i.Number)
r, err := Issue(ctx, p.db, p.llm, i, false /*debug*/)
if err != nil {
p.slog.Info("rules engine failed", "error", err)
return false, nil
}
if !p.post {
// Posting is disabled so we did not handle this issue.
return false, nil
}
if r.Response == "" {
p.slog.Info("no rule violations for", "issue", i.Number)
return true, nil
}
act := &action{
Issue: i,
Changes: &github.IssueCommentChanges{Body: r.Response},
}
p.slog.Info("queueing response for", "issue", i.Number, "response", r.Response)
p.logAction(p.db, logKey(e), storage.JSON(act), p.requireApproval)
return true, nil
}
// skip reports whether the event should be skipped and why.
func (p *Poster) skip(e *github.Event) (_ bool, reason string) {
if !p.projects[e.Project] {
return true, fmt.Sprintf("project %s not enabled for this Poster", e.Project)
}
if e.API != "/issues" {
return true, fmt.Sprintf("wrong API %s (expected %s)", e.API, "/issues")
}
issue := e.Typed.(*github.Issue)
if issue.State == "closed" {
return true, "issue is closed"
}
if issue.PullRequest != nil {
return true, "pull request"
}
tm, err := time.Parse(time.RFC3339, issue.CreatedAt)
if err != nil {
p.slog.Error("rules.Poster parse createdat", "CreatedAt", issue.CreatedAt, "err", err)
return true, "could not parse createdat"
}
if tm.Before(p.timeLimit) {
return true, fmt.Sprintf("created=%s before time limit=%s", tm, p.timeLimit)
}
return false, ""
}
type actioner struct {
p *Poster
}
func (ar *actioner) Run(ctx context.Context, data []byte) ([]byte, error) {
p := ar.p
var a action
if err := json.Unmarshal(data, &a); err != nil {
return nil, err
}
res, err := p.runAction(ctx, &a)
if err != nil {
return nil, err
}
return storage.JSON(res), nil
}
func (ar *actioner) ForDisplay(data []byte) string {
var a action
if err := json.Unmarshal(data, &a); err != nil {
return fmt.Sprintf("ERROR: %v", err)
}
return a.Issue.HTMLURL + "\n" + a.Changes.Body
}
type result struct {
URL string // URL of new comment
}
// runAction runs the given action.
func (p *Poster) runAction(ctx context.Context, a *action) (*result, error) {
_, url, err := p.github.PostIssueComment(ctx, a.Issue, a.Changes)
// If GitHub returns an error, add it to the action log for this action.
//
// Gaby's original behavior was to log the error, not advance the watcher,
// and continue iterating over watcher.Recent. So subsequent successful
// posts would advance the watcher over the failed one, leaving only the
// slog entry as evidence of the failure.
//
// The current behavior always advances the watcher and preserves the error
// in the action log.
//
// It is unclear what the right behavior is, but at least at present all
// failed actions are available to the program and could be re-run.
if err != nil {
return nil, fmt.Errorf("%w issue=%d: %v", errPostIssueCommentFailed, a.Issue.Number, err)
}
return &result{URL: url}, nil
}
// Latest returns the latest known DBTime marked old by the Poster's Watcher.
func (p *Poster) Latest() timed.DBTime {
return p.watcher.Latest()
}
var errPostIssueCommentFailed = errors.New("post issue comment failed")
// IssueResult is the result of [Issue].
// It contains the text (in markdown format) of a response to
// that issue mentioning any applicable rules that were violated.
// If Response=="", then nothing to report.
type IssueResult struct {
Response string
}
// Issue returns text describing the set of rules that the issue does not currently satisfy.
// If debug==true, then response contains additional llm debugging info.
func Issue(ctx context.Context, db storage.DB, cgen llm.ContentGenerator, i *github.Issue, debug bool) (*IssueResult, error) {
var result IssueResult
if i.PullRequest != nil {
if debug {
result.Response += "## Issue response text\n**None required (pull request)**"
}
return &result, nil
}
kind, reasoning, err := Classify(ctx, db, cgen, i)
if err != nil {
return nil, err
}
if debug {
result.Response += fmt.Sprintf("## Classification\n**%s**\n\n> %s\n\n", kind.Name, reasoning)
}
// Extract issue text into a string.
var issueText bytes.Buffer
err = template.Must(template.New("prompt").Parse(body)).Execute(&issueText, bodyArgs{
Title: i.Title,
Body: i.Body,
})
if err != nil {
return nil, err
}
// Now that we know the kind, ask about each of the rules for the kind.
var systemPrompt bytes.Buffer
var failed []Rule
var failedReason []string
for _, rule := range kind.Rules {
if rule.Regexp != "" {
// Use a regexp instead of an LLM to detect violations.
re, err := regexp.Compile(rule.Regexp)
if err != nil {
return nil, fmt.Errorf("bad regexp: %w\n", err)
}
if re.MatchString(i.Body) {
failed = append(failed, rule)
failedReason = append(failedReason, fmt.Sprintf("regexp %s matched", rule.Regexp))
}
continue
}
// Build system prompt to ask about rule violations.
systemPrompt.Reset()
systemPrompt.WriteString(fmt.Sprintf(rulePrompt, rule.Text, rule.Details))
res, err := cgen.GenerateContent(ctx, nil, []llm.Part{llm.Text(systemPrompt.String()), llm.Text(issueText.String())})
if err != nil {
return nil, fmt.Errorf("llm request failed: %w\n", err)
}
firstLine, remainingLines, _ := strings.Cut(res, "\n")
switch firstLine {
default:
// LLM failed. Treat as a "yes" so we don't spam
// people when the LLM is the problem.
log.Printf("invalid LLM response: %q", firstLine)
fallthrough
case "yes":
// Issue does satisfy the rule, nothing to do.
case "no":
failed = append(failed, rule)
failedReason = append(failedReason, remainingLines)
}
}
if len(failed) == 0 {
if debug {
result.Response += "## Issue response text\n**None required**"
}
return &result, nil
}
var response bytes.Buffer
fmt.Fprintf(&response, conversationText1)
for i, rule := range failed {
fmt.Fprintf(&response, "- %s\n\n", rule.Text)
if debug {
fmt.Fprintf(&response, " > %s\n\n", failedReason[i])
}
}
fmt.Fprintf(&response, conversationText2)
if debug {
result.Response += "## Issue response text\n"
}
result.Response += response.String()
return &result, nil
}
// Classify returns the kind of issue we're dealing with.
// Returns a description of the classification and a string describing
// the llm's reasoning.
func Classify(ctx context.Context, db storage.DB, cgen llm.ContentGenerator, i *github.Issue) (IssueKind, string, error) {
cat, explanation, err := labels.IssueCategory(ctx, db, cgen, i)
if err != nil {
return IssueKind{}, "", err
}
for _, kind := range rulesConfig.IssueKinds {
if kind.Name == cat.Name {
return kind, explanation, nil
}
}
return IssueKind{}, "", fmt.Errorf("unexpected category %s", cat.Name)
}
//go:embed static/*
var staticFS embed.FS
// TODO: put some of these in the staticFS
const rulePrompt = `
Your job is to decide whether a Go issue follows this rule: %s (%s)
The issue is described by a title and a body.
Report whether the issue is following the rule or not, with a single "yes" or "no"
on a line by itself, followed by an explanation of your decision.
`
const conversationText1 = `
We've identified some possible problems with your issue. Please review
these findings and fix any that you think are appropriate to fix.
`
const conversationText2 = `
I'm just a bot; you probably know better than I do whether these findings really need fixing.
<sub>(Emoji vote if this was helpful or unhelpful; more detailed feedback welcome in [this discussion](https://github.com/golang/go/discussions/67901).)</sub>
`
const body = `
The title of the issue is: {{.Title}}
The body of the issue is: {{.Body}}
`
type bodyArgs struct {
Title string
Body string
}
// Structure of JSON configuration file in static/ruleset.json
type RulesConfig struct {
IssueKinds []IssueKind
}
type IssueKind struct {
Name string // name of this kind of issue
Text string // one-line description of this kind of issue
Details string // additional text describing kind of issue to the LLM
Rules []Rule // rules that apply to this kind
Ignore bool // don't bother commenting on this kind of issue (just Rules==nil?)
}
type Rule struct {
Text string // what we would show to a user
Details string // additional text for the LLM
Regexp string // regular expression that matches for rule violations (instead of an LLM)
}
var rulesConfig RulesConfig
func init() {
content, err := staticFS.ReadFile("static/ruleset.json")
if err != nil {
log.Fatal(err)
}
err = json.Unmarshal(content, &rulesConfig)
if err != nil {
log.Fatal(err)
}
}