internal/rules: allow rule checker to post

Rule checker prepares a response to issues based on any rule
violations.

For now, actual posting will not be enabled because auto-approval is
not on yet.

*NOTE TO SELF*: after running, remove the "if true" code
in internal/gaby/main.go.

Change-Id: I7ac97e84f85567f7e460e33d174b48f2e4a02a87
Reviewed-on: https://go-review.googlesource.com/c/oscar/+/636576
Reviewed-by: Jonathan Amsterdam <jba@google.com>
Reviewed-by: Tatiana Bradley <tatianabradley@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
diff --git a/internal/gaby/main.go b/internal/gaby/main.go
index 9e96c8b..64215ee 100644
--- a/internal/gaby/main.go
+++ b/internal/gaby/main.go
@@ -44,10 +44,12 @@
 	"golang.org/x/oscar/internal/overview"
 	"golang.org/x/oscar/internal/pebble"
 	"golang.org/x/oscar/internal/related"
+	"golang.org/x/oscar/internal/rules"
 	"golang.org/x/oscar/internal/search"
 	"golang.org/x/oscar/internal/secret"
 	"golang.org/x/oscar/internal/storage"
 	"golang.org/x/oscar/internal/storage/timed"
+	"rsc.io/ordered"
 )
 
 type gabyFlags struct {
@@ -106,6 +108,7 @@
 	report    *errorreporting.Client // used to report important gaby errors to Cloud Error Reporting service
 
 	relatedPoster *related.Poster   // used to post related issues
+	rulesPoster   *rules.Poster     // used to post rule violations
 	commentFixer  *commentfix.Fixer // used to fix GitHub comments
 	overview      *overview.Client  // used to generate and post overviews
 }
@@ -214,6 +217,33 @@
 	}
 	g.relatedPoster = rp
 
+	rulep := rules.New(g.slog, g.db, g.github, g.llm, "rules")
+	for _, proj := range g.githubProjects {
+		rulep.EnableProject(proj)
+	}
+	rulep.EnablePosts()
+	if !slices.Contains(autoApprovePkgs, "rules") {
+		rulep.RequireApproval()
+	}
+	if true { // TODO: remove once launched
+		if _, ok := g.db.Get(ordered.Encode("github.EventWatcher", "rules.Poster:rules")); !ok {
+			// For testing (and startup). Set a timestamp 1 hour before
+			// the related issue poster.
+			qq, ok := g.db.Get(ordered.Encode("github.EventWatcher", "related.Poster:related"))
+			if !ok {
+				panic("can't find key")
+			}
+			var t int64
+			err := ordered.Decode(qq, &t)
+			if err != nil {
+				panic(err)
+			}
+			t -= 5 * 60 * 60 * 1e9 // back up 1 hour
+			g.db.Set(ordered.Encode("github.EventWatcher", "rules.Poster:rules"), ordered.Encode(t))
+		}
+	}
+	g.rulesPoster = rulep
+
 	// Named functions to retrieve latest Watcher times.
 	watcherLatests := map[string]func() timed.DBTime{
 		github.DocWatcherID:       docs.LatestFunc(g.github),
@@ -226,6 +256,7 @@
 
 		"gerritlinks fix": cf.Latest,
 		"related":         rp.Latest,
+		"rules":           rulep.Latest,
 	}
 
 	// Install a metric that observes the latest values of the watchers each time metrics are sampled.
@@ -684,6 +715,7 @@
 		// Write all changes to the action log.
 		check(g.fixAllComments(ctx))
 		check(g.postAllRelated(ctx))
+		check(g.postAllRules(ctx))
 		// Apply all actions.
 		actions.Run(ctx, g.slog, g.db)
 	}
@@ -701,6 +733,7 @@
 
 	gabyFixCommentLock  = "gabyfixcommentaction"
 	gabyPostRelatedLock = "gabyrelatedaction"
+	gabyPostRulesLock   = "gabyrulesaction"
 )
 
 func (g *Gaby) syncGitHubIssues(ctx context.Context) error {
@@ -781,6 +814,13 @@
 	return g.relatedPoster.Run(ctx)
 }
 
+func (g *Gaby) postAllRules(ctx context.Context) error {
+	g.db.Lock(gabyPostRulesLock)
+	defer g.db.Unlock(gabyPostRulesLock)
+
+	return g.rulesPoster.Run(ctx)
+}
+
 // localCron simulates Cloud Scheduler by fetching our server's /cron endpoint once per minute.
 func (g *Gaby) localCron() {
 	for ; ; time.Sleep(1 * time.Minute) {
diff --git a/internal/gaby/rules.go b/internal/gaby/rules.go
index c372082..6ed7526 100644
--- a/internal/gaby/rules.go
+++ b/internal/gaby/rules.go
@@ -77,7 +77,7 @@
 		return p
 	}
 
-	rules, err := rules.Issue(r.Context(), g.llm, i)
+	rules, err := rules.Issue(r.Context(), g.llm, i, true)
 	if err != nil {
 		p.Error = err
 		return p
diff --git a/internal/rules/rules.go b/internal/rules/rules.go
index ec210a9..1c884fb 100644
--- a/internal/rules/rules.go
+++ b/internal/rules/rules.go
@@ -9,18 +9,235 @@
 	"context"
 	"embed"
 	"encoding/json"
+	"errors"
 	"fmt"
 	"log"
+	"log/slog"
 	"strings"
 	"text/template"
+	"time"
 
+	"golang.org/x/oscar/internal/actions"
 	"golang.org/x/oscar/internal/github"
 	"golang.org/x/oscar/internal/labels"
 	"golang.org/x/oscar/internal/llm"
+	"golang.org/x/oscar/internal/storage"
+	"golang.org/x/oscar/internal/storage/timed"
+	"rsc.io/ordered"
 )
 
-// TODO: this is a one-shot request/response version of this feature.
-// Implement the version that comments on issues as they come in.
+// A Poster posts to GitHub about rule violations.
+type Poster struct {
+	slog      *slog.Logger
+	db        storage.DB
+	llm       llm.ContentGenerator
+	github    *github.Client
+	projects  map[string]bool
+	watcher   *timed.Watcher[*github.Event]
+	name      string
+	timeLimit time.Time
+	post      bool
+	// For the action log.
+	requireApproval bool
+	actionKind      string
+	logAction       actions.BeforeFunc
+}
+
+// An action has all the information needed to post a comment to a GitHub issue.
+type action struct {
+	Issue   *github.Issue
+	Changes *github.IssueCommentChanges
+}
+
+func New(lg *slog.Logger, db storage.DB, gh *github.Client, llm llm.ContentGenerator, name string) *Poster {
+	p := &Poster{
+		slog:      lg,
+		db:        db,
+		llm:       llm,
+		github:    gh,
+		projects:  make(map[string]bool),
+		watcher:   gh.EventWatcher("rules.Poster:" + name),
+		name:      name,
+		timeLimit: time.Now().Add(-defaultTooOld),
+	}
+	p.actionKind = "rules.Poster"
+	p.logAction = actions.Register(p.actionKind, &actioner{p})
+	p.requireApproval = true // TODO: remove. hardcoded for safety, just for now
+	return p
+}
+
+// EnableProject enables the Poster to post on issues in the given GitHub project (for example "golang/go").
+// See also [Poster.EnablePosts], which must also be called to post anything to GitHub.
+func (p *Poster) EnableProject(project string) {
+	p.projects[project] = true
+}
+
+// EnablePosts enables the Poster to post to GitHub.
+// If EnablePosts has not been called, [Poster.Run] logs what it would post but does not post the messages.
+// See also [Poster.EnableProject], which must also be called to set the projects being considered.
+func (p *Poster) EnablePosts() {
+	p.post = true
+}
+
+// RequireApproval configures the Poster to log actions that require approval.
+func (p *Poster) RequireApproval() {
+	p.requireApproval = true
+}
+
+const defaultTooOld = 48 * time.Hour
+
+func (p *Poster) Run(ctx context.Context) error {
+	p.slog.Info("rules.Poster start", "name", p.name, "post", p.post, "latest", p.watcher.Latest())
+	defer func() {
+		p.slog.Info("rules.Poster end", "name", p.name, "latest", p.watcher.Latest())
+	}()
+	defer p.watcher.Flush()
+	for e := range p.watcher.Recent() {
+		advance, err := p.logPostIssue(ctx, e)
+		if err != nil {
+			p.slog.Error("rules.Poster", "issue", e.Issue, "event", e, "error", err)
+			continue
+		}
+		if advance {
+			p.watcher.MarkOld(e.DBTime)
+			// Flush immediately to make sure we don't re-post if interrupted later in the loop.
+			p.watcher.Flush()
+		}
+	}
+	return nil
+}
+
+// logKey returns the key for the event in the action log.
+// This is only a portion of the database key; it is prefixed by the Poster's action
+// kind.
+func logKey(e *github.Event) []byte {
+	return ordered.Encode(e.Project, e.Issue)
+}
+
+func (p *Poster) logPostIssue(ctx context.Context, e *github.Event) (advance bool, _ error) {
+	if skip, reason := p.skip(e); skip {
+		p.slog.Info("rules.Poster skip", "name", p.name, "project",
+			e.Project, "issue", e.Issue, "reason", reason)
+		return false, nil
+	}
+	i, ok := e.Typed.(*github.Issue)
+	if !ok {
+		p.slog.Error("event does not have *github.Issue type", "typed", e.Typed)
+		return true, nil
+	}
+	// If an action has already been logged for this event, do nothing.
+	if _, ok := actions.Get(p.db, p.actionKind, logKey(e)); ok {
+		p.slog.Info("rules.Poster already logged", "name", p.name, "project", e.Project, "issue", e.Issue)
+		// If posting is enabled, we can advance the watcher because
+		// a comment has already been logged for this issue.
+		return p.post, nil
+	}
+	p.slog.Info("rules.Poster considering", "issue", i.Number)
+	r, err := Issue(ctx, p.llm, i, false /*debug*/)
+	if err != nil {
+		p.slog.Info("rules engine failed", "error", err)
+		return false, nil
+	}
+	if !p.post {
+		// Posting is disabled so we did not handle this issue.
+		return false, nil
+	}
+	if r.Response == "" {
+		p.slog.Info("no rule violations for", "issue", i.Number)
+		return true, nil
+	}
+	act := &action{
+		Issue:   i,
+		Changes: &github.IssueCommentChanges{Body: r.Response},
+	}
+	p.slog.Info("queueing response for", "issue", i.Number, "response", r.Response)
+	p.logAction(p.db, logKey(e), storage.JSON(act), p.requireApproval)
+	return true, nil
+}
+
+// skip reports whether the event should be skipped and why.
+func (p *Poster) skip(e *github.Event) (_ bool, reason string) {
+	if !p.projects[e.Project] {
+		return true, fmt.Sprintf("project %s not enabled for this Poster", e.Project)
+	}
+	if e.API != "/issues" {
+		return true, fmt.Sprintf("wrong API %s (expected %s)", e.API, "/issues")
+	}
+	issue := e.Typed.(*github.Issue)
+	if issue.State == "closed" {
+		return true, "issue is closed"
+	}
+	if issue.PullRequest != nil {
+		return true, "pull request"
+	}
+	tm, err := time.Parse(time.RFC3339, issue.CreatedAt)
+	if err != nil {
+		p.slog.Error("rules.Poster parse createdat", "CreatedAt", issue.CreatedAt, "err", err)
+		return true, "could not parse createdat"
+	}
+	if tm.Before(p.timeLimit) {
+		return true, fmt.Sprintf("created=%s before time limit=%s", tm, p.timeLimit)
+	}
+	return false, ""
+}
+
+type actioner struct {
+	p *Poster
+}
+
+func (ar *actioner) Run(ctx context.Context, data []byte) ([]byte, error) {
+	p := ar.p
+	var a action
+	if err := json.Unmarshal(data, &a); err != nil {
+		return nil, err
+	}
+	res, err := p.runAction(ctx, &a)
+	if err != nil {
+		return nil, err
+	}
+	return storage.JSON(res), nil
+}
+
+func (ar *actioner) ForDisplay(data []byte) string {
+	var a action
+	if err := json.Unmarshal(data, &a); err != nil {
+		return fmt.Sprintf("ERROR: %v", err)
+	}
+	return a.Issue.HTMLURL + "\n" + a.Changes.Body
+}
+
+type result struct {
+	URL string // URL of new comment
+}
+
+// runAction runs the given action.
+func (p *Poster) runAction(ctx context.Context, a *action) (*result, error) {
+	_, url, err := p.github.PostIssueComment(ctx, a.Issue, a.Changes)
+	// If GitHub returns an error, add it to the action log for this action.
+	//
+	// Gaby's original behavior was to log the error, not advance the watcher,
+	// and continue iterating over watcher.Recent. So subsequent successful
+	// posts would advance the watcher over the failed one, leaving only the
+	// slog entry as evidence of the failure.
+	//
+	// The current behavior always advances the watcher and preserves the error
+	// in the action log.
+	//
+	// It is unclear what the right behavior is, but at least at present all
+	// failed actions are available to the program and could be re-run.
+	if err != nil {
+		return nil, fmt.Errorf("%w issue=%d: %v", errPostIssueCommentFailed, a.Issue.Number, err)
+	}
+
+	return &result{URL: url}, nil
+}
+
+// Latest returns the latest known DBTime marked old by the Poster's Watcher.
+func (p *Poster) Latest() timed.DBTime {
+	return p.watcher.Latest()
+}
+
+var errPostIssueCommentFailed = errors.New("post issue comment failed")
 
 // IssueResult is the result of [Issue].
 // It contains the text (in markdown format) of a response to
@@ -31,11 +248,14 @@
 }
 
 // Issue returns text describing the set of rules that the issue does not currently satisfy.
-func Issue(ctx context.Context, cgen llm.ContentGenerator, i *github.Issue) (*IssueResult, error) {
+// If debug==true, then response contains additional llm debugging info.
+func Issue(ctx context.Context, cgen llm.ContentGenerator, i *github.Issue, debug bool) (*IssueResult, error) {
 	var result IssueResult
 
 	if i.PullRequest != nil {
-		result.Response += "## Issue response text\n**None required (pull request)**"
+		if debug {
+			result.Response += "## Issue response text\n**None required (pull request)**"
+		}
 		return &result, nil
 	}
 
@@ -44,8 +264,9 @@
 		return nil, err
 	}
 
-	// For now, report the classification. We won't do this in the final version.
-	result.Response += fmt.Sprintf("## Classification\n**%s**\n\n> %s\n\n", kind.Name, reasoning)
+	if debug {
+		result.Response += fmt.Sprintf("## Classification\n**%s**\n\n> %s\n\n", kind.Name, reasoning)
+	}
 
 	// Extract issue text into a string.
 	var issueText bytes.Buffer
@@ -86,7 +307,9 @@
 	}
 
 	if len(failed) == 0 {
-		result.Response += "## Issue response text\n**None required**"
+		if debug {
+			result.Response += "## Issue response text\n**None required**"
+		}
 		return &result, nil
 	}
 
@@ -94,10 +317,15 @@
 	fmt.Fprintf(&response, conversationText1)
 	for i, rule := range failed {
 		fmt.Fprintf(&response, "- %s\n\n", rule.Text)
-		fmt.Fprintf(&response, "  > %s\n\n", failedReason[i])
+		if debug {
+			fmt.Fprintf(&response, "  > %s\n\n", failedReason[i])
+		}
 	}
 	fmt.Fprintf(&response, conversationText2)
-	result.Response += "## Issue response text\n" + response.String()
+	if debug {
+		result.Response += "## Issue response text\n"
+	}
+	result.Response += response.String()
 
 	return &result, nil
 }
@@ -145,10 +373,8 @@
 
 `
 const conversationText2 = `
-(I'm just a bot; you probably know better than I do whether these findings really need fixing.)
-
-(TODO: Emoji vote if this was helpful or unhelpful.)
-
+I'm just a bot; you probably know better than I do whether these findings really need fixing.
+<sub>(Emoji vote if this was helpful or unhelpful; more detailed feedback welcome in [this discussion](https://github.com/golang/go/discussions/67901).)</sub>
 `
 
 const body = `
diff --git a/internal/rules/rules_test.go b/internal/rules/rules_test.go
index 1f2d7b6..8cf29d8 100644
--- a/internal/rules/rules_test.go
+++ b/internal/rules/rules_test.go
@@ -6,11 +6,17 @@
 
 import (
 	"context"
+	"encoding/json"
+	"slices"
 	"strings"
 	"testing"
+	"time"
 
+	"golang.org/x/oscar/internal/actions"
 	"golang.org/x/oscar/internal/github"
 	"golang.org/x/oscar/internal/llm"
+	"golang.org/x/oscar/internal/storage"
+	"golang.org/x/oscar/internal/testutil"
 )
 
 func TestIssue(t *testing.T) {
@@ -25,7 +31,7 @@
 	i.Body = "body"
 
 	// Ask about rule violations.
-	r, err := Issue(ctx, llm, i)
+	r, err := Issue(ctx, llm, i, false)
 	if err != nil {
 		t.Fatalf("IssueRules failed with %v", err)
 	}
@@ -87,3 +93,150 @@
 			return "UNK", nil
 		})
 }
+
+func TestRun(t *testing.T) {
+	ctx := context.Background()
+	check := testutil.Checker(t)
+	lg := testutil.Slogger(t)
+	db := storage.MemDB()
+	gh := github.New(lg, db, nil, nil)
+	llm := ruleTestGenerator()
+
+	tc := gh.Testing()
+	testIssue := &github.Issue{
+		Number:    888,
+		Title:     "title",
+		Body:      "body",
+		CreatedAt: time.Now().Add(-1 * time.Hour).UTC().Format(time.RFC3339),
+	}
+	tc.AddIssue("test/project", testIssue)
+
+	p := New(lg, db, gh, llm, "postname")
+	p.EnableProject("test/project")
+	p.EnablePosts()
+
+	check(p.Run(ctx))
+	check(actions.Run(ctx, lg, db))
+
+	entries := slices.Collect(actions.ScanAfter(lg, db, time.Time{}, nil))
+	if len(entries) != 1 {
+		t.Fatalf("too many action entries. Want 1, got %d", len(entries))
+	}
+	e := entries[0]
+
+	var a action
+	if err := json.Unmarshal(e.Action, &a); err != nil {
+		t.Fatal(err)
+	}
+	if got, want := a.Issue.Project(), "test/project"; got != want {
+		t.Fatalf("posted to unexpected project: got %s, want %s", got, want)
+		return
+	}
+	if got, want := a.Issue.Number, int64(888); got != want {
+		t.Fatalf("posted to unexpected issue: got %d, want %d", got, want)
+	}
+	wantBody := strings.TrimSpace(`
+We've identified some possible problems with your issue report. Please review
+these findings and fix any that you think are appropriate to fix.
+
+- The issue title must start with a package name followed by a colon.
+
+
+I'm just a bot; you probably know better than I do whether these findings really need fixing.
+<sub>(Emoji vote if this was helpful or unhelpful; more detailed feedback welcome in [this discussion](https://github.com/golang/go/discussions/67901).)</sub>
+`)
+	if got := strings.TrimSpace(a.Changes.Body); got != wantBody {
+		t.Fatalf("body wrong: got %s, want %s", got, wantBody)
+	}
+}
+
+func TestOld(t *testing.T) {
+	ctx := context.Background()
+	check := testutil.Checker(t)
+	lg := testutil.Slogger(t)
+	db := storage.MemDB()
+	gh := github.New(lg, db, nil, nil)
+	llm := ruleTestGenerator()
+
+	tc := gh.Testing()
+	testIssue := &github.Issue{
+		Number:    888,
+		Title:     "title",
+		Body:      "body",
+		CreatedAt: time.Now().Add(-100 * time.Hour).UTC().Format(time.RFC3339),
+	}
+	tc.AddIssue("test/project", testIssue)
+
+	p := New(lg, db, gh, llm, "postname")
+	p.EnableProject("test/project")
+	p.EnablePosts()
+
+	check(p.Run(ctx))
+	check(actions.Run(ctx, lg, db))
+
+	entries := slices.Collect(actions.ScanAfter(lg, db, time.Time{}, nil))
+	if len(entries) != 0 {
+		t.Fatalf("too many action entries. Want 0, got %d", len(entries))
+	}
+}
+
+func TestClosed(t *testing.T) {
+	ctx := context.Background()
+	check := testutil.Checker(t)
+	lg := testutil.Slogger(t)
+	db := storage.MemDB()
+	gh := github.New(lg, db, nil, nil)
+	llm := ruleTestGenerator()
+
+	tc := gh.Testing()
+	testIssue := &github.Issue{
+		Number:    888,
+		Title:     "title",
+		Body:      "body",
+		CreatedAt: time.Now().Add(-1 * time.Hour).UTC().Format(time.RFC3339),
+		State:     "closed",
+	}
+	tc.AddIssue("test/project", testIssue)
+
+	p := New(lg, db, gh, llm, "postname")
+	p.EnableProject("test/project")
+	p.EnablePosts()
+
+	check(p.Run(ctx))
+	check(actions.Run(ctx, lg, db))
+
+	entries := slices.Collect(actions.ScanAfter(lg, db, time.Time{}, nil))
+	if len(entries) != 0 {
+		t.Fatalf("too many action entries. Want 0, got %d", len(entries))
+	}
+}
+
+func TestProjectFilter(t *testing.T) {
+	ctx := context.Background()
+	check := testutil.Checker(t)
+	lg := testutil.Slogger(t)
+	db := storage.MemDB()
+	gh := github.New(lg, db, nil, nil)
+	llm := ruleTestGenerator()
+
+	tc := gh.Testing()
+	testIssue := &github.Issue{
+		Number:    888,
+		Title:     "title",
+		Body:      "body",
+		CreatedAt: time.Now().Add(-1 * time.Hour).UTC().Format(time.RFC3339),
+	}
+	tc.AddIssue("test/project1", testIssue)
+
+	p := New(lg, db, gh, llm, "postname")
+	p.EnableProject("test/project2")
+	p.EnablePosts()
+
+	check(p.Run(ctx))
+	check(actions.Run(ctx, lg, db))
+
+	entries := slices.Collect(actions.ScanAfter(lg, db, time.Time{}, nil))
+	if len(entries) != 0 {
+		t.Fatalf("too many action entries. Want 0, got %d", len(entries))
+	}
+}