blob: 09b0b4f11435310dec5cb6e6b28869f7ea00007a [file] [log] [blame]
// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package commentfix implements rule-based rewriting of issue comments.
package commentfix
import (
"context"
"encoding/json"
"errors"
"fmt"
"io"
"log/slog"
"os"
"reflect"
"regexp"
"strings"
"testing"
"time"
"golang.org/x/oscar/internal/actions"
"golang.org/x/oscar/internal/diff"
"golang.org/x/oscar/internal/github"
"golang.org/x/oscar/internal/storage"
"golang.org/x/oscar/internal/storage/timed"
"rsc.io/markdown"
"rsc.io/ordered"
)
// A Fixer rewrites issue texts and issue comments using a set of rules.
// After creating a fixer with [New], new rules can be added using
// the [Fixer.AutoLink], [Fixer.ReplaceText], and [Fixer.ReplaceURL] methods,
// and then repeated calls to [Fixer.Run] add the resulting rewrites to the
// action log, from which they are applied on GitHub.
//
// The zero value of a Fixer can be used in “offline” mode with [Fixer.Fix],
// which returns rewritten Markdown.
//
// TODO(rsc): Separate the GitHub logic more cleanly from the rewrite logic.
type Fixer struct {
// name distinguishes this Fixer from differently configured ones.
// New uses it to name both the event watcher and the action kind.
name string
// slog receives status and error logs. init replaces a nil logger
// with one that discards its output.
slog *slog.Logger
// github is the client used to read events and to download and edit
// issues and comments. It is nil for a Fixer used only with Fix.
github *github.Client
// watcher iterates over recent GitHub events for Run.
// New sets it only when github is non-nil.
watcher *timed.Watcher[*github.Event]
// fixes holds the rewrite functions, in the order they were added.
// Each is called with a Markdown inline node and a set of flags
// (see flagLink) and returns nil for “no change”, or a replacement
// markdown.Inline or []markdown.Inline.
fixes []func(any, int) any
// projects is the set of projects enabled by EnableProject.
// Events from any other project are ignored.
projects map[string]bool
// edit reports whether EnableEdits has been called.
edit bool
// timeLimit is the cutoff: issues and comments last updated
// before it are not rewritten.
timeLimit time.Time
// db is the database used for locking and passed to logAction.
db storage.DB
// logAction is the function returned by actions.Register in New;
// logFix calls it to add an action to the action log.
logAction actions.BeforeFunc
// stderrw, if non-nil, replaces os.Stderr as the destination
// for readable multiline output; see SetStderr.
stderrw io.Writer
}
// stderr returns the writer that f uses for messages meant for standard error:
// the writer installed with [Fixer.SetStderr] if there is one,
// and [os.Stderr] otherwise.
func (f *Fixer) stderr() io.Writer {
	if f.stderrw == nil {
		return os.Stderr
	}
	return f.stderrw
}
// SetStderr makes f write to w the messages it would otherwise print
// to standard error.
//
// A Fixer prints its multiline debugging output directly, in addition to
// logging it through the slog.Logger passed to New, because slog formats
// a multiline string as one very long Go-quoted string that is hard to read.
func (f *Fixer) SetStderr(w io.Writer) {
	f.stderrw = w
}
// New returns a new Fixer that logs to lg, talks to GitHub through gh,
// and stores its state in db.
//
// Status and errors are logged to lg; a nil lg disables logging.
//
// The GitHub client gh is used to watch for new issues and comments
// and to edit them. With a nil gh the Fixer can still be configured and
// applied to Markdown using [Fixer.Fix], but [Fixer.Run] returns an error
// and [Fixer.EnableProject] and [Fixer.EnableEdits] panic.
//
// The database db is used to store locks.
//
// The name is the handle under which the Fixer's “last position” is
// remembered across program invocations, so each differently configured
// Fixer needs its own name.
//
// The new Fixer ignores issues and comments last updated more than
// 30 days before the call to New; see [Fixer.SetTimeLimit].
func New(lg *slog.Logger, gh *github.Client, db storage.DB, name string) *Fixer {
	// The same string names the event watcher and the action kind.
	kind := "commentfix.Fixer:" + name

	f := new(Fixer)
	f.name = name
	f.slog = lg
	f.github = gh
	f.projects = map[string]bool{}
	f.timeLimit = time.Now().Add(-30 * 24 * time.Hour)
	f.db = db
	f.init() // substitutes a discarding logger when lg is nil

	if gh != nil {
		f.watcher = gh.EventWatcher(kind)
	}
	f.logAction = actions.Register(kind, f.runFromActionLog)
	return f
}
// SetTimeLimit sets the cutoff time for edits: issues and comments
// last updated before limit are left alone.
func (f *Fixer) SetTimeLimit(limit time.Time) {
	f.timeLimit = limit
}
// init guarantees that f.slog is usable: if no logger has been set,
// it installs one that discards everything written to it.
func (f *Fixer) init() {
	if f.slog != nil {
		return
	}
	discard := slog.NewTextHandler(io.Discard, nil)
	f.slog = slog.New(discard)
}
// EnableProject enables the Fixer to process issues and comments
// in the named GitHub project. Events from projects that have not
// been enabled are ignored.
//
// EnableProject panics if the Fixer was not constructed by calling [New]
// with a non-nil [github.Client].
func (f *Fixer) EnableProject(name string) {
	f.init()
	if f.github == nil {
		panic("commentfix.Fixer: EnableProject missing GitHub client")
	}
	if f.projects == nil {
		// Only New allocates the map; allocate it lazily so that a Fixer
		// built any other way does not panic on a nil-map write.
		f.projects = make(map[string]bool)
	}
	f.projects[name] = true
}
// EnableEdits turns on editing, so that the fixer records its rewrites
// to be applied to issues and comments on GitHub.
// Without a call to EnableEdits, the Fixer makes no edits and does not
// mark the issues and comments it has looked at as “old”.
// That default is convenient for trying out a Fixer to see what it would do.
//
// EnableEdits panics if the Fixer was not constructed by calling [New]
// with a non-nil [github.Client].
func (f *Fixer) EnableEdits() {
	const missingClient = "commentfix.Fixer: EnableEdits missing GitHub client"

	f.init()
	if f.github == nil {
		panic(missingClient)
	}
	f.edit = true
}
// AutoLink adds a rule that turns every piece of plain text matching the
// regular expression pattern into a link to url.
// The url may refer to submatches with substitutions such as $1,
// as supported by [regexp.Regexp.Expand].
//
// Text that is already inside a link is left alone.
//
// For example, to link CL nnn to https://go.dev/cl/nnn,
// you could use:
//
//	f.AutoLink(`\bCL (\d+)\b`, "https://go.dev/cl/$1")
//
// AutoLink returns an error if pattern does not compile.
func (f *Fixer) AutoLink(pattern, url string) error {
	f.init()
	re, err := regexp.Compile(pattern)
	if err != nil {
		return err
	}

	linkify := func(x any, flags int) any {
		plain, isPlain := x.(*markdown.Plain)
		if !isPlain || flags&flagLink != 0 {
			// Not plain text, or already inside a link.
			return nil
		}

		src := plain.Text
		var (
			pieces []markdown.Inline
			done   int // end of the last match consumed so far
		)
		for _, loc := range re.FindAllStringSubmatchIndex(src, -1) {
			from, to := loc[0], loc[1]
			if from > done {
				// Keep the unmatched text between matches as plain text.
				pieces = append(pieces, &markdown.Plain{Text: src[done:from]})
			}
			target := re.ExpandString(nil, url, src, loc)
			pieces = append(pieces, &markdown.Link{
				Inner: []markdown.Inline{&markdown.Plain{Text: src[from:to]}},
				URL:   string(target),
			})
			done = to
		}
		if done == 0 {
			// Nothing was consumed, so there is nothing to replace.
			return nil
		}
		if rest := src[done:]; rest != "" {
			pieces = append(pieces, &markdown.Plain{Text: rest})
		}
		return pieces
	}

	f.fixes = append(f.fixes, linkify)
	return nil
}
// ReplaceText instructs the fixer to replace any text
// matching the regular expression pattern with the replacement repl.
// The replacement can contain substitution values like $1
// as supported by [regexp.Regexp.Expand].
//
// ReplaceText only applies in Markdown plain text.
// It does not apply in backticked code text, or in backticked
// or indented code blocks, or to URLs.
// It does apply to the plain text inside headings,
// inside bold, italic, or link markup.
//
// Text that the replacement leaves unchanged (for example, when repl
// reproduces the matched text) is not reported as fixed, so it does
// not by itself cause a rewrite.
//
// For example, you could correct “cancelled” to “canceled”,
// following Go's usual conventions, with:
//
//	f.ReplaceText(`cancelled`, "canceled")
//
// ReplaceText returns an error if pattern does not compile.
func (f *Fixer) ReplaceText(pattern, repl string) error {
	f.init()
	re, err := regexp.Compile(pattern)
	if err != nil {
		return err
	}
	f.fixes = append(f.fixes, func(x any, flags int) any {
		plain, ok := x.(*markdown.Plain)
		if !ok {
			return nil
		}
		// ReplaceAllString returns its input unchanged when nothing matches,
		// so one comparison covers both “no match” and “no-op replacement”.
		// This mirrors ReplaceURL, which also reports a fix only when the
		// value actually changes.
		replaced := re.ReplaceAllString(plain.Text, repl)
		if replaced == plain.Text {
			return nil
		}
		plain.Text = replaced
		return plain
	})
	return nil
}
// ReplaceURL adds a rule that rewrites linked URLs matching the regular
// expression pattern to the replacement URL repl.
// The replacement may use substitutions such as $1,
// as supported by [regexp.Regexp.Expand].
//
// The pattern is anchored to the start of the URL automatically,
// so it does not need to begin with \A or ^.
//
// When the visible text of a link is exactly the old URL,
// the text is updated to the new URL as well.
//
// For example, to replace links to golang.org with links to go.dev,
// you could use:
//
//	f.ReplaceURL(`https://golang\.org(/?)`, "https://go.dev$1")
//
// ReplaceURL returns an error if pattern does not compile.
func (f *Fixer) ReplaceURL(pattern, repl string) error {
	f.init()
	re, err := regexp.Compile(`\A(?:` + pattern + `)`)
	if err != nil {
		return err
	}

	// rewrite returns the rewritten form of u and whether it differs from u.
	rewrite := func(u string) (string, bool) {
		nu := re.ReplaceAllString(u, repl)
		return nu, nu != u
	}

	f.fixes = append(f.fixes, func(x any, flags int) any {
		switch node := x.(type) {
		case *markdown.AutoLink:
			prev := node.URL
			next, changed := rewrite(prev)
			if !changed {
				return nil
			}
			node.URL = next
			if node.Text == prev {
				node.Text = next
			}
			return node

		case *markdown.Link:
			prev := node.URL
			next, changed := rewrite(prev)
			if !changed {
				return nil
			}
			node.URL = next
			if len(node.Inner) == 1 {
				if p, ok := node.Inner[0].(*markdown.Plain); ok && p.Text == prev {
					p.Text = next
				}
			}
			return node
		}
		return nil
	})
	return nil
}
// An action has all the information needed to edit a GitHub issue or comment.
//
// Actions are encoded with storage.JSON when added to the action log (see logFix)
// and decoded with json.Unmarshal when run (see runFromActionLog),
// so the field names are part of the stored format.
type action struct {
// Project is the project of the event that produced the action.
Project string
// Issue is the issue number of the event that produced the action.
Issue int64
// IC is the issue or comment as it was when the action was created.
// runAction compares its body against the live body on GitHub
// and skips the edit if they differ.
IC *issueOrComment
Body string // new body of issue or comment
}
// logKey returns a's key in the action log: the ordered encoding
// of the URL of the issue or comment to edit.
// The complete db key also contains the action kind,
// which in turn contains the Fixer name.
func (a *action) logKey() []byte {
	target := a.IC.url()
	return ordered.Encode(target)
}
// result is the result of applying an action.
// runFromActionLog encodes it with storage.JSON to return to the action log.
type result struct {
URL string // URL of modified issue or comment
}
// Run adds to the action log the configured rewrites to issue texts and comments on GitHub
// that have been updated since the last call to Run for this fixer with edits enabled
// (including in different program invocations using the same fixer name).
//
// By default, Run ignores issues texts and comments more than 30 days old.
// Use [Fixer.SetTimeLimit] to change the cutoff.
//
// Run itself does not edit anything on GitHub; the edit happens, and its diff
// is printed to standard error, when the logged action is later executed.
//
// If [Fixer.EnableEdits] has not been called, Run processes recent issue texts and
// comments but does not add any changes to the action log. It also does not mark
// the issues and comments as processed, so that a future call to Run with edits
// enabled can rewrite them on GitHub.
//
// Run returns an error if the Fixer was not constructed by calling [New]
// with a non-nil [github.Client]. If ctx is done before all recent events
// have been examined, Run stops early and returns ctx.Err(); with edits
// enabled, the events handled up to that point remain marked as processed.
func (f *Fixer) Run(ctx context.Context) error {
	if f.watcher == nil {
		return errors.New("commentfix.Fixer: Run missing GitHub client")
	}

	last := timed.DBTime(0)
	for e := range f.watcher.Recent() {
		if err := ctx.Err(); err != nil {
			return err
		}
		last = e.DBTime
		f.logFix(e)
		if f.edit {
			// Mark this one old right now, so that we don't consider editing it again.
			// Because every event is marked and flushed as soon as it is handled,
			// no separate periodic checkpoint is needed to make progress
			// when Run is interrupted.
			f.watcher.MarkOld(e.DBTime)
			f.watcher.Flush()
		}
	}

	// Mark the final entry we saw as old.
	// Have to start a new loop because MarkOld must be called during Recent.
	// If another process has moved the mark past last, MarkOld is a no-op.
	if f.edit && last != 0 {
		for range f.watcher.Recent() {
			f.watcher.MarkOld(last)
			f.watcher.Flush()
			break
		}
	}
	return nil
}
// LogFixGitHubIssue adds rewrites to the issue body and comments of the
// specified GitHub issue to the action log, following the same logic as [Fixer.Run].
// As with Run, nothing is added to the action log unless [Fixer.EnableEdits]
// has been called.
//
// It requires that the Fixer's [github.Client] contain one or more events
// for the issue.
//
// It does not affect the watcher used by [Fixer.Run] and can be run
// concurrently with [Fixer.Run].
//
// However, the action log is keyed by the URL of the issue or comment,
// so an issue or comment for which a fix has already been logged will not
// be fixed again by subsequent calls to [Fixer.Run] or [Fixer.LogFixGitHubIssue]
// for a [Fixer] with the same name as this one. This is true even if the
// issue or comment body has changed since the fix was logged, in order
// to prevent a non-idempotent fix from being applied multiple times.
//
// It returns an error if the Fixer has no GitHub client or if
// no events are found for the issue; the latter error wraps errNoGitHubEvents.
func (f *Fixer) LogFixGitHubIssue(ctx context.Context, project string, issue int64) error {
	// Report a missing client as an error, as Run does,
	// instead of failing inside the client.
	if f.github == nil {
		return errors.New("commentfix.Fixer: LogFixGitHubIssue missing GitHub client")
	}

	events := 0
	for event := range f.github.Events(project, issue, issue) {
		events++
		f.logFix(event)
	}
	if events == 0 {
		return fmt.Errorf("%w for project=%s issue=%d", errNoGitHubEvents, project, issue)
	}
	return nil
}
var (
// sleep is how long runAction pauses after a successful edit on GitHub.
// The pause is skipped when testing.Testing reports true.
sleep = 1 * time.Second
// errNoGitHubEvents is the error wrapped by LogFixGitHubIssue
// when it finds no events for the requested issue.
errNoGitHubEvents = errors.New("no GitHub events")
)
// logFix records in the action log the action that fixes event e,
// provided a fix is needed and edits are enabled; otherwise it does nothing.
// It logs whether the action was newly added or had been added before.
func (f *Fixer) logFix(e *github.Event) {
	a := f.newAction(e)
	if a == nil {
		return
	}
	if !f.edit {
		// With edits off the action must stay out of the log:
		// once logged it could be run, if not now then at some
		// later time when edits are on.
		return
	}

	key := a.logKey()
	msg := "fixer already added action"
	if f.logAction(f.db, key, storage.JSON(a), !actions.RequiresApproval) {
		msg = "logged action"
	}
	f.slog.Info(msg, "key", storage.Fmt(key))
}
// newAction returns a new action to take on the issue or comment of the event,
// or nil if there is nothing to do.
//
// There is nothing to do when the event's project has not been enabled,
// the event is neither an issue nor an issue comment, the issue is a
// pull request, the issue or comment was last updated before the time limit,
// or [Fixer.Fix] reports no change to the body.
func (f *Fixer) newAction(e *github.Event) *action {
	if !f.projects[e.Project] {
		return nil
	}
	var ic *issueOrComment
	switch x := e.Typed.(type) {
	default: // for example, *github.IssueEvent
		// Format the type with %T rather than reflect.TypeOf(...).String():
		// the two agree for non-nil values, but %T prints "<nil>" for a nil
		// e.Typed, where calling String on the nil reflect.Type would panic.
		f.slog.Info("fixer skip", "dbtime", e.DBTime, "type", fmt.Sprintf("%T", e.Typed))
		return nil
	case *github.Issue:
		if x.PullRequest != nil {
			// Do not edit pull request bodies,
			// because they turn into commit messages
			// and cannot contain things like hyperlinks.
			return nil
		}
		ic = &issueOrComment{Issue: x}
		f.slog.Info("fixer run issue", "dbtime", e.DBTime, "issue", ic.Issue.Number)
	case *github.IssueComment:
		ic = &issueOrComment{Comment: x}
		f.slog.Info("fixer run comment", "dbtime", e.DBTime, "url", ic.Comment.URL)
	}
	// Skip anything last updated before the cutoff.
	// A timestamp that does not parse does not exclude the item.
	if tm, err := time.Parse(time.RFC3339, ic.updatedAt()); err == nil && tm.Before(f.timeLimit) {
		return nil
	}
	body, updated := f.Fix(ic.body())
	if !updated {
		return nil
	}
	return &action{
		Project: e.Project,
		Issue:   e.Issue,
		IC:      ic,
		Body:    body,
	}
}
// runFromActionLog is called by actions.Run to execute an action.
// It decodes the action, calls [Fixer.runAction], then encodes the result.
//
// It returns an error if data cannot be decoded, if the decoded action
// does not identify an issue or comment, or if running the action fails.
func (f *Fixer) runFromActionLog(ctx context.Context, data []byte) ([]byte, error) {
	var a action
	if err := json.Unmarshal(data, &a); err != nil {
		return nil, err
	}
	// runAction dereferences a.IC and one of its fields unconditionally.
	// Reject an action that has neither (for example, one decoded from
	// JSON null) with an error instead of a nil-pointer panic.
	if a.IC == nil || (a.IC.Issue == nil && a.IC.Comment == nil) {
		return nil, fmt.Errorf("commentfix: action has no issue or comment: project=%s issue=%d", a.Project, a.Issue)
	}
	res, err := f.runAction(ctx, &a)
	if err != nil {
		return nil, err
	}
	return storage.JSON(res), nil
}
// runAction carries out action a.
//
// While holding a lock on the issue or comment, it downloads the live
// version and compares its body with the body the action was computed from.
// If they differ the action is stale and runAction returns nil, nil.
// Otherwise it logs and prints the diff and, when edits are enabled,
// writes the new body to GitHub and returns the URL of what it changed.
// When edits are disabled it returns nil, nil after printing the diff.
func (f *Fixer) runAction(ctx context.Context, a *action) (*result, error) {
	target := a.IC.url()

	// The lock deliberately omits this Fixer's name, so that separate
	// fixers cannot work on the same object at the same time.
	// It is needed even though [actions.Run] takes a lock of its own:
	// that lock includes the fixer name, whereas this one excludes all fixers.
	lock := string(ordered.Encode("commentfix", target))
	f.db.Lock(lock)
	defer f.db.Unlock(lock)

	live, err := a.IC.download(ctx, f.github)
	if err != nil {
		// unreachable unless github error
		return nil, fmt.Errorf("commentfix download error: project=%s issue=%d url=%s err=%w", a.Project, a.Issue, target, err)
	}

	before := a.IC.body()
	if live.body() != before {
		// The text changed after the action was computed; do not overwrite it.
		f.slog.Info("commentfix stale", "project", a.Project, "issue", a.Issue, "url", target)
		return nil, nil
	}

	changes := bodyDiff(before, a.Body)
	f.slog.Info("do commentfix rewrite", "project", a.Project, "issue", a.Issue, "url", target, "edit", f.edit, "diff", changes)
	fmt.Fprintf(f.stderr(), "Fix %s:\n%s\n", target, changes)

	if !f.edit {
		return nil, nil
	}

	f.slog.Info("commentfix editing github", "url", target)
	if err := a.IC.editBody(ctx, f.github, a.Body); err != nil {
		// unreachable unless github error
		return nil, fmt.Errorf("commentfix edit: project=%s issue=%d err=%w", a.Project, a.Issue, err)
	}
	if !testing.Testing() {
		// unreachable in tests
		time.Sleep(sleep)
	}
	return &result{URL: target}, nil
}
// Latest reports the latest DBTime that the Fixer's Watcher
// is known to have marked old.
func (f *Fixer) Latest() timed.DBTime {
	w := f.watcher
	return w.Latest()
}
// An issueOrComment holds either a GitHub issue or a GitHub issue comment,
// so that the two can be handled by the same code.
// Its methods use Issue when it is non-nil and Comment otherwise.
type issueOrComment struct {
// Issue is the issue, when this value describes an issue.
Issue *github.Issue
// Comment is the comment; it is consulted only when Issue is nil.
Comment *github.IssueComment
}
// updatedAt returns the UpdatedAt field of the issue if there is one,
// and of the comment otherwise.
func (ic *issueOrComment) updatedAt() string {
	if ic.Issue == nil {
		return ic.Comment.UpdatedAt
	}
	return ic.Issue.UpdatedAt
}
// body returns the body text of the issue if there is one,
// and of the comment otherwise.
func (ic *issueOrComment) body() string {
	if ic.Issue == nil {
		return ic.Comment.Body
	}
	return ic.Issue.Body
}
// download fetches the current version of the issue or comment through gh,
// using its URL, and returns it wrapped in a new issueOrComment
// together with any error from the download.
func (ic *issueOrComment) download(ctx context.Context, gh *github.Client) (*issueOrComment, error) {
	var (
		fresh = new(issueOrComment)
		err   error
	)
	if ic.Issue != nil {
		fresh.Issue, err = gh.DownloadIssue(ctx, ic.Issue.URL)
	} else {
		fresh.Comment, err = gh.DownloadIssueComment(ctx, ic.Comment.URL)
	}
	return fresh, err
}
// url returns the URL field of the issue if there is one,
// and of the comment otherwise.
func (ic *issueOrComment) url() string {
	if ic.Issue == nil {
		return ic.Comment.URL
	}
	return ic.Issue.URL
}
// editBody uses gh to replace the body of the issue or comment with body,
// returning the error, if any, from the edit call.
func (ic *issueOrComment) editBody(ctx context.Context, gh *github.Client, body string) error {
	if ic.Issue == nil {
		changes := &github.IssueCommentChanges{Body: body}
		return gh.EditIssueComment(ctx, ic.Comment, changes)
	}
	changes := &github.IssueChanges{Body: body}
	return gh.EditIssue(ctx, ic.Issue, changes)
}
// Fix parses text as Markdown and runs every configured rewrite over it,
// in the order the rewrites were added.
// If at least one rewrite changed the document, Fix returns the
// reformatted Markdown and true; otherwise it returns "", false.
func (f *Fixer) Fix(text string) (newText string, fixed bool) {
	parser := &markdown.Parser{
		AutoLinkText:  true,
		Strikethrough: true,
		HeadingIDs:    true,
		Emoji:         true,
	}
	doc := parser.Parse(text)

	for _, apply := range f.fixes {
		// fixOne is on the left so that it runs for every rule,
		// even after an earlier rule has already made a change.
		fixed = f.fixOne(apply, doc) || fixed
	}
	if fixed {
		return markdown.Format(doc), true
	}
	return "", false
}
// Flags passed by fixOne as the second argument of a fix function.
const (
// flagLink means this inline is link text,
// so it is inappropriate/impossible to turn
// it into a (nested) hyperlink.
flagLink = 1 << iota
)
// fixOne applies the single fix function fix to every inline node of doc
// and reports whether doc was changed.
//
// It walks documents, quotes, lists, list items, headings and paragraphs
// down to their text. Within a text it visits the children of
// strikethrough, emphasis, strong and link nodes before the node itself,
// and passes flagLink to fix for nodes that are inside a link.
// A nil result from fix keeps the node; a markdown.Inline or
// []markdown.Inline result replaces it.
func (f *Fixer) fixOne(fix func(any, int) any, doc *markdown.Document) (fixed bool) {
	var (
		walkBlock   func(markdown.Block)
		walkInlines func(*[]markdown.Inline)
	)

	walkBlock = func(b markdown.Block) {
		switch b := b.(type) {
		case *markdown.Document:
			for _, child := range b.Blocks {
				walkBlock(child)
			}
		case *markdown.Quote:
			for _, child := range b.Blocks {
				walkBlock(child)
			}
		case *markdown.List:
			for _, item := range b.Items {
				walkBlock(item)
			}
		case *markdown.Item:
			for _, child := range b.Blocks {
				walkBlock(child)
			}
		case *markdown.Heading:
			walkBlock(b.Text)
		case *markdown.Paragraph:
			walkBlock(b.Text)
		case *markdown.Text:
			walkInlines(&b.Inline)
		}
	}

	// linkDepth counts the links enclosing the inlines being visited.
	linkDepth := 0

	walkInlines = func(list *[]markdown.Inline) {
		var (
			rewritten []markdown.Inline
			dirty     bool
		)
		for _, node := range *list {
			// Children first, so the fix sees already-rewritten content.
			switch node := node.(type) {
			case *markdown.Del:
				walkInlines(&node.Inner)
			case *markdown.Emph:
				walkInlines(&node.Inner)
			case *markdown.Strong:
				walkInlines(&node.Inner)
			case *markdown.Link:
				linkDepth++
				walkInlines(&node.Inner)
				linkDepth--
			}

			var flags int
			if linkDepth > 0 {
				flags = flagLink
			}

			switch repl := fix(node, flags).(type) {
			case nil:
				rewritten = append(rewritten, node)
			case markdown.Inline:
				dirty = true
				rewritten = append(rewritten, repl)
			case []markdown.Inline:
				dirty = true
				rewritten = append(rewritten, repl...)
			default:
				// unreachable unless bug in fix func
				f.slog.Error("fixer returned invalid type", "old", reflect.TypeOf(node).String(), "new", reflect.TypeOf(repl).String())
				rewritten = append(rewritten, node)
			}
		}
		if !dirty {
			return
		}
		*list = rewritten
		fixed = true
	}

	walkBlock(doc)
	return fixed
}
// bodyDiff returns a textual diff between the old and new bodies,
// labeled "old" and "new".
//
// Before diffing, each body has CRLF line endings converted to LF and all
// trailing line terminators replaced by a single newline, so that the diff
// does not report differences in line-ending style or trailing blank lines.
func bodyDiff(old, new string) string {
	// Convert CRLF first and trim afterwards: trimming "\n" before the
	// conversion stops at the first "\r", which leaves trailing CRLF blank
	// lines in place and makes them show up as spurious differences.
	normalize := func(s string) []byte {
		s = strings.ReplaceAll(s, "\r\n", "\n")
		return []byte(strings.TrimRight(s, "\r\n") + "\n")
	}
	return string(diff.Diff("old", normalize(old), "new", normalize(new)))
}