| // Copyright 2024 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| // Package commentfix implements rule-based rewriting of issue comments. |
| package commentfix |
| |
| import ( |
| "context" |
| "fmt" |
| "io" |
| "log/slog" |
| "os" |
| "reflect" |
| "regexp" |
| "strings" |
| "testing" |
| "time" |
| |
| "golang.org/x/oscar/internal/diff" |
| "golang.org/x/oscar/internal/github" |
| "golang.org/x/oscar/internal/storage/timed" |
| "rsc.io/markdown" |
| ) |
| |
| // A Fixer rewrites issue texts and issue comments using a set of rules. |
| // After creating a fixer with [New], new rules can be added using |
| // the [Fixer.AutoLink], [Fixer.ReplaceText], and [Fixer.ReplaceURL] methods, |
| // and then repeated calls to [Fixer.Run] apply the replacements on GitHub. |
| // |
| // The zero value of a Fixer can be used in “offline” mode with [Fixer.Fix], |
| // which returns rewritten Markdown. |
| // |
| // TODO(rsc): Separate the GitHub logic more cleanly from the rewrite logic. |
| type Fixer struct { |
| slog *slog.Logger |
| github *github.Client |
| watcher *timed.Watcher[*github.Event] |
| fixes []func(any, int) any |
| projects map[string]bool |
| edit bool |
| timeLimit time.Time |
| |
| stderrw io.Writer |
| } |
| |
| func (f *Fixer) stderr() io.Writer { |
| if f.stderrw != nil { |
| return f.stderrw |
| } |
| return os.Stderr |
| } |
| |
| // SetStderr sets the writer to use for messages f intends to print to standard error. |
| // A Fixer writes directly to standard error (or this writer) so that it can print |
| // readable multiline debugging outputs. These are also logged via the slog.Logger |
| // passed to New, but multiline strings format as one very long Go-quoted string in slog |
| // and are not as easy to read. |
| func (f *Fixer) SetStderr(w io.Writer) { |
| f.stderrw = w |
| } |
| |
| // New creates a new Fixer using the given logger and GitHub client. |
| // |
| // The Fixer logs status and errors to lg; if lg is nil, the Fixer does not log anything. |
| // |
| // The GitHub client is used to watch for new issues and comments |
| // and to edit issues and comments. If gh is nil, the Fixer can still be |
| // configured and applied to Markdown using [Fixer.Fix], but calling |
| // [Fixer.Run] will panic. |
| // |
| // The name is the handle by which the Fixer's “last position” is retrieved |
| // across multiple program invocations; each differently configured |
| // Fixer needs a different name. |
| func New(lg *slog.Logger, gh *github.Client, name string) *Fixer { |
| f := &Fixer{ |
| slog: lg, |
| github: gh, |
| projects: make(map[string]bool), |
| timeLimit: time.Now().Add(-30 * 24 * time.Hour), |
| } |
| f.init() // set f.slog if lg==nil |
| if gh != nil { |
| f.watcher = gh.EventWatcher("commentfix.Fixer:" + name) |
| } |
| return f |
| } |
| |
| // SetTimeLimit sets the time before which comments are not edited. |
| func (f *Fixer) SetTimeLimit(limit time.Time) { |
| f.timeLimit = limit |
| } |
| |
| // init makes sure slog is non-nil. |
| func (f *Fixer) init() { |
| if f.slog == nil { |
| f.slog = slog.New(slog.NewTextHandler(io.Discard, nil)) |
| } |
| } |
| |
| func (f *Fixer) EnableProject(name string) { |
| f.init() |
| if f.github == nil { |
| panic("commentfix.Fixer: EnableProject missing GitHub client") |
| } |
| f.projects[name] = true |
| } |
| |
| // EnableEdits configures the fixer to make edits to comments on GitHub. |
| // If EnableEdits is not called, the Fixer only prints what it would do, |
| // and it does not mark the issues and comments as “old”. |
| // This default mode is useful for experimenting with a Fixer |
| // to gauge its effects. |
| // |
| // EnableEdits panics if the Fixer was not constructed by calling [New] |
| // with a non-nil [github.Client]. |
| func (f *Fixer) EnableEdits() { |
| f.init() |
| if f.github == nil { |
| panic("commentfix.Fixer: EnableEdits missing GitHub client") |
| } |
| f.edit = true |
| } |
| |
| // AutoLink instructs the fixer to turn any text matching the |
| // regular expression pattern into a link to the URL. |
| // The URL can contain substitution values like $1 |
| // as supported by [regexp.Regexp.Expand]. |
| // |
| // For example, to link CL nnn to https://go.dev/cl/nnn, |
| // you could use: |
| // |
| // f.AutoLink(`\bCL (\d+)\b`, "https://go.dev/cl/$1") |
| func (f *Fixer) AutoLink(pattern, url string) error { |
| f.init() |
| re, err := regexp.Compile(pattern) |
| if err != nil { |
| return err |
| } |
| f.fixes = append(f.fixes, func(x any, flags int) any { |
| if flags&flagLink != 0 { |
| // already inside link |
| return nil |
| } |
| plain, ok := x.(*markdown.Plain) |
| if !ok { |
| return nil |
| } |
| var out []markdown.Inline |
| start := 0 |
| text := plain.Text |
| for _, m := range re.FindAllStringSubmatchIndex(text, -1) { |
| if start < m[0] { |
| out = append(out, &markdown.Plain{Text: text[start:m[0]]}) |
| } |
| link := string(re.ExpandString(nil, url, text, m)) |
| out = append(out, &markdown.Link{ |
| Inner: []markdown.Inline{&markdown.Plain{Text: text[m[0]:m[1]]}}, |
| URL: link, |
| }) |
| start = m[1] |
| } |
| if start == 0 { |
| return nil |
| } |
| if start < len(text) { |
| out = append(out, &markdown.Plain{Text: text[start:]}) |
| } |
| return out |
| }) |
| return nil |
| } |
| |
| // ReplaceText instructs the fixer to replace any text |
| // matching the regular expression pattern with the replacement repl. |
| // The replacement can contain substitution values like $1 |
| // as supported by [regexp.Regexp.Expand]. |
| // |
| // ReplaceText only applies in Markdown plain text. |
| // It does not apply in backticked code text, or in backticked |
| // or indented code blocks, or to URLs. |
| // It does apply to the plain text inside headings, |
| // inside bold, italic, or link markup. |
| // |
| // For example, you could correct “cancelled” to “canceled”, |
| // following Go's usual conventions, with: |
| // |
| // f.ReplaceText(`cancelled`, "canceled") |
| func (f *Fixer) ReplaceText(pattern, repl string) error { |
| f.init() |
| re, err := regexp.Compile(pattern) |
| if err != nil { |
| return err |
| } |
| f.fixes = append(f.fixes, func(x any, flags int) any { |
| plain, ok := x.(*markdown.Plain) |
| if !ok { |
| return nil |
| } |
| if re.FindStringSubmatchIndex(plain.Text) == nil { |
| return nil |
| } |
| plain.Text = re.ReplaceAllString(plain.Text, repl) |
| return plain |
| }) |
| return nil |
| } |
| |
| // ReplaceURL instructs the fixer to replace any linked URLs |
| // matching the regular expression pattern with the replacement URL repl. |
| // The replacement can contain substitution values like $1 |
| // as supported by [regexp.Regexp.Expand]. |
| // |
| // The regular expression pattern is automatically anchored |
| // to the start of the URL: there is no need to start it with \A or ^. |
| // |
| // For example, to replace links to golang.org with links to go.dev, |
| // you could use: |
| // |
| // f.ReplaceURL(`https://golang\.org(/?)`, "https://go.dev$1") |
| func (f *Fixer) ReplaceURL(pattern, repl string) error { |
| f.init() |
| re, err := regexp.Compile(`\A(?:` + pattern + `)`) |
| if err != nil { |
| return err |
| } |
| f.fixes = append(f.fixes, func(x any, flags int) any { |
| switch x := x.(type) { |
| case *markdown.AutoLink: |
| old := x.URL |
| x.URL = re.ReplaceAllString(x.URL, repl) |
| if x.URL == old { |
| return nil |
| } |
| if x.Text == old { |
| x.Text = x.URL |
| } |
| return x |
| case *markdown.Link: |
| old := x.URL |
| x.URL = re.ReplaceAllString(x.URL, repl) |
| if x.URL == old { |
| return nil |
| } |
| if len(x.Inner) == 1 { |
| if p, ok := x.Inner[0].(*markdown.Plain); ok && p.Text == old { |
| p.Text = x.URL |
| } |
| } |
| return x |
| } |
| return nil |
| }) |
| return nil |
| } |
| |
| // Run applies the configured rewrites to issue texts and comments on GitHub |
| // that have been updated since the last call to Run for this fixer with edits enabled |
| // (including in different program invocations using the same fixer name). |
| // |
| // By default, Run ignores issues texts and comments more than 30 days old. |
| // Use [Fixer.SetTimeLimit] to change the cutoff. |
| // |
| // Run prints diffs of its edits to standard error in addition to logging them, |
| // because slog logs the diffs as single-line Go quoted strings that are |
| // too difficult to skim. |
| // |
| // If [Fixer.EnableEdits] has not been called, Run processes recent issue texts |
| // and comments and prints diffs of its intended edits to standard error, |
| // but it does not make the changes. It also does not mark the issues and comments as processed, |
| // so that a future call to Run with edits enabled can rewrite them on GitHub. |
| // |
| // Run sleeps for 1 second after each GitHub edit. |
| // |
| // Run panics if the Fixer was not constructed by calling [New] |
| // with a non-nil [github.Client]. |
| func (f *Fixer) Run(ctx context.Context) { |
| if f.watcher == nil { |
| panic("commentfix.Fixer: Run missing GitHub client") |
| } |
| |
| last := timed.DBTime(0) |
| old := 0 |
| const maxOld = 100 |
| for e := range f.watcher.Recent() { |
| if f.edit && last != 0 { |
| // Occasionally remember where we were, |
| // so if we are repeatedly interrupted we still |
| // make progress. |
| if old++; old >= maxOld { |
| f.watcher.MarkOld(last) |
| f.watcher.Flush() |
| old = 0 |
| } |
| } |
| last = e.DBTime |
| |
| if !f.projects[e.Project] { |
| continue |
| } |
| var ic *issueOrComment |
| switch x := e.Typed.(type) { |
| default: // for example, *github.IssueEvent |
| f.slog.Info("fixer skip", "dbtime", e.DBTime, "type", reflect.TypeOf(e.Typed).String()) |
| continue |
| case *github.Issue: |
| if x.PullRequest != nil { |
| // Do not edit pull request bodies, |
| // because they turn into commit messages |
| // and cannot contain things like hyperlinks. |
| continue |
| } |
| ic = &issueOrComment{issue: x} |
| f.slog.Info("fixer run issue", "dbtime", e.DBTime, "issue", ic.issue.Number) |
| case *github.IssueComment: |
| ic = &issueOrComment{comment: x} |
| f.slog.Info("fixer run comment", "dbtime", e.DBTime, "url", ic.comment.URL) |
| } |
| if tm, err := time.Parse(time.RFC3339, ic.updatedAt()); err == nil && tm.Before(f.timeLimit) { |
| continue |
| } |
| body, updated := f.Fix(ic.body()) |
| if !updated { |
| continue |
| } |
| live, err := ic.download(ctx, f.github) |
| if err != nil { |
| // unreachable unless github error |
| f.slog.Error("commentfix download error", "project", e.Project, "issue", e.Issue, "url", ic.url(), "err", err) |
| continue |
| } |
| if live.body() != ic.body() { |
| f.slog.Info("commentfix stale", "project", e.Project, "issue", e.Issue, "url", ic.url()) |
| continue |
| } |
| f.slog.Info("commentfix rewrite", "project", e.Project, "issue", e.Issue, "url", ic.url(), "edit", f.edit, "diff", bodyDiff(ic.body(), body)) |
| fmt.Fprintf(f.stderr(), "Fix %s:\n%s\n", ic.url(), bodyDiff(ic.body(), body)) |
| if f.edit { |
| f.slog.Info("commentfix editing github", "url", ic.url()) |
| if err := ic.editBody(ctx, f.github, body); err != nil { |
| // unreachable unless github error |
| f.slog.Error("commentfix edit", "project", e.Project, "issue", e.Issue, "err", err) |
| continue |
| } |
| |
| // Mark this one old right now, so that we don't consider editing it again. |
| f.watcher.MarkOld(e.DBTime) |
| f.watcher.Flush() |
| old = 0 |
| |
| if !testing.Testing() { |
| // unreachable in tests |
| time.Sleep(1 * time.Second) |
| } |
| } |
| } |
| |
| // Mark the final entry we saw as old. |
| // Have to start a new loop because MarkOld must be called during Recent. |
| // If another process has moved the mark past last, MarkOld is a no-op. |
| if f.edit && last != 0 { |
| for range f.watcher.Recent() { |
| f.watcher.MarkOld(last) |
| f.watcher.Flush() |
| break |
| } |
| } |
| } |
| |
| type issueOrComment struct { |
| issue *github.Issue |
| comment *github.IssueComment |
| } |
| |
| func (ic *issueOrComment) updatedAt() string { |
| if ic.issue != nil { |
| return ic.issue.UpdatedAt |
| } |
| return ic.comment.UpdatedAt |
| } |
| |
| func (ic *issueOrComment) body() string { |
| if ic.issue != nil { |
| return ic.issue.Body |
| } |
| return ic.comment.Body |
| } |
| |
| func (ic *issueOrComment) download(ctx context.Context, gh *github.Client) (*issueOrComment, error) { |
| if ic.issue != nil { |
| live, err := gh.DownloadIssue(ctx, ic.issue.URL) |
| return &issueOrComment{issue: live}, err |
| } |
| live, err := gh.DownloadIssueComment(ctx, ic.comment.URL) |
| return &issueOrComment{comment: live}, err |
| } |
| |
| func (ic *issueOrComment) url() string { |
| if ic.issue != nil { |
| return ic.issue.URL |
| } |
| return ic.comment.URL |
| } |
| |
| func (ic *issueOrComment) editBody(ctx context.Context, gh *github.Client, body string) error { |
| if ic.issue != nil { |
| return gh.EditIssue(ctx, ic.issue, &github.IssueChanges{Body: body}) |
| } |
| return gh.EditIssueComment(ctx, ic.comment, &github.IssueCommentChanges{Body: body}) |
| } |
| |
| // Fix applies the configured rewrites to the markdown text. |
| // If no fixes apply, it returns "", false. |
| // If any fixes apply, it returns the updated text and true. |
| func (f *Fixer) Fix(text string) (newText string, fixed bool) { |
| p := &markdown.Parser{ |
| AutoLinkText: true, |
| Strikethrough: true, |
| HeadingIDs: true, |
| Emoji: true, |
| } |
| doc := p.Parse(text) |
| for _, fixer := range f.fixes { |
| if f.fixOne(fixer, doc) { |
| fixed = true |
| } |
| } |
| if !fixed { |
| return "", false |
| } |
| return markdown.Format(doc), true |
| } |
| |
// Flag bits passed as the second argument to a fix function.
const (
	// flagLink is set when the inline being visited is part of
	// the text of a link, where creating another (nested)
	// hyperlink would be inappropriate or impossible.
	flagLink = 1 << iota
)
| |
| // fixOne runs one fix function over doc, |
| // reporting whether doc was changed. |
| func (f *Fixer) fixOne(fix func(any, int) any, doc *markdown.Document) (fixed bool) { |
| var ( |
| fixBlock func(markdown.Block) |
| fixInlines func(*[]markdown.Inline) |
| ) |
| fixBlock = func(x markdown.Block) { |
| switch x := x.(type) { |
| case *markdown.Document: |
| for _, sub := range x.Blocks { |
| fixBlock(sub) |
| } |
| case *markdown.Quote: |
| for _, sub := range x.Blocks { |
| fixBlock(sub) |
| } |
| case *markdown.List: |
| for _, sub := range x.Items { |
| fixBlock(sub) |
| } |
| case *markdown.Item: |
| for _, sub := range x.Blocks { |
| fixBlock(sub) |
| } |
| case *markdown.Heading: |
| fixBlock(x.Text) |
| case *markdown.Paragraph: |
| fixBlock(x.Text) |
| case *markdown.Text: |
| fixInlines(&x.Inline) |
| } |
| } |
| |
| link := 0 |
| fixInlines = func(inlines *[]markdown.Inline) { |
| changed := false |
| var out []markdown.Inline |
| for _, x := range *inlines { |
| switch x := x.(type) { |
| case *markdown.Del: |
| fixInlines(&x.Inner) |
| case *markdown.Emph: |
| fixInlines(&x.Inner) |
| case *markdown.Strong: |
| fixInlines(&x.Inner) |
| case *markdown.Link: |
| link++ |
| fixInlines(&x.Inner) |
| link-- |
| } |
| flags := 0 |
| if link > 0 { |
| flags = flagLink |
| } |
| switch fx := fix(x, flags).(type) { |
| default: |
| // unreachable unless bug in fix func |
| f.slog.Error("fixer returned invalid type", "old", reflect.TypeOf(x).String(), "new", reflect.TypeOf(fx).String()) |
| out = append(out, x) |
| case nil: |
| out = append(out, x) |
| case markdown.Inline: |
| changed = true |
| out = append(out, fx) |
| case []markdown.Inline: |
| changed = true |
| out = append(out, fx...) |
| } |
| } |
| if changed { |
| *inlines = out |
| fixed = true |
| } |
| } |
| |
| fixBlock(doc) |
| return fixed |
| } |
| |
| func bodyDiff(old, new string) string { |
| old = strings.TrimRight(old, "\n") + "\n" |
| old = strings.ReplaceAll(old, "\r\n", "\n") |
| |
| new = strings.TrimRight(new, "\n") + "\n" |
| new = strings.ReplaceAll(new, "\r\n", "\n") |
| |
| return string(diff.Diff("old", []byte(old), "new", []byte(new))) |
| } |