internal/github: add Wrapper for github edits
A Wrapper is used to wrap comments/edits made to GitHub with hidden
tags so that they can later be identified without relying on a database.
The first use of this will be by the Overview poster to wrap its own
comments and edits, so it can easily identify and strip out its own edits
from a body of text.
Change-Id: I8aa1f14d0d68264be4221f5fa99b5b24278d153f
Reviewed-on: https://go-review.googlesource.com/c/oscar/+/636858
Reviewed-by: Hyang-Ah Hana Kim <hyangah@gmail.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
diff --git a/internal/github/wrap.go b/internal/github/wrap.go
new file mode 100644
index 0000000..334e64a
--- /dev/null
+++ b/internal/github/wrap.go
@@ -0,0 +1,158 @@
+// Copyright 2024 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package github
+
+import (
+ "encoding/json"
+ "fmt"
+ "regexp"
+ "strings"
+
+ "golang.org/x/oscar/internal/storage"
+)
+
+// DO NOT REMOVE/EDIT ANY LITERAL STRINGS OR FORMATTING USED IN THIS FILE,
+// (unless you have a good reason and a plan to migrate any existing usage).
+// Go names can be changed.
+
+// A Wrapper wraps comments/edits made to GitHub in hidden tags so that
+// they can later be identified without referencing a database.
+// This is especially useful for stripping a bot's edits out of a
+// user-generated post.
+//
+// Wrapped strings ($body) are of the form:
+//
+//	<!-- Generated by Oscar. DO NOT EDIT. {"bot":"$bot","kind":"$kind","meta":$meta} -->$body<!-- oscar-end -->
+//
+// where $bot and $kind are specified in [NewWrapper], and $meta is arbitrary
+// metadata (omitted if nil).
+//
+// Note: the wrapping is not robust against a user intentionally removing or editing the tag(s).
+type Wrapper struct {
+	bot  string // name of the bot making the edits
+	kind string // purpose/type of the edit
+}
+
+// NewWrapper returns a [Wrapper] that tags GitHub modifications.
+// bot names the bot making the edits (e.g., "gabyhelp"); kind is a
+// context string identifying the purpose or type of the edit
+// (e.g., "overview" or "related").
+func NewWrapper(bot, kind string) *Wrapper {
+	w := new(Wrapper)
+	w.bot, w.kind = bot, kind
+	return w
+}
+
+// Wrap wraps body (the text of a GitHub modification) in a start tag and the end tag.
+// It returns an error if body contains the end tag (<!-- oscar-end -->),
+// which also means an already-wrapped body cannot be wrapped again.
+// metadata is freeform metadata to include in the hidden start tag; it is sanitized
+// via [json.Marshal], so it may contain otherwise forbidden elements (e.g., "-->").
+func (w *Wrapper) Wrap(body string, metadata any) (string, error) {
+ // The body is matched lazily up to the first end tag, so an end tag inside body would cut it short.
+ if strings.Contains(body, endTag) {
+ return "", fmt.Errorf("github: wrapped body cannot contain %q", endTag)
+ }
+ // DO NOT REMOVE/EDIT STRUCTURE OR CONTENTS.
+ return w.startTag(metadata) + body + endTag, nil
+}
+
+// Regexps for recognizing wrapped strings; the body is matched lazily with (?s) so it may span lines.
+var (
+ reString = `<!-- Generated by Oscar\. DO NOT EDIT\. (\{.*?\}) -->(?s)(.*?)<!-- oscar-end -->` // group 1: JSON tag content; group 2: body
+ containsRE *regexp.Regexp = regexp.MustCompile(reString) // matches a wrapped string anywhere in the text
+ isRE *regexp.Regexp = regexp.MustCompile(`^` + reString + `$`) // same pattern, anchored to the start and end of the text
+)
+
+// IsWrapped reports whether s is exactly one string of the form returned by [Wrapper.Wrap] (for any [Wrapper]).
+func IsWrapped(s string) bool {
+	m := isRE.FindStringSubmatch(s) // m[2] is the body; the anchored match can stretch it across several wrapped strings
+	return m != nil && !strings.Contains(m[2], endTag)
+}
+
+// ContainsWrapped reports whether s contains at least one
+// substring of the form produced by [Wrapper.Wrap]
+// (for any [Wrapper]).
+func ContainsWrapped(s string) bool {
+	return containsRE.FindStringIndex(s) != nil
+}
+
+// Unwrapped contains the parsed contents of a wrapped string, as returned by [Parse] and [ParseAll].
+type Unwrapped struct {
+ TagContent // decoded JSON contents of the start tag
+ Body string // text found between the start tag and the end tag
+}
+
+// TagContent contains the structured contents of a start tag; it is the JSON object embedded in the tag.
+type TagContent struct {
+ Bot string `json:"bot"` // name of the bot that made the edit
+ Kind string `json:"kind"` // context string for the purpose/type of the edit
+ Meta json.RawMessage `json:"meta,omitempty"` // raw JSON metadata; left out of the tag when empty
+}
+
+// Parse parses the contents of a single wrapped string.
+// It returns false if s is malformed: it is not exactly one start tag, body
+// and end tag, the body itself contains the end tag, or the tag is not valid JSON.
+func Parse(s string) (_ *Unwrapped, ok bool) {
+	m := isRE.FindStringSubmatch(s)
+	if len(m) != 3 {
+		return nil, false
+	}
+	// m[0] is s, m[1] the JSON tag content, m[2] the body. The anchored match can
+	// stretch the body across several wrapped strings; Wrap never produces that.
+	if strings.Contains(m[2], endTag) {
+		return nil, false
+	}
+	var u Unwrapped
+	if err := json.Unmarshal([]byte(m[1]), &u.TagContent); err != nil {
+		return nil, false
+	}
+	u.Body = m[2]
+	return &u, true
+}
+
+// ParseAll parses the contents of all wrapped substrings in the
+// string. It returns false if there are no wrapped substrings,
+// or any of them are malformed.
+func ParseAll(s string) (_ []*Unwrapped, ok bool) {
+ var us []*Unwrapped
+ matches := containsRE.FindAllString(s, -1)
+ if len(matches) == 0 {
+ return nil, false
+ }
+ for _, substr := range matches {
+ u, ok := Parse(substr)
+ if !ok {
+ return nil, false
+ }
+ us = append(us, u)
+ }
+ return us, true
+}
+
+// Strip returns s with every wrapped substring
+// (start tag, body and end tag) deleted.
+func Strip(s string) string {
+	return containsRE.ReplaceAllLiteralString(s, "")
+}
+
+// startTag returns <!-- Generated by Oscar. DO NOT EDIT. {"bot":"$bot","kind":"$kind","meta":$metadata-json} -->
+// where $metadata-json is the JSON representation of the given metadata (the "meta" key is omitted when metadata is nil).
+func (w *Wrapper) startTag(metadata any) string {
+ // DO NOT REMOVE/EDIT this function body.
+ var js json.RawMessage // stays nil for nil metadata, so omitempty drops the "meta" key
+ if metadata != nil {
+ js = storage.JSON(metadata)
+ }
+ return fmt.Sprintf("<!-- Generated by Oscar. DO NOT EDIT. %s -->",
+ storage.JSON(TagContent{
+ Bot: w.bot,
+ Kind: w.kind,
+ Meta: js,
+ }))
+}
+
+// endTag is the hidden tag that closes every wrapped string. DO NOT REMOVE/EDIT.
+const endTag = "<!-- oscar-end -->"
diff --git a/internal/github/wrap_test.go b/internal/github/wrap_test.go
new file mode 100644
index 0000000..1f3765a
--- /dev/null
+++ b/internal/github/wrap_test.go
@@ -0,0 +1,254 @@
+// Copyright 2024 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package github
+
+import (
+ "encoding/json"
+ "fmt"
+ "testing"
+
+ "github.com/google/go-cmp/cmp"
+ "golang.org/x/oscar/internal/storage"
+)
+
+// TestWrapper round-trips Wrap output through every exported helper, with and without metadata.
+func TestWrapper(t *testing.T) {
+	w := NewWrapper("test-bot", "test")
+	body := "this is a comment"
+	metadata := &struct {
+		Foo string
+		Bar int
+	}{
+		Foo: "foo",
+		Bar: 123,
+	}
+	for _, tc := range []struct {
+		name     string
+		body     string
+		meta     any
+		wantMeta json.RawMessage
+	}{
+		{
+			name:     "no_metadata",
+			body:     body,
+			meta:     nil,
+			wantMeta: nil,
+		},
+		{
+			name:     "metadata",
+			body:     body,
+			meta:     metadata,
+			wantMeta: storage.JSON(metadata),
+		},
+	} {
+		t.Run(tc.name, func(t *testing.T) {
+			wrapped, err := w.Wrap(tc.body, tc.meta)
+			if err != nil {
+				t.Fatal(err)
+			}
+			wrappedEdit := "something" + wrapped + "something else"
+
+			t.Run("IsWrapped", func(t *testing.T) {
+				if !IsWrapped(wrapped) {
+					t.Errorf("IsWrapped(%s) = false, want true", wrapped)
+				}
+
+				// IsWrapped expects an exact match.
+				if IsWrapped(wrappedEdit) {
+					t.Errorf("IsWrapped(%s) = true, want false", wrappedEdit)
+				}
+			})
+
+			t.Run("ContainsWrapped", func(t *testing.T) {
+				if !ContainsWrapped(wrapped) {
+					t.Errorf("ContainsWrapped(%s) = false, want true", wrapped)
+				}
+				// Unlike IsWrapped, surrounding text must not prevent a match.
+				if !ContainsWrapped(wrappedEdit) {
+					t.Errorf("ContainsWrapped(%s) = false, want true", wrappedEdit)
+				}
+			})
+
+			t.Run("Parse", func(t *testing.T) {
+				got, ok := Parse(wrapped)
+				if !ok {
+					t.Fatalf("Parse(%s) = false, want true", wrapped)
+				}
+				if got.Body != tc.body {
+					t.Errorf("Parse(%s).Body = %q, want %q", wrapped, got.Body, tc.body)
+				}
+				if string(got.Meta) != string(tc.wantMeta) {
+					t.Errorf("Parse(%s).Meta = %s, want %s", wrapped, got.Meta, tc.wantMeta)
+				}
+			})
+
+			t.Run("ParseAll", func(t *testing.T) {
+				wrappedMultiple := "something" + wrapped + "something else" + wrapped
+				got, ok := ParseAll(wrappedMultiple)
+				if !ok {
+					t.Fatalf("ParseAll(%s) = false, want true", wrappedMultiple)
+				}
+				want := []*Unwrapped{
+					{
+						TagContent: TagContent{
+							Bot:  "test-bot",
+							Kind: "test",
+							Meta: tc.wantMeta,
+						},
+						Body: tc.body,
+					},
+					{
+						TagContent: TagContent{
+							Bot:  "test-bot",
+							Kind: "test",
+							Meta: tc.wantMeta,
+						},
+						Body: tc.body,
+					},
+				}
+				if diff := cmp.Diff(want, got); diff != "" {
+					t.Errorf("ParseAll(%s) mismatch (-want +got):\n%s", wrappedMultiple, diff)
+				}
+			})
+
+			t.Run("Strip", func(t *testing.T) {
+				wrappedMultiple := "something" + wrapped + " something else" + wrapped
+				want := "something something else"
+				got := Strip(wrappedMultiple)
+				if got != want {
+					t.Errorf("Strip(%s) = %v, want %v", wrappedMultiple, got, want)
+				}
+			})
+		})
+	}
+}
+
+// TestWrapperError checks that Wrap rejects any body containing the end tag,
+// which includes a body that has already been wrapped.
+func TestWrapperError(t *testing.T) {
+	w := NewWrapper("test-bot", "test")
+	wantErr := func(body string) {
+		t.Helper()
+		if got, err := w.Wrap(body, nil); err == nil {
+			t.Errorf("w.Wrap() = (%s, nil), want error", got)
+		}
+	}
+	// A body that spells out the end tag is rejected.
+	wantErr("body <!-- oscar-end --> something")
+	inner, err := w.Wrap("body", nil)
+	if err != nil {
+		t.Fatal(err)
+	}
+	// Can't double-wrap.
+	wantErr(inner)
+}
+
+// TestWrapperLiteral checks the exported helpers against hand-written wrapped text.
+func TestWrapperLiteral(t *testing.T) {
+	// Full text by Oscar.
+	comment :=
+		`<!-- Generated by Oscar. DO NOT EDIT. {"bot":"test-bot","kind":"test","meta":{"Foo":"foo","Bar":123}} -->this is a comment
+with multiple lines <!-- and an HTML comment --><!-- oscar-end -->`
+	t.Run("IsWrapped", func(t *testing.T) {
+		if !IsWrapped(comment) {
+			t.Errorf("IsWrapped(%s) = false, want true", comment)
+		}
+	})
+
+	t.Run("ContainsWrapped", func(t *testing.T) {
+		edit := `hello` + comment + `some more stuff`
+		if !ContainsWrapped(edit) {
+			t.Errorf("ContainsWrapped(%s) = false, want true", edit)
+		}
+	})
+
+	t.Run("Parse", func(t *testing.T) {
+		got, ok := Parse(comment)
+		if !ok {
+			t.Fatalf("Parse(%s) = false, want true", comment)
+		}
+		want := &Unwrapped{
+			TagContent: TagContent{
+				Bot:  "test-bot",
+				Kind: "test",
+				Meta: json.RawMessage(`{"Foo":"foo","Bar":123}`),
+			},
+			Body: `this is a comment
+with multiple lines <!-- and an HTML comment -->`,
+		}
+		if !cmp.Equal(got, want) {
+			t.Errorf("Parse(%s) = %+v, want %+v", comment, got, want)
+		}
+	})
+
+	// Text with multiple edits by Oscar (with different bots and kinds).
+	edited := `a user comment<!-- Generated by Oscar. DO NOT EDIT. {"bot":"test-bot","kind":"test","meta":{"Object:":{"Foo":"foo","Bar":123}}} -->this is a comment<!-- oscar-end -->
+more user comment<!-- Generated by Oscar. DO NOT EDIT. {"bot":"test-bot2","kind":"test2","meta":{"Text":"hello"}} -->this is another comment<!-- oscar-end -->`
+	t.Run("ParseAll", func(t *testing.T) {
+		got, ok := ParseAll(edited)
+		if !ok {
+			t.Fatalf("ParseAll(%s) = false, want true", edited)
+		}
+		want := []*Unwrapped{
+			{
+				TagContent: TagContent{
+					Bot:  "test-bot",
+					Kind: "test",
+					Meta: json.RawMessage(`{"Object:":{"Foo":"foo","Bar":123}}`),
+				},
+				Body: `this is a comment`,
+			},
+			{
+				TagContent: TagContent{
+					Bot:  "test-bot2",
+					Kind: "test2",
+					Meta: json.RawMessage(`{"Text":"hello"}`),
+				},
+				Body: `this is another comment`,
+			},
+		}
+		if diff := cmp.Diff(want, got); diff != "" {
+			t.Errorf("ParseAll(%s) mismatch (-want +got):\n%s", edited, diff)
+		}
+	})
+
+	t.Run("Strip", func(t *testing.T) {
+		got := Strip(edited)
+		want := `a user comment
+more user comment`
+		if got != want {
+			t.Errorf("Strip(%s) = %s, want %s", edited, got, want)
+		}
+	})
+}
+
+// ExampleWrapper demonstrates wrapping a comment and reading it back.
+func ExampleWrapper() {
+	w := NewWrapper("test-bot", "test")
+	metadata := &struct {
+		Foo string
+		Bar int
+	}{Foo: "foo", Bar: 123}
+	wrapped, err := w.Wrap("this is a comment", metadata)
+	if err != nil {
+		fmt.Println("Wrap:", err)
+		return
+	}
+	fmt.Println("Wrap:", wrapped)
+	fmt.Println("IsWrapped:", IsWrapped(wrapped))
+
+	u, ok := Parse(wrapped)
+	if !ok {
+		// Parse returns a nil *Unwrapped on failure, so u must not be dereferenced.
+		fmt.Println("Parse: failed")
+		return
+	}
+	fmt.Printf("Parse: (bot=%s, kind=%s, meta=%s, body=%s, %t)\n", u.Bot, u.Kind, u.Meta, u.Body, ok)
+
+	// Output:
+	// Wrap: <!-- Generated by Oscar. DO NOT EDIT. {"bot":"test-bot","kind":"test","meta":{"Foo":"foo","Bar":123}} -->this is a comment<!-- oscar-end -->
+	// IsWrapped: true
+	// Parse: (bot=test-bot, kind=test, meta={"Foo":"foo","Bar":123}, body=this is a comment, true)
+}