internal/github: add Wrapper for github edits

A Wrapper is used to wrap comments/edits made to GitHub with hidden
tags so that they can later be identified without relying on a database.

The first use of this will be by the Overview poster to wrap its own
comments and edits, so it can easily identify and strip out its own edits
from a body of text.

Change-Id: I8aa1f14d0d68264be4221f5fa99b5b24278d153f
Reviewed-on: https://go-review.googlesource.com/c/oscar/+/636858
Reviewed-by: Hyang-Ah Hana Kim <hyangah@gmail.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
diff --git a/internal/github/wrap.go b/internal/github/wrap.go
new file mode 100644
index 0000000..334e64a
--- /dev/null
+++ b/internal/github/wrap.go
@@ -0,0 +1,158 @@
+// Copyright 2024 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package github
+
+import (
+	"encoding/json"
+	"fmt"
+	"regexp"
+	"strings"
+
+	"golang.org/x/oscar/internal/storage"
+)
+
+// DO NOT REMOVE/EDIT ANY LITERAL STRINGS OR FORMATTING USED IN THIS FILE,
+// (unless you have a good reason and a plan to migrate any existing usage).
+// Go names can be changed.
+
+// A Wrapper wraps comments/edits made to GitHub in hidden tags so that
+// they can later be identified without referencing a database.
+// This is especially useful for stripping out edits to a user-generated
+// post made by a bot.
+//
+// Wrapped strings (with body $body) are of the form:
+//
+// <!-- Generated by Oscar. DO NOT EDIT. {"bot":"$bot","kind":"$kind","meta":$meta} -->$body<!-- oscar-end -->
+//
+// where $bot and $kind are specified in [NewWrapper], and $meta is the JSON
+// encoding of arbitrary metadata (if nil, the "meta" field is omitted).
+//
+// Note that the wrapping strategy is not robust against a user
+// intentionally removing or editing the tag(s).
+type Wrapper struct {
+	bot, kind string
+}
+
+// NewWrapper creates a [Wrapper] for GitHub modifications.
+// bot names the bot making the edits (e.g., "gabyhelp"), and
+// kind is a context string identifying the purpose/type of
+// the edit (e.g., "overview" or "related").
+func NewWrapper(bot, kind string) *Wrapper {
+	w := new(Wrapper)
+	w.bot, w.kind = bot, kind
+	return w
+}
+
+// Wrap wraps body (the text of a GitHub modification) in hidden tags.
+// It returns an error if body contains the end tag (<!-- oscar-end -->),
+// as any already-wrapped body does.
+// metadata is freeform metadata for the start tag; it is sanitized via
+// [json.Marshal], so it may contain otherwise forbidden elements (e.g., "-->").
+func (w *Wrapper) Wrap(body string, metadata any) (string, error) {
+	// An embedded [endTag] would end the wrapped region early when parsed.
+	if strings.Contains(body, endTag) {
+		return "", fmt.Errorf("github: wrapped body cannot contain %q", endTag)
+	}
+	// DO NOT REMOVE/EDIT STRUCTURE OR CONTENTS.
+	return w.startTag(metadata) + body + endTag, nil
+}
+
+// Regexps for wrapped strings (group 1: start tag JSON, group 2: body); isRE is anchored to the whole string.
+var (
+	reString                  = `<!-- Generated by Oscar\. DO NOT EDIT\. (\{.*?\}) -->(?s)(.*?)<!-- oscar-end -->`
+	containsRE *regexp.Regexp = regexp.MustCompile(reString)
+	isRE       *regexp.Regexp = regexp.MustCompile(`^` + reString + `$`)
+)
+
+// IsWrapped reports whether s is exactly one string of the form returned by [Wrapper.Wrap].
+func IsWrapped(s string) bool {
+	m := isRE.FindStringSubmatch(s) // m[2] is the body; an end tag inside it means s spans several wrapped strings
+	return m != nil && !strings.Contains(m[2], endTag)
+}
+
+// ContainsWrapped reports whether the given string
+// contains one or more substrings of the form output by
+// [Wrapper.Wrap] (for any [Wrapper]).
+func ContainsWrapped(s string) bool {
+	return containsRE.MatchString(s)
+}
+
+// Unwrapped contains the parsed contents of a wrapped string: its start tag contents and its body.
+type Unwrapped struct {
+	TagContent
+	Body string
+}
+
+// TagContent contains the structured (JSON) contents of a start tag.
+type TagContent struct {
+	Bot  string          `json:"bot"`            // name of the bot that made the edit
+	Kind string          `json:"kind"`           // purpose/type of the edit
+	Meta json.RawMessage `json:"meta,omitempty"` // raw JSON metadata; omitted if empty
+}
+
+// Parse parses the contents of a single wrapped string.
+// It returns false if the string is malformed: it is not exactly one
+// wrapped string, or its start tag cannot be decoded as a [TagContent].
+func Parse(s string) (_ *Unwrapped, ok bool) {
+	// On a match, m is [s, start tag JSON, body].
+	m := isRE.FindStringSubmatch(s)
+	if len(m) != 3 {
+		return nil, false
+	}
+	// Because isRE is anchored, its lazy body group can stretch across
+	// several wrapped strings; a body with an end tag is such a case.
+	if strings.Contains(m[2], endTag) {
+		return nil, false
+	}
+	var tagContent TagContent
+	if err := json.Unmarshal([]byte(m[1]), &tagContent); err != nil {
+		return nil, false
+	}
+	return &Unwrapped{TagContent: tagContent, Body: m[2]}, true
+}
+
+// ParseAll finds every wrapped substring of s and parses each one,
+// returning the results in order of appearance. It returns false if
+// s has no wrapped substrings, or if any of them is malformed.
+func ParseAll(s string) (_ []*Unwrapped, ok bool) {
+	found := containsRE.FindAllString(s, -1)
+	if len(found) == 0 {
+		return nil, false
+	}
+	// Every element of found is a complete wrapped string on its own.
+	parsed := make([]*Unwrapped, len(found))
+	for i := range found {
+		var valid bool
+		if parsed[i], valid = Parse(found[i]); !valid {
+			return nil, false
+		}
+	}
+	return parsed, true
+}
+
+// Strip removes all wrapped substrings (tags and bodies)
+// from the string and returns the result.
+func Strip(s string) string {
+	return containsRE.ReplaceAllString(s, "")
+}
+
+// startTag returns <!-- Generated by Oscar. DO NOT EDIT. {"bot":"$bot","kind":"$kind","meta":$metadata-json} -->
+// where $metadata-json is the JSON representation of the given metadata.
+func (w *Wrapper) startTag(metadata any) string {
+	// DO NOT REMOVE/EDIT this function body.
+	var js json.RawMessage
+	if metadata != nil {
+		js = storage.JSON(metadata)
+	}
+	return fmt.Sprintf("<!-- Generated by Oscar. DO NOT EDIT. %s -->",
+		storage.JSON(TagContent{
+			Bot:  w.bot,
+			Kind: w.kind,
+			Meta: js,
+		}))
+}
+
+// endTag terminates every wrapped string. DO NOT REMOVE/EDIT.
+const endTag = "<!-- oscar-end -->"
diff --git a/internal/github/wrap_test.go b/internal/github/wrap_test.go
new file mode 100644
index 0000000..1f3765a
--- /dev/null
+++ b/internal/github/wrap_test.go
@@ -0,0 +1,254 @@
+// Copyright 2024 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package github
+
+import (
+	"encoding/json"
+	"fmt"
+	"testing"
+
+	"github.com/google/go-cmp/cmp"
+	"golang.org/x/oscar/internal/storage"
+)
+
+func TestWrapper(t *testing.T) {
+	w := NewWrapper("test-bot", "test")
+	body := "this is a comment"
+	metadata := &struct {
+		Foo string
+		Bar int
+	}{
+		Foo: "foo",
+		Bar: 123,
+	}
+
+	for _, tc := range []struct {
+		name     string
+		body     string
+		meta     any
+		wantMeta json.RawMessage
+	}{
+		{
+			name:     "no_metadata",
+			body:     body,
+			meta:     nil,
+			wantMeta: nil,
+		},
+		{
+			name:     "metadata",
+			body:     body,
+			meta:     metadata,
+			wantMeta: storage.JSON(metadata),
+		},
+	} {
+		t.Run(tc.name, func(t *testing.T) {
+			wrapped, err := w.Wrap(tc.body, tc.meta)
+			if err != nil {
+				t.Fatal(err)
+			}
+			wrappedEdit := "something" + wrapped + "something else"
+
+			t.Run("IsWrapped", func(t *testing.T) {
+				if !IsWrapped(wrapped) {
+					t.Errorf("IsWrapped(%s) = false, want true", wrapped)
+				}
+
+				// IsWrapped expects an exact match.
+				if IsWrapped(wrappedEdit) {
+					t.Errorf("IsWrapped(%s) = true, want false", wrappedEdit)
+				}
+			})
+
+			t.Run("ContainsWrapped", func(t *testing.T) {
+				if !ContainsWrapped(wrapped) {
+					t.Errorf("ContainsWrapped(%s) = false, want true", wrapped)
+				}
+
+				if !ContainsWrapped(wrappedEdit) {
+					t.Errorf("ContainsWrapped(%s) = false, want true", wrappedEdit)
+				}
+			})
+
+			t.Run("Parse", func(t *testing.T) {
+				got, ok := Parse(wrapped)
+				if !ok {
+					t.Fatalf("Parse(%s) = false, want true", wrapped)
+				}
+				if got.Body != tc.body {
+					t.Errorf("Parse(%s).Body = %q, want %q", wrapped, got.Body, tc.body)
+				}
+				if string(got.Meta) != string(tc.wantMeta) {
+					t.Errorf("Parse(%s).Meta = %q, want %q", wrapped, got.Meta, tc.wantMeta)
+				}
+			})
+
+			t.Run("ParseAll", func(t *testing.T) {
+				wrappedMultiple := "something" + wrapped + "something else" + wrapped
+				got, ok := ParseAll(wrappedMultiple)
+				if !ok {
+					t.Fatalf("ParseAll(%s) = false, want true", wrappedMultiple)
+				}
+				want := []*Unwrapped{
+					{
+						TagContent: TagContent{
+							Bot:  "test-bot",
+							Kind: "test",
+							Meta: tc.wantMeta,
+						},
+						Body: body,
+					},
+					{
+						TagContent: TagContent{
+							Bot:  "test-bot",
+							Kind: "test",
+							Meta: tc.wantMeta,
+						},
+						Body: body,
+					},
+				}
+				if diff := cmp.Diff(want, got); diff != "" {
+					t.Errorf("ParseAll(%s) mismatch (-want +got):\n%s", wrappedMultiple, diff)
+				}
+			})
+
+			t.Run("Strip", func(t *testing.T) {
+				wrappedMultiple := "something" + wrapped + " something else" + wrapped
+				want := "something something else"
+				got := Strip(wrappedMultiple)
+				if got != want {
+					t.Errorf("Strip(%s) = %v, want %v", wrappedMultiple, got, want)
+				}
+			})
+		})
+	}
+}
+
+func TestWrapperError(t *testing.T) {
+	w := NewWrapper("test-bot", "test")
+
+	wrapped, err := w.Wrap("body <!-- oscar-end --> something", nil)
+	if err == nil {
+		t.Errorf("w.Wrap() = (%s, nil), want error", wrapped)
+	}
+
+	inner, err := w.Wrap("body", nil)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// Can't double-wrap: inner already ends with the end tag.
+	double, err := w.Wrap(inner, nil)
+	if err == nil {
+		t.Errorf("w.Wrap() = (%s, nil), want error", double)
+	}
+}
+
+func TestWrapperLiteral(t *testing.T) {
+	// Full text by Oscar.
+	comment :=
+		`<!-- Generated by Oscar. DO NOT EDIT. {"bot":"test-bot","kind":"test","meta":{"Foo":"foo","Bar":123}} -->this is a comment
+with multiple lines <!-- and an HTML comment --><!-- oscar-end -->`
+
+	t.Run("IsWrapped", func(t *testing.T) {
+		if !IsWrapped(comment) {
+			t.Errorf("IsWrapped(%s) = false, want true", comment)
+		}
+	})
+
+	t.Run("ContainsWrapped", func(t *testing.T) {
+		edit := `hello` + comment + `some more stuff`
+		if !ContainsWrapped(edit) {
+			t.Errorf("ContainsWrapped(%s) = false, want true", edit)
+		}
+	})
+
+	t.Run("Parse", func(t *testing.T) {
+		got, ok := Parse(comment)
+		if !ok {
+			t.Fatalf("Parse(%s) = false, want true", comment)
+		}
+		want := &Unwrapped{
+			TagContent: TagContent{
+				Bot:  "test-bot",
+				Kind: "test",
+				Meta: json.RawMessage(`{"Foo":"foo","Bar":123}`),
+			},
+			Body: `this is a comment
+with multiple lines <!-- and an HTML comment -->`,
+		}
+		if !cmp.Equal(got, want) {
+			t.Errorf("Parse(%s) = %+v, want %+v", comment, got, want)
+		}
+	})
+
+	// Text with multiple edits by Oscar (with different bots and kinds).
+	edited := `a user comment<!-- Generated by Oscar. DO NOT EDIT. {"bot":"test-bot","kind":"test","meta":{"Object:":{"Foo":"foo","Bar":123}}} -->this is a comment<!-- oscar-end -->
+more user comment<!-- Generated by Oscar. DO NOT EDIT. {"bot":"test-bot2","kind":"test2","meta":{"Text":"hello"}} -->this is another comment<!-- oscar-end -->`
+	t.Run("ParseAll", func(t *testing.T) {
+		got, ok := ParseAll(edited)
+		if !ok {
+			t.Fatalf("ParseAll(%s) = false, want true", edited)
+		}
+		want := []*Unwrapped{
+			{
+				TagContent: TagContent{
+					Bot:  "test-bot",
+					Kind: "test",
+					Meta: json.RawMessage(`{"Object:":{"Foo":"foo","Bar":123}}`),
+				},
+				Body: `this is a comment`,
+			},
+			{
+				TagContent: TagContent{
+					Bot:  "test-bot2",
+					Kind: "test2",
+					Meta: json.RawMessage(`{"Text":"hello"}`),
+				},
+				Body: `this is another comment`,
+			},
+		}
+		if diff := cmp.Diff(want, got); diff != "" {
+			t.Errorf("ParseAll(%s) mismatch (-want +got):\n%s", edited, diff)
+		}
+	})
+
+	t.Run("Strip", func(t *testing.T) {
+		got := Strip(edited)
+		want := `a user comment
+more user comment`
+		if got != want {
+			t.Errorf("Strip(%s) = %s, want %s", edited, got, want)
+		}
+	})
+}
+
+func ExampleWrapper() {
+	w := NewWrapper("test-bot", "test")
+	metadata := &struct {
+		Foo string
+		Bar int
+	}{Foo: "foo", Bar: 123}
+
+	wrapped, err := w.Wrap("this is a comment", metadata)
+	if err != nil {
+		fmt.Println("Wrap:", err)
+		return
+	}
+	fmt.Println("Wrap:", wrapped)
+	fmt.Println("IsWrapped:", IsWrapped(wrapped))
+
+	// Parse returns a nil *Unwrapped on failure, so check ok before using u.
+	u, ok := Parse(wrapped)
+	if !ok {
+		fmt.Println("Parse: not a wrapped string")
+		return
+	}
+	fmt.Printf("Parse: (bot=%s, kind=%s, meta=%s, body=%s, %t)\n", u.Bot, u.Kind, u.Meta, u.Body, ok)
+
+	// Output:
+	// Wrap: <!-- Generated by Oscar. DO NOT EDIT. {"bot":"test-bot","kind":"test","meta":{"Foo":"foo","Bar":123}} -->this is a comment<!-- oscar-end -->
+	// IsWrapped: true
+	// Parse: (bot=test-bot, kind=test, meta={"Foo":"foo","Bar":123}, body=this is a comment, true)
+}