go/doc: simplify and robustify link detection logic

To fix #5043, we added logic to allow balanced pairs of parenthesis
so that we could match URLs like:
	http://example.com/some_resource(foo)

Howewer, such logic breaks when parsing something like the following:
	art by [https://example.com/person][Person Name]].
such that the following is considered the link:
	https://example.com/person][Person

Since the logic added in #5043 was just a heuristic, we adjust
the heuristic that in addition to requiring balanced pairs,
the first parenthesis must be an opening one.

For further robustness, we apply this heuristic to
parenthesis, braces, and brackets.

Fixes #22285

Change-Id: I23b728a644e35ce3995b05a79129cad2c1e3b1ce
Reviewed-on: https://go-review.googlesource.com/c/94876
Run-TryBot: Robert Griesemer <gri@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Robert Griesemer <gri@golang.org>
diff --git a/src/go/doc/comment.go b/src/go/doc/comment.go
index d2c026e..0ec4264 100644
--- a/src/go/doc/comment.go
+++ b/src/go/doc/comment.go
@@ -54,7 +54,7 @@
 	identRx = `[\pL_][\pL_0-9]*`
 
 	// Regexp for URLs
-	// Match parens, and check in pairedParensPrefixLen for balance - see #5043
+	// Match parens, and check later for balance - see #5043, #22285
 	// Match .,:;?! within path, but not at end - see #18139, #16565
 	// This excludes some rare yet valid urls ending in common punctuation
 	// in order to allow sentences ending in URLs.
@@ -86,29 +86,6 @@
 	html_endh   = []byte("</h3>\n")
 )
 
-// pairedParensPrefixLen returns the length of the longest prefix of s containing paired parentheses.
-func pairedParensPrefixLen(s string) int {
-	parens := 0
-	l := len(s)
-	for i, ch := range s {
-		switch ch {
-		case '(':
-			if parens == 0 {
-				l = i
-			}
-			parens++
-		case ')':
-			parens--
-			if parens == 0 {
-				l = len(s)
-			} else if parens < 0 {
-				return i
-			}
-		}
-	}
-	return l
-}
-
 // Emphasize and escape a line of text for HTML. URLs are converted into links;
 // if the URL also appears in the words map, the link is taken from the map (if
 // the corresponding map value is the empty string, the URL is not converted
@@ -128,13 +105,27 @@
 		// write text before match
 		commentEscape(w, line[0:m[0]], nice)
 
-		// adjust match if necessary
+		// adjust match for URLs
 		match := line[m[0]:m[1]]
-		if n := pairedParensPrefixLen(match); n < len(match) {
-			// match contains unpaired parentheses (rare);
-			// redo matching with shortened line for correct indices
-			m = matchRx.FindStringSubmatchIndex(line[:m[0]+n])
-			match = match[:n]
+		if strings.Contains(match, "://") {
+			m0, m1 := m[0], m[1]
+			for _, s := range []string{"()", "{}", "[]"} {
+				open, close := s[:1], s[1:] // E.g., "(" and ")"
+				// require opening parentheses before closing parentheses (#22285)
+				if i := strings.Index(match, close); i >= 0 && i < strings.Index(match, open) {
+					m1 = m0 + i
+					match = line[m0:m1]
+				}
+				// require balanced pairs of parentheses (#5043)
+				for i := 0; strings.Count(match, open) != strings.Count(match, close) && i < 10; i++ {
+					m1 = strings.LastIndexAny(line[:m1], s)
+					match = line[m0:m1]
+				}
+			}
+			if m1 != m[1] {
+				// redo matching with shortened line for correct indices
+				m = matchRx.FindStringSubmatchIndex(line[:m[0]+len(match)])
+			}
 		}
 
 		// analyze match
diff --git a/src/go/doc/comment_test.go b/src/go/doc/comment_test.go
index 1e6cf84..e0adeb2 100644
--- a/src/go/doc/comment_test.go
+++ b/src/go/doc/comment_test.go
@@ -151,6 +151,7 @@
 var emphasizeTests = []struct {
 	in, out string
 }{
+	{"", ""},
 	{"http://[::1]:8080/foo.txt", `<a href="http://[::1]:8080/foo.txt">http://[::1]:8080/foo.txt</a>`},
 	{"before (https://www.google.com) after", `before (<a href="https://www.google.com">https://www.google.com</a>) after`},
 	{"before https://www.google.com:30/x/y/z:b::c. After", `before <a href="https://www.google.com:30/x/y/z:b::c">https://www.google.com:30/x/y/z:b::c</a>. After`},
@@ -169,7 +170,13 @@
 	{"Hello http://example.com/%2f/ /world.", `Hello <a href="http://example.com/%2f/">http://example.com/%2f/</a> /world.`},
 	{"Lorem http: ipsum //host/path", "Lorem http: ipsum //host/path"},
 	{"javascript://is/not/linked", "javascript://is/not/linked"},
+	{"http://foo", `<a href="http://foo">http://foo</a>`},
+	{"art by [[https://www.example.com/person/][Person Name]]", `art by [[<a href="https://www.example.com/person/">https://www.example.com/person/</a>][Person Name]]`},
+	{"please visit (http://golang.org/)", `please visit (<a href="http://golang.org/">http://golang.org/</a>)`},
+	{"please visit http://golang.org/hello())", `please visit <a href="http://golang.org/hello()">http://golang.org/hello()</a>)`},
 	{"http://git.qemu.org/?p=qemu.git;a=blob;f=qapi-schema.json;hb=HEAD", `<a href="http://git.qemu.org/?p=qemu.git;a=blob;f=qapi-schema.json;hb=HEAD">http://git.qemu.org/?p=qemu.git;a=blob;f=qapi-schema.json;hb=HEAD</a>`},
+	{"https://foo.bar/bal/x(])", `<a href="https://foo.bar/bal/x(">https://foo.bar/bal/x(</a>])`}, // inner ] causes (]) to be cut off from URL
+	{"foo [ http://bar(])", `foo [ <a href="http://bar(">http://bar(</a>])`},                      // outer [ causes ]) to be cut off from URL
 }
 
 func TestEmphasize(t *testing.T) {
@@ -183,37 +190,6 @@
 	}
 }
 
-var pairedParensPrefixLenTests = []struct {
-	in, out string
-}{
-	{"", ""},
-	{"foo", "foo"},
-	{"()", "()"},
-	{"foo()", "foo()"},
-	{"foo()()()", "foo()()()"},
-	{"foo()((()()))", "foo()((()()))"},
-	{"foo()((()()))bar", "foo()((()()))bar"},
-	{"foo)", "foo"},
-	{"foo))", "foo"},
-	{"foo)))))", "foo"},
-	{"(foo", ""},
-	{"((foo", ""},
-	{"(((((foo", ""},
-	{"(foo)", "(foo)"},
-	{"((((foo))))", "((((foo))))"},
-	{"foo()())", "foo()()"},
-	{"foo((()())", "foo"},
-	{"foo((()())) (() foo ", "foo((()())) "},
-}
-
-func TestPairedParensPrefixLen(t *testing.T) {
-	for i, tt := range pairedParensPrefixLenTests {
-		if out := tt.in[:pairedParensPrefixLen(tt.in)]; out != tt.out {
-			t.Errorf("#%d: mismatch\nhave: %q\nwant: %q", i, out, tt.out)
-		}
-	}
-}
-
 func TestCommentEscape(t *testing.T) {
 	commentTests := []struct {
 		in, out string