internal/web: pre-expand tabs in markdown

Goldmark has a bug that causes crashes on inputs
containing mixed spaces and tabs. Avoid it by expanding tabs to spaces.

Change-Id: I4c678fce6c68bf0d448ed0b75a2ac12a42891ec6
Reviewed-on: https://go-review.googlesource.com/c/website/+/331349
Trust: Russ Cox <rsc@golang.org>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Steve Traut <straut@google.com>
diff --git a/internal/web/markdown.go b/internal/web/markdown.go
index 1973461..2cee319 100644
--- a/internal/web/markdown.go
+++ b/internal/web/markdown.go
@@ -6,6 +6,7 @@
 
 import (
 	"bytes"
+	"unicode/utf8"
 
 	"github.com/yuin/goldmark"
 	"github.com/yuin/goldmark/parser"
@@ -18,6 +19,7 @@
 // The Markdown source may contain raw HTML,
 // but Go templates have already been processed.
 func renderMarkdown(src []byte) ([]byte, error) {
+	src = replaceTabs(src)
 	// parser.WithHeadingAttribute allows custom ids on headings.
 	// html.WithUnsafe allows use of raw HTML, which we need for tables.
 	md := goldmark.New(
@@ -29,3 +31,42 @@
 	}
 	return buf.Bytes(), nil
 }
+
+// replaceTabs returns the contents of text with every tab replaced by one to
+// four spaces, enough to advance to the next 4-column tab stop.
+//
+// CommonMark defines tabs used for indentation in terms of 4-space tab stops
+// (see https://spec.commonmark.org/0.30/#tabs), Go code reads more compactly
+// at 4 than at the 8 that browsers often use, and Goldmark crashes on some
+// inputs that mix spaces and tabs. Expanding up front addresses all three.
+//
+// The column advances by one per decoded rune and resets after each '\n', so
+// multi-codepoint Unicode sequences are not handled correctly.
+// Each invalid UTF-8 byte decodes to utf8.RuneError and is written as U+FFFD.
+func replaceTabs(text []byte) []byte {
+	var buf bytes.Buffer
+	col := 0 // zero-based column of the next output character on this line
+	for len(text) > 0 {
+		r, size := utf8.DecodeRune(text)
+		text = text[size:] // consume the rune that was just decoded
+
+		switch r {
+		case '\n':
+			buf.WriteByte('\n')
+			col = 0 // tab stops are measured from the start of each line
+
+		case '\t':
+			buf.WriteByte(' ') // a tab always expands to at least one space
+			col++
+			for col%4 != 0 { // pad until col reaches the next multiple of 4
+				buf.WriteByte(' ')
+				col++
+			}
+
+		default:
+			buf.WriteRune(r) // re-encodes r; every rune counts as one column
+			col++
+		}
+	}
+	return buf.Bytes()
+}