present: accept Markdown in present files

Markdown is enabled by starting the title with "# ".
See the new documentation in present/doc.go for details.

For golang/go#33955.

Change-Id: I04ef2aa2cf253bdf48910c5674d679a482ffa33f
Reviewed-on: https://go-review.googlesource.com/c/tools/+/222846
Reviewed-by: Rob Pike <r@golang.org>
diff --git a/go.mod b/go.mod
index 61dd5df..c257e08 100644
--- a/go.mod
+++ b/go.mod
@@ -3,6 +3,7 @@
 go 1.11
 
 require (
+	github.com/yuin/goldmark v1.1.25
 	golang.org/x/mod v0.2.0
 	golang.org/x/net v0.0.0-20200226121028-0de0cce0169b
 	golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e
diff --git a/go.sum b/go.sum
index 62b1a81..e70ab41 100644
--- a/go.sum
+++ b/go.sum
@@ -1,3 +1,5 @@
+github.com/yuin/goldmark v1.1.25 h1:isv+Q6HQAmmL2Ofcmg8QauBmDPlUUnSoNhEcC940Rds=
+github.com/yuin/goldmark v1.1.25/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
 golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
 golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
 golang.org/x/mod v0.2.0 h1:KU7oHjnv3XNWfa5COkzUifxZmxp1TyI7ImMXqFxLwvQ=
diff --git a/gopls/go.sum b/gopls/go.sum
index 9628287..fb56ab9 100644
--- a/gopls/go.sum
+++ b/gopls/go.sum
@@ -17,6 +17,7 @@
 github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
 github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk=
 github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
+github.com/yuin/goldmark v1.1.25/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
 golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
 golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
 golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
diff --git a/present/doc.go b/present/doc.go
index e12e985..fb84e12 100644
--- a/present/doc.go
+++ b/present/doc.go
@@ -3,23 +3,26 @@
 // license that can be found in the LICENSE file.
 
 /*
-The present file format
+Package present implements parsing and rendering of present files,
+which can be slide presentations as in golang.org/x/tools/cmd/present
+or articles as in golang.org/x/blog (the Go blog).
 
-Present files have the following format.  The first non-blank non-comment
-line is the title, so the header looks like
+File Format
 
-	Title of document
+Present files begin with a header giving the title of the document
+and other metadata, which looks like:
+
+	# Title of document
 	Subtitle of document
 	15:04 2 Jan 2006
 	Tags: foo, bar, baz
-	<blank line>
-	Author Name
-	Job title, Company
-	joe@example.com
-	http://url/
-	@twitter_name
+	Summary: This is a great document you want to read.
 
-The subtitle, date, and tags lines are optional.
+The "# " prefix before the title indicates that this is
+a Markdown-enabled present file: it uses
+Markdown for text markup in the body of the file.
+If the "# " prefix is missing, the file uses
+legacy present markup, described below.
 
 The date line may be written without a time:
 	2 Jan 2006
@@ -28,15 +31,126 @@
 The tags line is a comma-separated list of tags that may be used to categorize
 the document.
 
-The author section may contain a mixture of text, twitter names, and links.
+The summary line gives a short summary used in blog feeds.
+
+Only the title is required;
+the subtitle, date, tags, and summary lines are optional.
+In Markdown-enabled present, the summary defaults to being empty.
+In legacy present, the summary defaults to the first paragraph of text.
+
+After the header come zero or more author blocks, like this:
+
+	Author Name
+	Job title, Company
+	joe@example.com
+	https://url/
+	@twitter_name
+
+The first line of the author block is conventionally the author name.
+Beyond that, the author block may contain a mixture of text, twitter names, and links.
 For slide presentations, only the plain text lines will be displayed on the
 first slide.
 
-Multiple presenters may be specified, separated by a blank line.
+If multiple author blocks are listed, each new block must be preceded
+by its own blank line.
 
-After that come slides/sections, each after a blank line:
+After the author blocks come the presentation slides or article sections,
+which can in turn have subsections.
+In Markdown-enabled present files, each slide or section begins with a "##" header line,
+subsections begin with a "###" header line, and so on.
+In legacy present files, each slide or section begins with a "*" header line,
+subsections begin with a "**" header line, and so on.
 
-	* Title of slide or section (must have asterisk)
+In addition to the marked-up text in a section (or subsection),
+a present file can contain present command invocations, each of which begins
+with a dot, as in:
+
+	.code x.go /^func main/,/^}/
+	.play y.go
+	.image image.jpg
+	.background image.jpg
+	.iframe https://foo
+	.link https://foo label
+	.html file.html
+	.caption _Gopher_ by [[https://twitter.com/reneefrench][Renée French]]
+
+Other than the commands, the text in a section is interpreted
+either as Markdown or as legacy present markup.
+
+Markdown Syntax
+
+Markdown is the generic name for a family of similar markup languages.
+The specific variant used in present is CommonMark.
+See https://commonmark.org/help/tutorial/ for a quick tutorial.
+
+In Markdown-enabled present,
+section headings can end in {#name} to set the HTML anchor ID for the heading to "name".
+
+Lines beginning with "//" (outside of code blocks, of course)
+are treated as present comments and have no effect.
+
+Lines beginning with ": " are treated as speaker notes, described below.
+
+Example:
+
+	# Title of Talk
+
+	My Name
+	9 Mar 2020
+	me@example.com
+
+	## Title of Slide or Section (must begin with ##)
+
+	Some Text
+
+	### Subsection {#anchor}
+
+	- bullets
+	- more bullets
+	- a bullet continued
+	  on the next line
+
+	#### Sub-subsection
+
+	Some More text
+
+		Preformatted text (code block)
+		is indented (by one tab, or four spaces)
+
+	Further Text, including command invocations.
+
+	## Section 2: Example formatting {#fmt}
+
+	Formatting:
+
+	_italic_
+	// A comment that is completely ignored.
+	: Speaker notes.
+	**bold**
+	`program`
+	Markup—_especially italic text_—can easily be overused.
+	_Why use scoped\_ptr_? Use plain **\*ptr** instead.
+
+	Visit [the Go home page](https://golang.org/).
+
+Legacy Present Syntax
+
+Compared to Markdown,
+in legacy present
+slides/sections use "*" instead of "##",
+whole-line comments begin with "#" instead of "//",
+bullet lists can only contain single (possibly wrapped) text lines,
+and the font styling and link syntaxes are subtly different.
+
+Example:
+
+	Title of Talk
+
+	My Name
+	1 Jan 2013
+	me@example.com
+
+	* Title of Slide or Section (must begin with *)
 
 	Some Text
 
@@ -45,35 +159,28 @@
 	- bullets
 	- more bullets
 	- a bullet continued
-	  on the next line
+	  on the next line (indented at least one space)
 
 	*** Sub-subsection
 
 	Some More text
 
-	  Preformatted text
+	  Preformatted text (code block)
 	  is indented (however you like)
 
-	Further Text, including invocations like:
+	Further Text, including command invocations.
 
-	.code x.go /^func main/,/^}/
-	.play y.go
-	.image image.jpg
-	.background image.jpg
-	.iframe http://foo
-	.link http://foo label
-	.html file.html
-	.caption _Gopher_ by [[https://www.instagram.com/reneefrench/][Renée French]]
+	* Section 2: Example formatting
 
-	Again, more text
+	Formatting:
 
-Blank lines are OK (not mandatory) after the title and after the
-text.  Text, bullets, and .code etc. are all optional; title is
-not.
+	_italic_
+	*bold*
+	`program`
+	Markup—_especially_italic_text_—can easily be overused.
+	_Why_use_scoped__ptr_? Use plain ***ptr* instead.
 
-Lines starting with # in column 1 are commentary.
-
-Fonts:
+	Visit [[https://golang.org][the Go home page]].
 
 Within the input for plain text or lists, text bracketed by font
 markers will be presented in italic, bold, or program font.
@@ -86,27 +193,21 @@
 a single marker character becomes a space and a doubled single
 marker quotes the marker character.
 
-	_italic_
-	*bold*
-	`program`
-	Markup—_especially_italic_text_—can easily be overused.
-	_Why_use_scoped__ptr_? Use plain ***ptr* instead.
-
-Inline links:
-
 Links can be included in any text with the form [[url][label]], or
 [[url]] to use the URL itself as the label.
 
-Functions:
+Command Invocations
 
-A number of template functions are available through invocations
+A number of special commands are available through invocations
 in the input text. Each such invocation contains a period as the
 first character on the line, followed immediately by the name of
 the function, followed by any arguments. A typical invocation might
 be
+
 	.play demo.go /^func show/,/^}/
+
 (except that the ".play" must be at the beginning of the line and
-not be indented like this.)
+not be indented as in this comment.)
 
 Here follows a description of the functions:
 
@@ -165,7 +266,7 @@
 The first argument is always the HTTP URL.  If there is a second
 argument, it is the text label to display for this link.
 
-	.link http://golang.org golang.org
+	.link https://golang.org golang.org
 
 image:
 
@@ -179,7 +280,6 @@
 preserves the aspect ratio of the image when scaling.
 
 	.image images/betsy.jpg 100 200
-
 	.image images/janet.jpg _ 300
 
 video:
@@ -212,7 +312,7 @@
 The text after ".caption" is embedded in a figcaption element after
 processing styling and links as in standard text lines.
 
-	.caption _Gopher_ by [[http://www.reneefrench.com][Renée French]]
+	.caption _Gopher_ by [[https://twitter.com/reneefrench][Renée French]]
 
 iframe:
 
@@ -228,35 +328,29 @@
 
 	.html file.html
 
-Presenter notes:
+Presenter Notes
 
-Presenter notes may be enabled by appending the "-notes" flag when you run
-your "present" binary.
+Lines that begin with ": " are treated as presenter notes,
+in both Markdown and legacy present syntax.
+By default, presenter notes are collected but ignored.
 
-This will allow you to open a second window by pressing 'N' from your browser
-displaying your slides. The second window is completely synced with your main
-window, except that presenter notes are only visible on the second window.
-
-Lines that begin with ": " are treated as presenter notes.
-
-	* Title of slide
-
-	Some Text
-
-	: Presenter notes (first paragraph)
-	: Presenter notes (subsequent paragraph(s))
+When running the present command with -notes,
+typing 'N' in your browser displaying your slides
+will create a second window displaying the notes.
+The second window is completely synced with the main
+window, except that presenter notes are only visible in the second window.
 
 Notes may appear anywhere within the slide text. For example:
 
 	* Title of slide
 
+	Some text.
+
 	: Presenter notes (first paragraph)
 
-	Some Text
+	Some more text.
 
 	: Presenter notes (subsequent paragraph(s))
 
-This has the same result as the example above.
-
 */
 package present // import "golang.org/x/tools/present"
diff --git a/present/parse.go b/present/parse.go
index 20e8f9f..7f38e6e 100644
--- a/present/parse.go
+++ b/present/parse.go
@@ -19,6 +19,11 @@
 	"time"
 	"unicode"
 	"unicode/utf8"
+
+	"github.com/yuin/goldmark"
+	"github.com/yuin/goldmark/ast"
+	"github.com/yuin/goldmark/renderer/html"
+	"github.com/yuin/goldmark/text"
 )
 
 var (
@@ -68,6 +73,7 @@
 type Doc struct {
 	Title      string
 	Subtitle   string
+	Summary    string
 	Time       time.Time
 	Authors    []Author
 	TitleNotes []string
@@ -98,6 +104,7 @@
 type Section struct {
 	Number  []int
 	Title   string
+	ID      string // HTML anchor ID
 	Elem    []Elem
 	Notes   []string
 	Classes []string
@@ -210,8 +217,9 @@
 
 // Lines is a helper for parsing line-based input.
 type Lines struct {
-	line int // 0 indexed, so has 1-indexed number of last line returned
-	text []string
+	line    int // 0 indexed, so has 1-indexed number of last line returned
+	text    []string
+	comment string
 }
 
 func readLines(r io.Reader) (*Lines, error) {
@@ -223,7 +231,7 @@
 	if err := s.Err(); err != nil {
 		return nil, err
 	}
-	return &Lines{0, lines}, nil
+	return &Lines{0, lines, "#"}, nil
 }
 
 func (l *Lines) next() (text string, ok bool) {
@@ -234,8 +242,8 @@
 			return "", false
 		}
 		text = l.text[current]
-		// Lines starting with # are comments.
-		if len(text) == 0 || text[0] != '#' {
+		// Lines starting with l.comment are comments.
+		if l.comment == "" || !strings.HasPrefix(text, l.comment) {
 			ok = true
 			break
 		}
@@ -282,8 +290,27 @@
 		return nil, err
 	}
 
+	// Detect Markdown-enabled vs legacy present file.
+	// Markdown-enabled files have a title line beginning with "# "
+	// (like preprocessed C files of yore).
+	isMarkdown := false
 	for i := lines.line; i < len(lines.text); i++ {
-		if strings.HasPrefix(lines.text[i], "*") {
+		line := lines.text[i]
+		if line == "" {
+			continue
+		}
+		isMarkdown = strings.HasPrefix(line, "# ")
+		break
+	}
+
+	sectionPrefix := "*"
+	if isMarkdown {
+		sectionPrefix = "##"
+		lines.comment = "//"
+	}
+
+	for i := lines.line; i < len(lines.text); i++ {
+		if strings.HasPrefix(lines.text[i], sectionPrefix) {
 			break
 		}
 
@@ -292,7 +319,7 @@
 		}
 	}
 
-	err = parseHeader(doc, lines)
+	err = parseHeader(doc, isMarkdown, lines)
 	if err != nil {
 		return nil, err
 	}
@@ -301,13 +328,15 @@
 	}
 
 	// Authors
-	if doc.Authors, err = parseAuthors(name, lines); err != nil {
+	if doc.Authors, err = parseAuthors(name, sectionPrefix, lines); err != nil {
 		return nil, err
 	}
+
 	// Sections
-	if doc.Sections, err = parseSections(ctx, name, lines, []int{}); err != nil {
+	if doc.Sections, err = parseSections(ctx, name, sectionPrefix, lines, []int{}); err != nil {
 		return nil, err
 	}
+
 	return doc, nil
 }
 
@@ -324,12 +353,13 @@
 // lesserHeading returns true if text is a heading of a lesser or equal level
 // than that denoted by prefix.
 func lesserHeading(text, prefix string) bool {
-	return isHeading.MatchString(text) && !strings.HasPrefix(text, prefix+"*")
+	return isHeading.MatchString(text) && !strings.HasPrefix(text, prefix+prefix[:1])
 }
 
 // parseSections parses Sections from lines for the section level indicated by
 // number (a nil number indicates the top level).
-func parseSections(ctx *Context, name string, lines *Lines, number []int) ([]Section, error) {
+func parseSections(ctx *Context, name, prefix string, lines *Lines, number []int) ([]Section, error) {
+	isMarkdown := prefix[0] == '#'
 	var sections []Section
 	for i := 1; ; i++ {
 		// Next non-empty line is title.
@@ -340,21 +370,32 @@
 		if !ok {
 			break
 		}
-		prefix := strings.Repeat("*", len(number)+1)
-		if !strings.HasPrefix(text, prefix+" ") {
+		if text != prefix && !strings.HasPrefix(text, prefix+" ") {
 			lines.back()
 			break
 		}
+		// Markdown sections can end in {#id} to set the HTML anchor for the section.
+		// This is nicer than the default #TOC_1_2-style anchor.
+		title := strings.TrimSpace(text[len(prefix):])
+		id := ""
+		if isMarkdown && strings.HasSuffix(title, "}") {
+			j := strings.LastIndex(title, "{#")
+			if j >= 0 {
+				id = title[j+2 : len(title)-1]
+				title = strings.TrimSpace(title[:j])
+			}
+		}
 		section := Section{
 			Number: append(append([]int{}, number...), i),
-			Title:  text[len(prefix)+1:],
+			Title:  title,
+			ID:     id,
 		}
 		text, ok = lines.nextNonEmpty()
 		for ok && !lesserHeading(text, prefix) {
 			var e Elem
 			r, _ := utf8.DecodeRuneInString(text)
 			switch {
-			case unicode.IsSpace(r):
+			case !isMarkdown && unicode.IsSpace(r):
 				i := strings.IndexFunc(text, func(r rune) bool {
 					return !unicode.IsSpace(r)
 				})
@@ -376,7 +417,7 @@
 				pre = strings.Replace(pre, "\t", "    ", -1) // browsers treat tabs badly
 				pre = strings.TrimRightFunc(pre, unicode.IsSpace)
 				e = Text{Lines: []string{pre}, Pre: true, Raw: raw}
-			case strings.HasPrefix(text, "- "):
+			case !isMarkdown && strings.HasPrefix(text, "- "):
 				var b []string
 				for {
 					if strings.HasPrefix(text, "- ") {
@@ -394,9 +435,9 @@
 				e = List{Bullet: b}
 			case isSpeakerNote(text):
 				section.Notes = append(section.Notes, text[2:])
-			case strings.HasPrefix(text, prefix+"* "):
+			case strings.HasPrefix(text, prefix+prefix[:1]+" "):
 				lines.back()
-				subsecs, err := parseSections(ctx, name, lines, section.Number)
+				subsecs, err := parseSections(ctx, name, prefix+prefix[:1], lines, section.Number)
 				if err != nil {
 					return nil, err
 				}
@@ -420,20 +461,46 @@
 				}
 				e = t
 			default:
-				var l []string
+				var block []string
 				for ok && strings.TrimSpace(text) != "" {
-					if text[0] == '.' { // Command breaks text block.
+					// Command breaks text block.
+					// Section heading breaks text block in markdown.
+					if text[0] == '.' || isMarkdown && text[0] == '#' {
 						lines.back()
 						break
 					}
 					if strings.HasPrefix(text, `\.`) { // Backslash escapes initial period.
 						text = text[1:]
 					}
-					l = append(l, text)
+					block = append(block, text)
 					text, ok = lines.next()
 				}
-				if len(l) > 0 {
-					e = Text{Lines: l}
+				if len(block) == 0 {
+					break
+				}
+				if isMarkdown {
+					// Replace all leading tabs with 4 spaces,
+					// which render better in code blocks.
+					// CommonMark defines that for parsing the structure of the file
+					// a tab is equivalent to 4 spaces, so this change won't
+					// affect the later parsing at all.
+					// An alternative would be to apply this to code blocks after parsing,
+					// at the same time that we update <a> targets, but code blocks turn out
+					// to be quite difficult to modify in the AST.
+					for i, line := range block {
+						if len(line) > 0 && line[0] == '\t' {
+							short := strings.TrimLeft(line, "\t")
+							line = strings.Repeat("    ", len(line)-len(short)) + short
+							block[i] = line
+						}
+					}
+					html, err := renderMarkdown([]byte(strings.Join(block, "\n")))
+					if err != nil {
+						return nil, err
+					}
+					e = HTML{HTML: html}
+				} else {
+					e = Text{Lines: block}
 				}
 			}
 			if e != nil {
@@ -449,13 +516,17 @@
 	return sections, nil
 }
 
-func parseHeader(doc *Doc, lines *Lines) error {
+func parseHeader(doc *Doc, isMarkdown bool, lines *Lines) error {
 	var ok bool
 	// First non-empty line starts header.
 	doc.Title, ok = lines.nextNonEmpty()
 	if !ok {
 		return errors.New("unexpected EOF; expected title")
 	}
+	if isMarkdown {
+		doc.Title = strings.TrimSpace(strings.TrimPrefix(doc.Title, "#"))
+	}
+
 	for {
 		text, ok := lines.next()
 		if !ok {
@@ -467,13 +538,14 @@
 		if isSpeakerNote(text) {
 			continue
 		}
-		const tagPrefix = "Tags:"
-		if strings.HasPrefix(text, tagPrefix) {
-			tags := strings.Split(text[len(tagPrefix):], ",")
+		if strings.HasPrefix(text, "Tags:") {
+			tags := strings.Split(text[len("Tags:"):], ",")
 			for i := range tags {
 				tags[i] = strings.TrimSpace(tags[i])
 			}
 			doc.Tags = append(doc.Tags, tags...)
+		} else if strings.HasPrefix(text, "Summary:") {
+			doc.Summary = strings.TrimSpace(text[len("Summary:"):])
 		} else if t, ok := parseTime(text); ok {
 			doc.Time = t
 		} else if doc.Subtitle == "" {
@@ -485,7 +557,7 @@
 	return nil
 }
 
-func parseAuthors(name string, lines *Lines) (authors []Author, err error) {
+func parseAuthors(name, sectionPrefix string, lines *Lines) (authors []Author, err error) {
 	// This grammar demarcates authors with blanks.
 
 	// Skip blank lines.
@@ -502,7 +574,7 @@
 		}
 
 		// If we find a section heading, we're done.
-		if strings.HasPrefix(text, "* ") {
+		if strings.HasPrefix(text, sectionPrefix) {
 			lines.back()
 			break
 		}
@@ -576,3 +648,27 @@
 func isSpeakerNote(s string) bool {
 	return strings.HasPrefix(s, ": ")
 }
+
+func renderMarkdown(input []byte) (template.HTML, error) {
+	md := goldmark.New(goldmark.WithRendererOptions(html.WithUnsafe()))
+	reader := text.NewReader(input)
+	doc := md.Parser().Parse(reader)
+	fixupMarkdown(doc)
+	var b strings.Builder
+	if err := md.Renderer().Render(&b, input, doc); err != nil {
+		return "", err
+	}
+	return template.HTML(b.String()), nil
+}
+
+func fixupMarkdown(n ast.Node) {
+	ast.Walk(n, func(n ast.Node, entering bool) (ast.WalkStatus, error) {
+		if entering {
+			switch n := n.(type) {
+			case *ast.Link:
+				n.SetAttributeString("target", "_blank")
+			}
+		}
+		return ast.WalkContinue, nil
+	})
+}