| // Copyright 2019 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| package source |
| |
| import ( |
| "bytes" |
| "io" |
| "regexp" |
| "strings" |
| "unicode" |
| "unicode/utf8" |
| ) |
| |
| // CommentToMarkdown converts comment text to formatted markdown. |
| // The comment was prepared by DocReader, |
| // so it is known not to have leading, trailing blank lines |
| // nor to have trailing spaces at the end of lines. |
| // The comment markers have already been removed. |
| // |
| // Each line is converted into a markdown line and empty lines are just converted to |
| // newlines. Heading are prefixed with `### ` to make it a markdown heading. |
| // |
| // A span of indented lines retains a 4 space prefix block, with the common indent |
| // prefix removed unless empty, in which case it will be converted to a newline. |
| // |
| // URLs in the comment text are converted into links. |
| func CommentToMarkdown(text string) string { |
| buf := &bytes.Buffer{} |
| commentToMarkdown(buf, text) |
| return buf.String() |
| } |
| |
| var ( |
| mdNewline = []byte("\n") |
| mdHeader = []byte("### ") |
| mdIndent = []byte(" ") |
| mdLinkStart = []byte("[") |
| mdLinkDiv = []byte("](") |
| mdLinkEnd = []byte(")") |
| ) |
| |
| func commentToMarkdown(w io.Writer, text string) { |
| blocks := blocks(text) |
| for i, b := range blocks { |
| switch b.op { |
| case opPara: |
| for _, line := range b.lines { |
| emphasize(w, line, true) |
| } |
| case opHead: |
| // The header block can consist of only one line. |
| // However, check the number of lines, just in case. |
| if len(b.lines) == 0 { |
| // Skip this block. |
| continue |
| } |
| header := b.lines[0] |
| |
| w.Write(mdHeader) |
| commentEscape(w, header, true) |
| // Header doesn't end with \n unlike the lines of other blocks. |
| w.Write(mdNewline) |
| case opPre: |
| for _, line := range b.lines { |
| if isBlank(line) { |
| w.Write(mdNewline) |
| continue |
| } |
| w.Write(mdIndent) |
| w.Write([]byte(line)) |
| } |
| } |
| |
| if i < len(blocks)-1 { |
| w.Write(mdNewline) |
| } |
| } |
| } |
| |
| const ( |
| ulquo = "“" |
| urquo = "”" |
| ) |
| |
| var ( |
| markdownEscape = regexp.MustCompile(`([\\\x60*{}[\]()#+\-.!_>~|"$%&'\/:;<=?@^])`) |
| |
| unicodeQuoteReplacer = strings.NewReplacer("``", ulquo, "''", urquo) |
| ) |
| |
| // commentEscape escapes comment text for markdown. If nice is set, |
| // also turn double ` and ' into “ and ”. |
| func commentEscape(w io.Writer, text string, nice bool) { |
| if nice { |
| text = convertQuotes(text) |
| } |
| text = escapeRegex(text) |
| w.Write([]byte(text)) |
| } |
| |
| func convertQuotes(text string) string { |
| return unicodeQuoteReplacer.Replace(text) |
| } |
| |
| func escapeRegex(text string) string { |
| return markdownEscape.ReplaceAllString(text, `\$1`) |
| } |
| |
| func emphasize(w io.Writer, line string, nice bool) { |
| for { |
| m := matchRx.FindStringSubmatchIndex(line) |
| if m == nil { |
| break |
| } |
| // m >= 6 (two parenthesized sub-regexps in matchRx, 1st one is urlRx) |
| |
| // write text before match |
| commentEscape(w, line[0:m[0]], nice) |
| |
| // adjust match for URLs |
| match := line[m[0]:m[1]] |
| if strings.Contains(match, "://") { |
| m0, m1 := m[0], m[1] |
| for _, s := range []string{"()", "{}", "[]"} { |
| open, close := s[:1], s[1:] // E.g., "(" and ")" |
| // require opening parentheses before closing parentheses (#22285) |
| if i := strings.Index(match, close); i >= 0 && i < strings.Index(match, open) { |
| m1 = m0 + i |
| match = line[m0:m1] |
| } |
| // require balanced pairs of parentheses (#5043) |
| for i := 0; strings.Count(match, open) != strings.Count(match, close) && i < 10; i++ { |
| m1 = strings.LastIndexAny(line[:m1], s) |
| match = line[m0:m1] |
| } |
| } |
| if m1 != m[1] { |
| // redo matching with shortened line for correct indices |
| m = matchRx.FindStringSubmatchIndex(line[:m[0]+len(match)]) |
| } |
| } |
| |
| // Following code has been modified from go/doc since words is always |
| // nil. All html formatting has also been transformed into markdown formatting |
| |
| // analyze match |
| url := "" |
| if m[2] >= 0 { |
| url = match |
| } |
| |
| // write match |
| if len(url) > 0 { |
| w.Write(mdLinkStart) |
| } |
| |
| commentEscape(w, match, nice) |
| |
| if len(url) > 0 { |
| w.Write(mdLinkDiv) |
| w.Write([]byte(urlReplacer.Replace(url))) |
| w.Write(mdLinkEnd) |
| } |
| |
| // advance |
| line = line[m[1]:] |
| } |
| commentEscape(w, line, nice) |
| } |
| |
| // Everything from here on is a copy of go/doc/comment.go |
| |
| const ( |
| // Regexp for Go identifiers |
| identRx = `[\pL_][\pL_0-9]*` |
| |
| // Regexp for URLs |
| // Match parens, and check later for balance - see #5043, #22285 |
| // Match .,:;?! within path, but not at end - see #18139, #16565 |
| // This excludes some rare yet valid urls ending in common punctuation |
| // in order to allow sentences ending in URLs. |
| |
| // protocol (required) e.g. http |
| protoPart = `(https?|ftp|file|gopher|mailto|nntp)` |
| // host (required) e.g. www.example.com or [::1]:8080 |
| hostPart = `([a-zA-Z0-9_@\-.\[\]:]+)` |
| // path+query+fragment (optional) e.g. /path/index.html?q=foo#bar |
| pathPart = `([.,:;?!]*[a-zA-Z0-9$'()*+&#=@~_/\-\[\]%])*` |
| |
| urlRx = protoPart + `://` + hostPart + pathPart |
| ) |
| |
| var ( |
| matchRx = regexp.MustCompile(`(` + urlRx + `)|(` + identRx + `)`) |
| urlReplacer = strings.NewReplacer(`(`, `\(`, `)`, `\)`) |
| ) |
| |
| func indentLen(s string) int { |
| i := 0 |
| for i < len(s) && (s[i] == ' ' || s[i] == '\t') { |
| i++ |
| } |
| return i |
| } |
| |
| func isBlank(s string) bool { |
| return len(s) == 0 || (len(s) == 1 && s[0] == '\n') |
| } |
| |
| func commonPrefix(a, b string) string { |
| i := 0 |
| for i < len(a) && i < len(b) && a[i] == b[i] { |
| i++ |
| } |
| return a[0:i] |
| } |
| |
| func unindent(block []string) { |
| if len(block) == 0 { |
| return |
| } |
| |
| // compute maximum common white prefix |
| prefix := block[0][0:indentLen(block[0])] |
| for _, line := range block { |
| if !isBlank(line) { |
| prefix = commonPrefix(prefix, line[0:indentLen(line)]) |
| } |
| } |
| n := len(prefix) |
| |
| // remove |
| for i, line := range block { |
| if !isBlank(line) { |
| block[i] = line[n:] |
| } |
| } |
| } |
| |
| // heading returns the trimmed line if it passes as a section heading; |
| // otherwise it returns the empty string. |
| func heading(line string) string { |
| line = strings.TrimSpace(line) |
| if len(line) == 0 { |
| return "" |
| } |
| |
| // a heading must start with an uppercase letter |
| r, _ := utf8.DecodeRuneInString(line) |
| if !unicode.IsLetter(r) || !unicode.IsUpper(r) { |
| return "" |
| } |
| |
| // it must end in a letter or digit: |
| r, _ = utf8.DecodeLastRuneInString(line) |
| if !unicode.IsLetter(r) && !unicode.IsDigit(r) { |
| return "" |
| } |
| |
| // exclude lines with illegal characters. we allow "()," |
| if strings.ContainsAny(line, ";:!?+*/=[]{}_^°&§~%#@<\">\\") { |
| return "" |
| } |
| |
| // allow "'" for possessive "'s" only |
| for b := line; ; { |
| i := strings.IndexRune(b, '\'') |
| if i < 0 { |
| break |
| } |
| if i+1 >= len(b) || b[i+1] != 's' || (i+2 < len(b) && b[i+2] != ' ') { |
| return "" // not followed by "s " |
| } |
| b = b[i+2:] |
| } |
| |
| // allow "." when followed by non-space |
| for b := line; ; { |
| i := strings.IndexRune(b, '.') |
| if i < 0 { |
| break |
| } |
| if i+1 >= len(b) || b[i+1] == ' ' { |
| return "" // not followed by non-space |
| } |
| b = b[i+1:] |
| } |
| |
| return line |
| } |
| |
| type op int |
| |
| const ( |
| opPara op = iota |
| opHead |
| opPre |
| ) |
| |
| type block struct { |
| op op |
| lines []string |
| } |
| |
| func blocks(text string) []block { |
| var ( |
| out []block |
| para []string |
| |
| lastWasBlank = false |
| lastWasHeading = false |
| ) |
| |
| close := func() { |
| if para != nil { |
| out = append(out, block{opPara, para}) |
| para = nil |
| } |
| } |
| |
| lines := strings.SplitAfter(text, "\n") |
| unindent(lines) |
| for i := 0; i < len(lines); { |
| line := lines[i] |
| if isBlank(line) { |
| // close paragraph |
| close() |
| i++ |
| lastWasBlank = true |
| continue |
| } |
| if indentLen(line) > 0 { |
| // close paragraph |
| close() |
| |
| // count indented or blank lines |
| j := i + 1 |
| for j < len(lines) && (isBlank(lines[j]) || indentLen(lines[j]) > 0) { |
| j++ |
| } |
| // but not trailing blank lines |
| for j > i && isBlank(lines[j-1]) { |
| j-- |
| } |
| pre := lines[i:j] |
| i = j |
| |
| unindent(pre) |
| |
| // put those lines in a pre block |
| out = append(out, block{opPre, pre}) |
| lastWasHeading = false |
| continue |
| } |
| |
| if lastWasBlank && !lastWasHeading && i+2 < len(lines) && |
| isBlank(lines[i+1]) && !isBlank(lines[i+2]) && indentLen(lines[i+2]) == 0 { |
| // current line is non-blank, surrounded by blank lines |
| // and the next non-blank line is not indented: this |
| // might be a heading. |
| if head := heading(line); head != "" { |
| close() |
| out = append(out, block{opHead, []string{head}}) |
| i += 2 |
| lastWasHeading = true |
| continue |
| } |
| } |
| |
| // open paragraph |
| lastWasBlank = false |
| lastWasHeading = false |
| para = append(para, lines[i]) |
| i++ |
| } |
| close() |
| |
| return out |
| } |