internal/lsp/source/comment.go - tools.git - Git at Google

 package source

 import (
 	"bytes"
 	"io"
 	"regexp"
 	"strings"
 	"unicode"
 	"unicode/utf8"
 )

 // CommentToMarkdown converts comment text to formatted markdown.
 // The comment was prepared by DocReader,
 // so it is known not to have leading, trailing blank lines
 // nor to have trailing spaces at the end of lines.
 // The comment markers have already been removed.
 //
 // Each line is converted into a markdown line and empty lines are just converted to
 // newlines. Heading are prefixed with `### ` to make it a markdown heading.
 //
 // A span of indented lines retains a 4 space prefix block, with the common indent
 // prefix removed unless empty, in which case it will be converted to a newline.
 //
 // URLs in the comment text are converted into links.
 func CommentToMarkdown(text string) string {
 	buf := &bytes.Buffer{}
 	commentToMarkdown(buf, text)
 	return buf.String()
 }

 var (
 	mdNewline   = []byte("\n")
 	mdHeader    = []byte("### ")
 	mdIndent    = []byte("&nbsp;&nbsp;&nbsp;&nbsp;")
 	mdLinkStart = []byte("[")
 	mdLinkDiv   = []byte("](")
 	mdLinkEnd   = []byte(")")
 )

 func commentToMarkdown(w io.Writer, text string) {
 	isFirstLine := true
 	for _, b := range blocks(text) {
 		switch b.op {
 		case opPara:
 			if !isFirstLine {
 				w.Write(mdNewline)
 			}

 			for _, line := range b.lines {
 				emphasize(w, line, true)
 			}
 		case opHead:
 			if !isFirstLine {
 				w.Write(mdNewline)
 			}
 			w.Write(mdNewline)

 			for _, line := range b.lines {
 				w.Write(mdHeader)
 				commentEscape(w, line, true)
 				w.Write(mdNewline)
 			}
 		case opPre:
 			if !isFirstLine {
 				w.Write(mdNewline)
 			}
 			w.Write(mdNewline)

 			for _, line := range b.lines {
 				if isBlank(line) {
 					w.Write(mdNewline)
 				} else {
 					w.Write(mdIndent)
 					w.Write([]byte(line))
 					w.Write(mdNewline)
 				}
 			}
 		}
 		isFirstLine = false
 	}
 }

 const (
 	ulquo = "“"
 	urquo = "”"
 )

 var (
 	markdownEscape = regexp.MustCompile(`([\\\x60*{}[\]()#+\-.!_>~|"$%&'\/:;<=?@^])`)

 	unicodeQuoteReplacer = strings.NewReplacer("``", ulquo, "''", urquo)
 )

 // commentEscape escapes comment text for markdown. If nice is set,
 // also turn `` into “; and '' into ”;.
 func commentEscape(w io.Writer, text string, nice bool) {
 	if nice {
 		text = convertQuotes(text)
 	}
 	text = escapeRegex(text)
 	w.Write([]byte(text))
 }

 func convertQuotes(text string) string {
 	return unicodeQuoteReplacer.Replace(text)
 }

 func escapeRegex(text string) string {
 	return markdownEscape.ReplaceAllString(text, `\$1`)
 }

 func emphasize(w io.Writer, line string, nice bool) {
 	for {
 		m := matchRx.FindStringSubmatchIndex(line)
 		if m == nil {
 			break
 		}
 		// m >= 6 (two parenthesized sub-regexps in matchRx, 1st one is urlRx)

 		// write text before match
 		commentEscape(w, line[0:m[0]], nice)

 		// adjust match for URLs
 		match := line[m[0]:m[1]]
 		if strings.Contains(match, "://") {
 			m0, m1 := m[0], m[1]
 			for _, s := range []string{"()", "{}", "[]"} {
 				open, close := s[:1], s[1:] // E.g., "(" and ")"
 				// require opening parentheses before closing parentheses (#22285)
 				if i := strings.Index(match, close); i >= 0 && i < strings.Index(match, open) {
 					m1 = m0 + i
 					match = line[m0:m1]
 				}
 				// require balanced pairs of parentheses (#5043)
 				for i := 0; strings.Count(match, open) != strings.Count(match, close) && i < 10; i++ {
 					m1 = strings.LastIndexAny(line[:m1], s)
 					match = line[m0:m1]
 				}
 			}
 			if m1 != m[1] {
 				// redo matching with shortened line for correct indices
 				m = matchRx.FindStringSubmatchIndex(line[:m[0]+len(match)])
 			}
 		}

 		// Following code has been modified from go/doc since words is always
 		// nil. All html formatting has also been transformed into markdown formatting

 		// analyze match
 		url := ""
 		if m[2] >= 0 {
 			url = match
 		}

 		// write match
 		if len(url) > 0 {
 			w.Write(mdLinkStart)
 		}

 		commentEscape(w, match, nice)

 		if len(url) > 0 {
 			w.Write(mdLinkDiv)
 			w.Write([]byte(urlReplacer.Replace(url)))
 			w.Write(mdLinkEnd)
 		}

 		// advance
 		line = line[m[1]:]
 	}
 	commentEscape(w, line, nice)
 }

 // Everything from here on is a copy of go/doc/comment.go

 const (
 	// Regexp for Go identifiers
 	identRx = `[\pL_][\pL_0-9]*`

 	// Regexp for URLs
 	// Match parens, and check later for balance - see #5043, #22285
 	// Match .,:;?! within path, but not at end - see #18139, #16565
 	// This excludes some rare yet valid urls ending in common punctuation
 	// in order to allow sentences ending in URLs.

 	// protocol (required) e.g. http
 	protoPart = `(https?|ftp|file|gopher|mailto|nntp)`
 	// host (required) e.g. www.example.com or [::1]:8080
 	hostPart = `([a-zA-Z0-9_@\-.\[\]:]+)`
 	// path+query+fragment (optional) e.g. /path/index.html?q=foo#bar
 	pathPart = `([.,:;?!]*[a-zA-Z0-9$'()*+&#=@~_/\-\[\]%])*`

 	urlRx = protoPart + `://` + hostPart + pathPart
 )

 var (
 	matchRx     = regexp.MustCompile(`(` + urlRx + `)|(` + identRx + `)`)
 	urlReplacer = strings.NewReplacer(`(`, `\(`, `)`, `\)`)
 )

 func indentLen(s string) int {
 	i := 0
 	for i < len(s) && (s[i] == ' ' || s[i] == '\t') {
 		i++
 	}
 	return i
 }

 func isBlank(s string) bool {
 	return len(s) == 0 || (len(s) == 1 && s[0] == '\n')
 }

 func commonPrefix(a, b string) string {
 	i := 0
 	for i < len(a) && i < len(b) && a[i] == b[i] {
 		i++
 	}
 	return a[0:i]
 }

 func unindent(block []string) {
 	if len(block) == 0 {
 		return
 	}

 	// compute maximum common white prefix
 	prefix := block[0][0:indentLen(block[0])]
 	for _, line := range block {
 		if !isBlank(line) {
 			prefix = commonPrefix(prefix, line[0:indentLen(line)])
 		}
 	}
 	n := len(prefix)

 	// remove
 	for i, line := range block {
 		if !isBlank(line) {
 			block[i] = line[n:]
 		}
 	}
 }

 // heading returns the trimmed line if it passes as a section heading;
 // otherwise it returns the empty string.
 func heading(line string) string {
 	line = strings.TrimSpace(line)
 	if len(line) == 0 {
 		return ""
 	}

 	// a heading must start with an uppercase letter
 	r, _ := utf8.DecodeRuneInString(line)
 	if !unicode.IsLetter(r) || !unicode.IsUpper(r) {
 		return ""
 	}

 	// it must end in a letter or digit:
 	r, _ = utf8.DecodeLastRuneInString(line)
 	if !unicode.IsLetter(r) && !unicode.IsDigit(r) {
 		return ""
 	}

 	// exclude lines with illegal characters. we allow "(),"
 	if strings.ContainsAny(line, ";:!?+*/=[]{}_^°&§~%#@<\">\\") {
 		return ""
 	}

 	// allow "'" for possessive "'s" only
 	for b := line; ; {
 		i := strings.IndexRune(b, '\'')
 		if i < 0 {
 			break
 		}
 		if i+1 >= len(b) || b[i+1] != 's' || (i+2 < len(b) && b[i+2] != ' ') {
 			return "" // not followed by "s "
 		}
 		b = b[i+2:]
 	}

 	// allow "." when followed by non-space
 	for b := line; ; {
 		i := strings.IndexRune(b, '.')
 		if i < 0 {
 			break
 		}
 		if i+1 >= len(b) || b[i+1] == ' ' {
 			return "" // not followed by non-space
 		}
 		b = b[i+1:]
 	}

 	return line
 }

 type op int

 const (
 	opPara op = iota
 	opHead
 	opPre
 )

 type block struct {
 	op    op
 	lines []string
 }

 var nonAlphaNumRx = regexp.MustCompile(`[^a-zA-Z0-9]`)

 func anchorID(line string) string {
 	// Add a "hdr-" prefix to avoid conflicting with IDs used for package symbols.
 	return "hdr-" + nonAlphaNumRx.ReplaceAllString(line, "_")
 }

 func blocks(text string) []block {
 	var (
 		out  []block
 		para []string

 		lastWasBlank   = false
 		lastWasHeading = false
 	)

 	close := func() {
 		if para != nil {
 			out = append(out, block{opPara, para})
 			para = nil
 		}
 	}

 	lines := strings.SplitAfter(text, "\n")
 	unindent(lines)
 	for i := 0; i < len(lines); {
 		line := lines[i]
 		if isBlank(line) {
 			// close paragraph
 			close()
 			i++
 			lastWasBlank = true
 			continue
 		}
 		if indentLen(line) > 0 {
 			// close paragraph
 			close()

 			// count indented or blank lines
 			j := i + 1
 			for j < len(lines) && (isBlank(lines[j]) || indentLen(lines[j]) > 0) {
 				j++
 			}
 			// but not trailing blank lines
 			for j > i && isBlank(lines[j-1]) {
 				j--
 			}
 			pre := lines[i:j]
 			i = j

 			unindent(pre)

 			// put those lines in a pre block
 			out = append(out, block{opPre, pre})
 			lastWasHeading = false
 			continue
 		}

 		if lastWasBlank && !lastWasHeading && i+2 < len(lines) &&
 			isBlank(lines[i+1]) && !isBlank(lines[i+2]) && indentLen(lines[i+2]) == 0 {
 			// current line is non-blank, surrounded by blank lines
 			// and the next non-blank line is not indented: this
 			// might be a heading.
 			if head := heading(line); head != "" {
 				close()
 				out = append(out, block{opHead, []string{head}})
 				i += 2
 				lastWasHeading = true
 				continue
 			}
 		}

 		// open paragraph
 		lastWasBlank = false
 		lastWasHeading = false
 		para = append(para, lines[i])
 		i++
 	}
 	close()

 	return out
 }
	package source

	import (
	"bytes"
	"io"
	"regexp"
	"strings"
	"unicode"
	"unicode/utf8"
	)

	// CommentToMarkdown converts comment text to formatted markdown.
	// The comment was prepared by DocReader,
	// so it is known not to have leading, trailing blank lines
	// nor to have trailing spaces at the end of lines.
	// The comment markers have already been removed.
	//
	// Each line is converted into a markdown line and empty lines are just converted to
	// newlines. Heading are prefixed with `### ` to make it a markdown heading.
	//
	// A span of indented lines retains a 4 space prefix block, with the common indent
	// prefix removed unless empty, in which case it will be converted to a newline.
	//
	// URLs in the comment text are converted into links.
	func CommentToMarkdown(text string) string {
	buf := &bytes.Buffer{}
	commentToMarkdown(buf, text)
	return buf.String()
	}

	var (
	mdNewline = []byte("\n")
	mdHeader = []byte("### ")
	mdIndent = []byte("    ")
	mdLinkStart = []byte("[")
	mdLinkDiv = []byte("](")
	mdLinkEnd = []byte(")")
	)

	func commentToMarkdown(w io.Writer, text string) {
	isFirstLine := true
	for _, b := range blocks(text) {
	switch b.op {
	case opPara:
	if !isFirstLine {
	w.Write(mdNewline)
	}

	for _, line := range b.lines {
	emphasize(w, line, true)
	}
	case opHead:
	if !isFirstLine {
	w.Write(mdNewline)
	}
	w.Write(mdNewline)

	for _, line := range b.lines {
	w.Write(mdHeader)
	commentEscape(w, line, true)
	w.Write(mdNewline)
	}
	case opPre:
	if !isFirstLine {
	w.Write(mdNewline)
	}
	w.Write(mdNewline)

	for _, line := range b.lines {
	if isBlank(line) {
	w.Write(mdNewline)
	} else {
	w.Write(mdIndent)
	w.Write([]byte(line))
	w.Write(mdNewline)
	}
	}
	}
	isFirstLine = false
	}
	}

	const (
	ulquo = "“"
	urquo = "”"
	)

	var (
	markdownEscape = regexp.MustCompile(`([\\\x60*{}[\]()#+\-.!_>~\|"$%&'\/:;<=?@^])`)

	unicodeQuoteReplacer = strings.NewReplacer("``", ulquo, "''", urquo)
	)

	// commentEscape escapes comment text for markdown. If nice is set,
	// also turn `` into “; and '' into ”;.
	func commentEscape(w io.Writer, text string, nice bool) {
	if nice {
	text = convertQuotes(text)
	}
	text = escapeRegex(text)
	w.Write([]byte(text))
	}

	func convertQuotes(text string) string {
	return unicodeQuoteReplacer.Replace(text)
	}

	func escapeRegex(text string) string {
	return markdownEscape.ReplaceAllString(text, `\$1`)
	}

	func emphasize(w io.Writer, line string, nice bool) {
	for {
	m := matchRx.FindStringSubmatchIndex(line)
	if m == nil {
	break
	}
	// m >= 6 (two parenthesized sub-regexps in matchRx, 1st one is urlRx)

	// write text before match
	commentEscape(w, line[0:m[0]], nice)

	// adjust match for URLs
	match := line[m[0]:m[1]]
	if strings.Contains(match, "://") {
	m0, m1 := m[0], m[1]
	for _, s := range []string{"()", "{}", "[]"} {
	open, close := s[:1], s[1:] // E.g., "(" and ")"
	// require opening parentheses before closing parentheses (#22285)
	if i := strings.Index(match, close); i >= 0 && i < strings.Index(match, open) {
	m1 = m0 + i
	match = line[m0:m1]
	}
	// require balanced pairs of parentheses (#5043)
	for i := 0; strings.Count(match, open) != strings.Count(match, close) && i < 10; i++ {
	m1 = strings.LastIndexAny(line[:m1], s)
	match = line[m0:m1]
	}
	}
	if m1 != m[1] {
	// redo matching with shortened line for correct indices
	m = matchRx.FindStringSubmatchIndex(line[:m[0]+len(match)])
	}
	}

	// Following code has been modified from go/doc since words is always
	// nil. All html formatting has also been transformed into markdown formatting

	// analyze match
	url := ""
	if m[2] >= 0 {
	url = match
	}

	// write match
	if len(url) > 0 {
	w.Write(mdLinkStart)
	}

	commentEscape(w, match, nice)

	if len(url) > 0 {
	w.Write(mdLinkDiv)
	w.Write([]byte(urlReplacer.Replace(url)))
	w.Write(mdLinkEnd)
	}

	// advance
	line = line[m[1]:]
	}
	commentEscape(w, line, nice)
	}

	// Everything from here on is a copy of go/doc/comment.go

	const (
	// Regexp for Go identifiers
	identRx = `[\pL_][\pL_0-9]*`

	// Regexp for URLs
	// Match parens, and check later for balance - see #5043, #22285
	// Match .,:;?! within path, but not at end - see #18139, #16565
	// This excludes some rare yet valid urls ending in common punctuation
	// in order to allow sentences ending in URLs.

	// protocol (required) e.g. http
	protoPart = `(https?\|ftp\|file\|gopher\|mailto\|nntp)`
	// host (required) e.g. www.example.com or [::1]:8080
	hostPart = `([a-zA-Z0-9_@\-.\[\]:]+)`
	// path+query+fragment (optional) e.g. /path/index.html?q=foo#bar
	pathPart = `([.,:;?!][a-zA-Z0-9$'()+&#=@~_/\-\[\]%])*`

	urlRx = protoPart + `://` + hostPart + pathPart
	)

	var (
	matchRx = regexp.MustCompile(`(` + urlRx + `)\|(` + identRx + `)`)
	urlReplacer = strings.NewReplacer(`(`, `\(`, `)`, `\)`)
	)

	func indentLen(s string) int {
	i := 0
	for i < len(s) && (s[i] == ' ' \|\| s[i] == '\t') {
	i++
	}
	return i
	}

	func isBlank(s string) bool {
	return len(s) == 0 \|\| (len(s) == 1 && s[0] == '\n')
	}

	func commonPrefix(a, b string) string {
	i := 0
	for i < len(a) && i < len(b) && a[i] == b[i] {
	i++
	}
	return a[0:i]
	}

	func unindent(block []string) {
	if len(block) == 0 {
	return
	}

	// compute maximum common white prefix
	prefix := block[0][0:indentLen(block[0])]
	for _, line := range block {
	if !isBlank(line) {
	prefix = commonPrefix(prefix, line[0:indentLen(line)])
	}
	}
	n := len(prefix)

	// remove
	for i, line := range block {
	if !isBlank(line) {
	block[i] = line[n:]
	}
	}
	}

	// heading returns the trimmed line if it passes as a section heading;
	// otherwise it returns the empty string.
	func heading(line string) string {
	line = strings.TrimSpace(line)
	if len(line) == 0 {
	return ""
	}

	// a heading must start with an uppercase letter
	r, _ := utf8.DecodeRuneInString(line)
	if !unicode.IsLetter(r) \|\| !unicode.IsUpper(r) {
	return ""
	}

	// it must end in a letter or digit:
	r, _ = utf8.DecodeLastRuneInString(line)
	if !unicode.IsLetter(r) && !unicode.IsDigit(r) {
	return ""
	}

	// exclude lines with illegal characters. we allow "(),"
	if strings.ContainsAny(line, ";:!?+*/=[]{}_^°&§~%#@<\">\\") {
	return ""
	}

	// allow "'" for possessive "'s" only
	for b := line; ; {
	i := strings.IndexRune(b, '\'')
	if i < 0 {
	break
	}
	if i+1 >= len(b) \|\| b[i+1] != 's' \|\| (i+2 < len(b) && b[i+2] != ' ') {
	return "" // not followed by "s "
	}
	b = b[i+2:]
	}

	// allow "." when followed by non-space
	for b := line; ; {
	i := strings.IndexRune(b, '.')
	if i < 0 {
	break
	}
	if i+1 >= len(b) \|\| b[i+1] == ' ' {
	return "" // not followed by non-space
	}
	b = b[i+1:]
	}

	return line
	}

	type op int

	const (
	opPara op = iota
	opHead
	opPre
	)

	type block struct {
	op op
	lines []string
	}

	var nonAlphaNumRx = regexp.MustCompile(`[^a-zA-Z0-9]`)

	func anchorID(line string) string {
	// Add a "hdr-" prefix to avoid conflicting with IDs used for package symbols.
	return "hdr-" + nonAlphaNumRx.ReplaceAllString(line, "_")
	}

	func blocks(text string) []block {
	var (
	out []block
	para []string

	lastWasBlank = false
	lastWasHeading = false
	)

	close := func() {
	if para != nil {
	out = append(out, block{opPara, para})
	para = nil
	}
	}

	lines := strings.SplitAfter(text, "\n")
	unindent(lines)
	for i := 0; i < len(lines); {
	line := lines[i]
	if isBlank(line) {
	// close paragraph
	close()
	i++
	lastWasBlank = true
	continue
	}
	if indentLen(line) > 0 {
	// close paragraph
	close()

	// count indented or blank lines
	j := i + 1
	for j < len(lines) && (isBlank(lines[j]) \|\| indentLen(lines[j]) > 0) {
	j++
	}
	// but not trailing blank lines
	for j > i && isBlank(lines[j-1]) {
	j--
	}
	pre := lines[i:j]
	i = j

	unindent(pre)

	// put those lines in a pre block
	out = append(out, block{opPre, pre})
	lastWasHeading = false
	continue
	}

	if lastWasBlank && !lastWasHeading && i+2 < len(lines) &&
	isBlank(lines[i+1]) && !isBlank(lines[i+2]) && indentLen(lines[i+2]) == 0 {
	// current line is non-blank, surrounded by blank lines
	// and the next non-blank line is not indented: this
	// might be a heading.
	if head := heading(line); head != "" {
	close()
	out = append(out, block{opHead, []string{head}})
	i += 2
	lastWasHeading = true
	continue
	}
	}

	// open paragraph
	lastWasBlank = false
	lastWasHeading = false
	para = append(para, lines[i])
	i++
	}
	close()

	return out
	}