internal/frontend/readme.go - pkgsite - Git at Google

 // Copyright 2020 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

 package frontend

 import (
 	"bytes"
 	"context"
 	"math"

 	"github.com/google/safehtml"
 	"github.com/google/safehtml/template"
 	"github.com/google/safehtml/uncheckedconversions"
 	"github.com/microcosm-cc/bluemonday"
 	"github.com/yuin/goldmark"
 	emoji "github.com/yuin/goldmark-emoji"
 	"github.com/yuin/goldmark/ast"
 	"github.com/yuin/goldmark/extension"
 	"github.com/yuin/goldmark/parser"
 	"github.com/yuin/goldmark/renderer"
 	goldmarkHtml "github.com/yuin/goldmark/renderer/html"
 	"github.com/yuin/goldmark/text"
 	"github.com/yuin/goldmark/util"
 	"golang.org/x/pkgsite/internal"
 	"golang.org/x/pkgsite/internal/derrors"
 )

 // Heading holds data about a heading within a readme used in the
 // sidebar template to render the readme outline.
 type Heading struct {
 	// Level is the original level of the heading.
 	Level int
 	// Text is the content from the readme contained within a heading.
 	Text string
 	// ID corresponds to the ID attribute for a heading element
 	// and is also used in an href to the corresponding section
 	// within the readme outline. All ids are prefixed with readme-
 	// to avoid name collisions.
 	ID string
 }

 // Readme sanitizes readmeContents and returns a safehtml.HTML. If the readme filepath
 // indicates that this is a markdown file, it will render the markdown contents and
 // generate an outline from the parsed readmeContent's ast. Headings are prefixed with
 // "readme-" and heading levels are adjusted to start at h3 in order to nest them
 // properly within the rest of the page. The readme's original styling is preserved
 // in the html by giving headings a css class styled identical to their original
 // heading level.
 //
 // This function is exported for use in an external tool that uses this package to
 // compare readme files to see how changes in processing will affect them.
 func Readme(ctx context.Context, u *internal.Unit) (_ safehtml.HTML, _ []*Heading, err error) {
 	defer derrors.Wrap(&err, "Readme(%q, %q, %q)", u.Path, u.ModulePath, u.Version)
 	if u.Readme == nil || u.Readme.Contents == "" {
 		return safehtml.HTML{}, nil, nil
 	}
 	if !isMarkdown(u.Readme.Filepath) {
 		t := template.Must(template.New("").Parse(`<pre class="readme">{{.}}</pre>`))
 		h, err := t.ExecuteToHTML(u.Readme.Contents)
 		if err != nil {
 			return safehtml.HTML{}, nil, err
 		}
 		return h, nil, nil
 	}

 	// Sets priority value so that we always use our custom transformer
 	// instead of the default ones. The default values are in:
 	// https://github.com/yuin/goldmark/blob/7b90f04af43131db79ec320be0bd4744079b346f/parser/parser.go#L567
 	const ASTTransformerPriority = 10000
 	gdMarkdown := goldmark.New(
 		goldmark.WithParserOptions(
 			// WithHeadingAttribute allows us to include other attributes in
 			// heading tags. This is useful for our aria-level implementation of
 			// increasing heading rankings.
 			parser.WithHeadingAttribute(),
 			// Generates an id in every heading tag. This is used in github in
 			// order to generate a link with a hash that a user would scroll to
 			// <h1 id="goldmark">goldmark</h1> => github.com/yuin/goldmark#goldmark
 			parser.WithAutoHeadingID(),
 			// Include custom ASTTransformer using the readme and module info to
 			// use translateRelativeLink and translateHTML to modify the AST
 			// before it is rendered.
 			parser.WithASTTransformers(util.Prioritized(&ASTTransformer{
 				info:   u.SourceInfo,
 				readme: u.Readme,
 			}, ASTTransformerPriority)),
 		),
 		// These extensions lets users write HTML code in the README. This is
 		// fine since we process the contents using bluemonday after.
 		goldmark.WithRendererOptions(goldmarkHtml.WithUnsafe(), goldmarkHtml.WithXHTML()),
 		goldmark.WithExtensions(
 			extension.GFM, // Support Github Flavored Markdown.
 			emoji.Emoji,   // Support Github markdown emoji markup.
 		),
 	)
 	gdMarkdown.Renderer().AddOptions(
 		renderer.WithNodeRenderers(
 			util.Prioritized(NewHTMLRenderer(u.SourceInfo, u.Readme), 100),
 		),
 	)
 	contents := []byte(u.Readme.Contents)
 	gdParser := gdMarkdown.Parser()
 	reader := text.NewReader(contents)
 	doc := gdParser.Parse(reader)
 	gdRenderer := gdMarkdown.Renderer()

 	var b bytes.Buffer
 	if err := gdRenderer.Render(&b, contents, doc); err != nil {
 		return safehtml.HTML{}, nil, nil
 	}
 	htmlContent := sanitizeHTML(&b)
 	outline := readmeOutline(doc, contents)
 	return htmlContent, outline, nil
 }

 // sanitizeHTML sanitizes HTML from a bytes.Buffer so that it is safe.
 func sanitizeHTML(b *bytes.Buffer) safehtml.HTML {
 	p := bluemonday.UGCPolicy()

 	p.AllowAttrs("width", "align").OnElements("img")
 	p.AllowAttrs("width", "align").OnElements("div")
 	p.AllowAttrs("width", "align").OnElements("p")
 	// Allow accessible headings (i.e <div role="heading" aria-level="7">).
 	p.AllowAttrs("width", "align", "role", "aria-level").OnElements("div")
 	for _, h := range []string{"h1", "h2", "h3", "h4", "h5", "h6"} {
 		// Needed to preserve github styles heading font-sizes
 		p.AllowAttrs("class").OnElements(h)
 	}

 	s := string(p.SanitizeBytes(b.Bytes()))
 	return uncheckedconversions.HTMLFromStringKnownToSatisfyTypeContract(s)
 }

 // readmeOutline collects the headings from a readme into an outline
 // of the document. It keeps only the top two levels of nesting from
 // any set of headings. See tests for heading levels in TestReadme
 // for behavior.
 func readmeOutline(doc ast.Node, contents []byte) []*Heading {
 	var headings []*Heading
 	// l1 and l2 are used to keep track of the top two heading levels.
 	l1, l2 := math.MaxInt8, math.MaxInt8

 	ast.Walk(doc, func(n ast.Node, entering bool) (ast.WalkStatus, error) {
 		if n.Kind() == ast.KindHeading && entering {
 			heading := n.(*ast.Heading)
 			text := n.Text(contents)
 			section := Heading{
 				Level: heading.Level,
 				Text:  string(text),
 			}
 			if id, ok := heading.AttributeString("id"); ok {
 				section.ID = string(id.([]byte))
 			}
 			headings = append(headings, &section)
 			if heading.Level < l1 {
 				l2, l1 = l1, heading.Level
 			} else if heading.Level < l2 && heading.Level != l1 {
 				l2 = heading.Level
 			}
 			return ast.WalkSkipChildren, nil
 		}
 		return ast.WalkContinue, nil
 	})

 	var filtered []*Heading
 	for _, h := range headings {
 		if h.Level <= l2 {
 			filtered = append(filtered, h)
 		}
 	}
 	return filtered
 }
	// Copyright 2020 The Go Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style
	// license that can be found in the LICENSE file.

	package frontend

	import (
	"bytes"
	"context"
	"math"

	"github.com/google/safehtml"
	"github.com/google/safehtml/template"
	"github.com/google/safehtml/uncheckedconversions"
	"github.com/microcosm-cc/bluemonday"
	"github.com/yuin/goldmark"
	emoji "github.com/yuin/goldmark-emoji"
	"github.com/yuin/goldmark/ast"
	"github.com/yuin/goldmark/extension"
	"github.com/yuin/goldmark/parser"
	"github.com/yuin/goldmark/renderer"
	goldmarkHtml "github.com/yuin/goldmark/renderer/html"
	"github.com/yuin/goldmark/text"
	"github.com/yuin/goldmark/util"
	"golang.org/x/pkgsite/internal"
	"golang.org/x/pkgsite/internal/derrors"
	)

	// Heading holds data about a heading within a readme used in the
	// sidebar template to render the readme outline.
	type Heading struct {
	// Level is the original level of the heading.
	Level int
	// Text is the content from the readme contained within a heading.
	Text string
	// ID corresponds to the ID attribute for a heading element
	// and is also used in an href to the corresponding section
	// within the readme outline. All ids are prefixed with readme-
	// to avoid name collisions.
	ID string
	}

	// Readme sanitizes readmeContents and returns a safehtml.HTML. If the readme filepath
	// indicates that this is a markdown file, it will render the markdown contents and
	// generate an outline from the parsed readmeContent's ast. Headings are prefixed with
	// "readme-" and heading levels are adjusted to start at h3 in order to nest them
	// properly within the rest of the page. The readme's original styling is preserved
	// in the html by giving headings a css class styled identical to their original
	// heading level.
	//
	// This function is exported for use in an external tool that uses this package to
	// compare readme files to see how changes in processing will affect them.
	func Readme(ctx context.Context, u internal.Unit) (_ safehtml.HTML, _ []Heading, err error) {
	defer derrors.Wrap(&err, "Readme(%q, %q, %q)", u.Path, u.ModulePath, u.Version)
	if u.Readme == nil \|\| u.Readme.Contents == "" {
	return safehtml.HTML{}, nil, nil
	}
	if !isMarkdown(u.Readme.Filepath) {
	t := template.Must(template.New("").Parse(`<pre class="readme">{{.}}</pre>`))
	h, err := t.ExecuteToHTML(u.Readme.Contents)
	if err != nil {
	return safehtml.HTML{}, nil, err
	}
	return h, nil, nil
	}

	// Sets priority value so that we always use our custom transformer
	// instead of the default ones. The default values are in:
	// https://github.com/yuin/goldmark/blob/7b90f04af43131db79ec320be0bd4744079b346f/parser/parser.go#L567
	const ASTTransformerPriority = 10000
	gdMarkdown := goldmark.New(
	goldmark.WithParserOptions(
	// WithHeadingAttribute allows us to include other attributes in
	// heading tags. This is useful for our aria-level implementation of
	// increasing heading rankings.
	parser.WithHeadingAttribute(),
	// Generates an id in every heading tag. This is used in github in
	// order to generate a link with a hash that a user would scroll to
	// <h1 id="goldmark">goldmark</h1> => github.com/yuin/goldmark#goldmark
	parser.WithAutoHeadingID(),
	// Include custom ASTTransformer using the readme and module info to
	// use translateRelativeLink and translateHTML to modify the AST
	// before it is rendered.
	parser.WithASTTransformers(util.Prioritized(&ASTTransformer{
	info: u.SourceInfo,
	readme: u.Readme,
	}, ASTTransformerPriority)),
	),
	// These extensions lets users write HTML code in the README. This is
	// fine since we process the contents using bluemonday after.
	goldmark.WithRendererOptions(goldmarkHtml.WithUnsafe(), goldmarkHtml.WithXHTML()),
	goldmark.WithExtensions(
	extension.GFM, // Support Github Flavored Markdown.
	emoji.Emoji, // Support Github markdown emoji markup.
	),
	)
	gdMarkdown.Renderer().AddOptions(
	renderer.WithNodeRenderers(
	util.Prioritized(NewHTMLRenderer(u.SourceInfo, u.Readme), 100),
	),
	)
	contents := []byte(u.Readme.Contents)
	gdParser := gdMarkdown.Parser()
	reader := text.NewReader(contents)
	doc := gdParser.Parse(reader)
	gdRenderer := gdMarkdown.Renderer()

	var b bytes.Buffer
	if err := gdRenderer.Render(&b, contents, doc); err != nil {
	return safehtml.HTML{}, nil, nil
	}
	htmlContent := sanitizeHTML(&b)
	outline := readmeOutline(doc, contents)
	return htmlContent, outline, nil
	}

	// sanitizeHTML sanitizes HTML from a bytes.Buffer so that it is safe.
	func sanitizeHTML(b *bytes.Buffer) safehtml.HTML {
	p := bluemonday.UGCPolicy()

	p.AllowAttrs("width", "align").OnElements("img")
	p.AllowAttrs("width", "align").OnElements("div")
	p.AllowAttrs("width", "align").OnElements("p")
	// Allow accessible headings (i.e <div role="heading" aria-level="7">).
	p.AllowAttrs("width", "align", "role", "aria-level").OnElements("div")
	for _, h := range []string{"h1", "h2", "h3", "h4", "h5", "h6"} {
	// Needed to preserve github styles heading font-sizes
	p.AllowAttrs("class").OnElements(h)
	}

	s := string(p.SanitizeBytes(b.Bytes()))
	return uncheckedconversions.HTMLFromStringKnownToSatisfyTypeContract(s)
	}

	// readmeOutline collects the headings from a readme into an outline
	// of the document. It keeps only the top two levels of nesting from
	// any set of headings. See tests for heading levels in TestReadme
	// for behavior.
	func readmeOutline(doc ast.Node, contents []byte) []*Heading {
	var headings []*Heading
	// l1 and l2 are used to keep track of the top two heading levels.
	l1, l2 := math.MaxInt8, math.MaxInt8

	ast.Walk(doc, func(n ast.Node, entering bool) (ast.WalkStatus, error) {
	if n.Kind() == ast.KindHeading && entering {
	heading := n.(*ast.Heading)
	text := n.Text(contents)
	section := Heading{
	Level: heading.Level,
	Text: string(text),
	}
	if id, ok := heading.AttributeString("id"); ok {
	section.ID = string(id.([]byte))
	}
	headings = append(headings, &section)
	if heading.Level < l1 {
	l2, l1 = l1, heading.Level
	} else if heading.Level < l2 && heading.Level != l1 {
	l2 = heading.Level
	}
	return ast.WalkSkipChildren, nil
	}
	return ast.WalkContinue, nil
	})

	var filtered []*Heading
	for _, h := range headings {
	if h.Level <= l2 {
	filtered = append(filtered, h)
	}
	}
	return filtered
	}