blob: 306d137136c489201849a8c9bf715cc74e46a9b8 [file] [log] [blame]
// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package frontend
import (
emoji ""
goldmarkHtml ""
// Heading describes a single heading found in a readme. The sidebar
// template uses these values to render the readme outline.
type Heading struct {
// Level is the original level of the heading, as it appeared in the
// readme (readmeOutline copies it from the parsed heading node).
Level int
// Text is the content from the readme contained within the heading.
Text string
// ID corresponds to the id attribute of the heading element. It is also
// used in the href that links an outline entry to the corresponding
// section of the readme. All ids are prefixed with "readme-" to avoid
// name collisions.
ID string
// Readme sanitizes the readme contents of u and returns them as a
// safehtml.HTML, together with an outline of the readme's headings.
//
// If u has no readme, or the readme contents are empty, it returns the zero
// safehtml.HTML, a nil outline and a nil error. If the readme filepath does
// not indicate a markdown file, the contents are rendered through a safehtml
// template into a <pre class="readme"> element and no outline is produced.
//
// Otherwise the markdown is rendered and an outline is generated from the
// parsed readme's AST. Headings are prefixed with "readme-" and heading
// levels are adjusted to start at h3 in order to nest them properly within
// the rest of the page. The readme's original styling is preserved in the
// html by giving headings a css class styled identically to their original
// heading level.
//
// ctx is not used by the code visible in this function.
//
// This function is exported for use in an external tool that uses this package to
// compare readme files to see how changes in processing will affect them.
func Readme(ctx context.Context, u *internal.Unit) (_ safehtml.HTML, _ []*Heading, err error) {
// derrors.Wrap receives the named result err together with the unit's
// path, module path and version; presumably it adds that context to any
// non-nil error returned below.
defer derrors.Wrap(&err, "Readme(%q, %q, %q)", u.Path, u.ModulePath, u.Version)
// Nothing to render: no readme, or an empty one.
if u.Readme == nil || u.Readme.Contents == "" {
return safehtml.HTML{}, nil, nil
// Non-markdown readmes are shown verbatim inside a <pre> element and get
// no outline.
if !isMarkdown(u.Readme.Filepath) {
t := template.Must(template.New("").Parse(`<pre class="readme">{{.}}</pre>`))
h, err := t.ExecuteToHTML(u.Readme.Contents)
if err != nil {
return safehtml.HTML{}, nil, err
return h, nil, nil
// Use a large priority value for our custom AST transformer so that it is
// always used instead of the default ones.
// NOTE(review): the reference to where goldmark's default priorities are
// defined is missing from this comment.
const ASTTransformerPriority = 10000
gdMarkdown := goldmark.New(
// WithHeadingAttribute allows us to include other attributes in
// heading tags. This is useful for our aria-level implementation of
// increasing heading rankings.
// Generates an id in every heading tag. This is used in github in
// order to generate a link with a hash that a user would scroll to,
// e.g. <h1 id="goldmark">goldmark</h1> is the target of a link ending
// in #goldmark.
// Include custom ASTTransformer using the readme and module info to
// use translateRelativeLink and translateHTML to modify the AST
// before it is rendered.
info: u.SourceInfo,
readme: u.Readme,
}, ASTTransformerPriority)),
// These renderer options let users write HTML code in the README. This
// is fine since we process the contents using bluemonday afterwards
// (see sanitizeHTML).
goldmark.WithRendererOptions(goldmarkHtml.WithUnsafe(), goldmarkHtml.WithXHTML()),
extension.GFM, // Support Github Flavored Markdown.
emoji.Emoji, // Support Github markdown emoji markup.
// The custom HTML renderer is built from the same source info and readme
// as the AST transformer above and is given priority 100.
util.Prioritized(NewHTMLRenderer(u.SourceInfo, u.Readme), 100),
// Parse and render in two explicit steps so that the parsed document is
// still available afterwards for building the outline.
contents := []byte(u.Readme.Contents)
gdParser := gdMarkdown.Parser()
reader := text.NewReader(contents)
// NewIDs is defined elsewhere in this package; presumably it supplies the
// "readme-" prefixed heading ids — confirm.
pctx := parser.NewContext(parser.WithIDs(NewIDs()))
doc := gdParser.Parse(reader, parser.WithContext(pctx))
gdRenderer := gdMarkdown.Renderer()
var b bytes.Buffer
if err := gdRenderer.Render(&b, contents, doc); err != nil {
// NOTE(review): a render failure is reported to the caller as an empty
// result with a nil error, so the error is dropped here. Confirm that this
// is deliberate; otherwise err should be returned.
return safehtml.HTML{}, nil, nil
// The rendered output may contain raw HTML from the readme, so it is
// sanitized before being returned.
htmlContent := sanitizeHTML(&b)
outline := readmeOutline(doc, contents)
return htmlContent, outline, nil
// sanitizeHTML sanitizes the HTML held in b so that it is safe, and returns
// the result as a safehtml.HTML. It starts from bluemonday's UGC policy and
// additionally allows the layout and accessibility attributes listed below.
func sanitizeHTML(b *bytes.Buffer) safehtml.HTML {
p := bluemonday.UGCPolicy()
// Allow width and align on images, divs and paragraphs.
p.AllowAttrs("width", "align").OnElements("img")
// NOTE(review): width and align on div are listed again in the rule for
// accessible headings below, so this rule looks redundant — confirm.
p.AllowAttrs("width", "align").OnElements("div")
p.AllowAttrs("width", "align").OnElements("p")
// Allow accessible headings (i.e <div role="heading" aria-level="7">).
p.AllowAttrs("width", "align", "role", "aria-level").OnElements("div")
for _, h := range []string{"h1", "h2", "h3", "h4", "h5", "h6"} {
// Needed to preserve github styles heading font-sizes
// NOTE(review): no statement using h is visible inside this loop; the
// rule that this comment describes appears to be missing from view.
// The conversion below is unchecked: the safety of the returned HTML rests
// entirely on the policy p applied by SanitizeBytes.
s := string(p.SanitizeBytes(b.Bytes()))
return uncheckedconversions.HTMLFromStringKnownToSatisfyTypeContract(s)
// readmeOutline collects the headings from a readme into an outline
// of the document. It keeps only the top two levels of nesting from
// any set of headings: the smallest heading level that occurs in doc and
// the next smallest distinct level. If only one distinct level occurs,
// every heading is kept. See tests for heading levels in TestReadme
// for behavior.
//
// doc is the parsed readme and contents is the source it was parsed from.
// The returned slice is nil when doc contains no headings.
func readmeOutline(doc ast.Node, contents []byte) []*Heading {
var headings []*Heading
// l1 and l2 are used to keep track of the top two heading levels.
// l1 is the smallest level seen so far and l2 the second smallest; both
// start at math.MaxInt8 so that any heading level compares lower.
l1, l2 := math.MaxInt8, math.MaxInt8
// The result of ast.Walk is discarded; the callback only ever returns a
// nil error.
ast.Walk(doc, func(n ast.Node, entering bool) (ast.WalkStatus, error) {
if n.Kind() == ast.KindHeading && entering {
var buffer bytes.Buffer
for c := n.FirstChild(); c != nil; c = c.NextSibling() {
// We keep only text content from the headings.
// NOTE(review): nothing visible here writes to buffer, so Text below
// would always be empty as shown; the write of the text node's
// content appears to be missing from view.
if c.Kind() == ast.KindText {
heading := n.(*ast.Heading)
section := Heading{
Level: heading.Level,
Text: buffer.String(),
// The type assertion below is unchecked and panics if the id attribute
// is not a []byte — assumes goldmark always stores it that way; confirm.
if id, ok := heading.AttributeString("id"); ok {
section.ID = string(id.([]byte))
headings = append(headings, &section)
// Update the two smallest distinct levels seen so far. A new minimum
// pushes the previous minimum down into l2.
if heading.Level < l1 {
l2, l1 = l1, heading.Level
} else if heading.Level < l2 && heading.Level != l1 {
l2 = heading.Level
// The heading's children have already been handled above, so the walk
// does not need to descend into them.
return ast.WalkSkipChildren, nil
return ast.WalkContinue, nil
// Keep only the headings at the top two levels. When l2 was never lowered
// from math.MaxInt8, this keeps everything.
var filtered []*Heading
for _, h := range headings {
if h.Level <= l2 {
filtered = append(filtered, h)
return filtered