blob: a54211e23b009e825c1854d45cb8f2d25f697c51 [file] [log] [blame]
// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package frontend
import (
"bytes"
"context"
"github.com/google/safehtml"
"github.com/google/safehtml/template"
"github.com/google/safehtml/uncheckedconversions"
"github.com/microcosm-cc/bluemonday"
"github.com/yuin/goldmark"
emoji "github.com/yuin/goldmark-emoji"
"github.com/yuin/goldmark/extension"
"github.com/yuin/goldmark/parser"
"github.com/yuin/goldmark/renderer"
goldmarkHtml "github.com/yuin/goldmark/renderer/html"
gmtext "github.com/yuin/goldmark/text"
"github.com/yuin/goldmark/util"
"golang.org/x/pkgsite/internal"
"golang.org/x/pkgsite/internal/derrors"
"golang.org/x/pkgsite/internal/log"
"golang.org/x/pkgsite/internal/source"
)
// Heading holds data about a heading and nested headings within a readme.
// This data is used in the sidebar template to render the readme outline.
//
// Headings form a tree: a heading lists the headings nested beneath it in
// Children and points back to its enclosing heading through parent.
type Heading struct {
// Level is the original level of the heading.
Level int
// Text is the content from the readme contained within a heading.
Text string
// ID corresponds to the ID attribute for a heading element
// and is also used in an href to the corresponding section
// within the readme outline. All ids are prefixed with readme-
// to avoid name collisions.
ID string
// Children are nested headings.
Children []*Heading
// parent is the heading this heading is nested within. It is nil for
// top-level headings. The field is unexported, so it is not reachable
// from outside this package.
parent *Heading
}
// Readme holds the result of processing a README file.
//
// processReadme returns an empty Readme when there is no README content or
// when Markdown rendering fails; for non-Markdown files only HTML is set.
type Readme struct {
HTML safehtml.HTML // rendered HTML
Outline []*Heading // document headings
Links []link // links from the "Links" section
}
// ProcessReadme processes the README of unit u, if it has one.
//
// Processing covers rendering and sanitizing the HTML or Markdown, and
// extracting the headings and links it contains.
//
// Heading IDs are prefixed with "readme-" and heading levels are shifted to
// start at h3 so that they nest properly within the rest of the page. The
// readme's original look is kept by giving each heading a css class styled
// like its original level.
//
// The extracted links are meant for display outside of the readme contents.
//
// Any returned error is wrapped and reported together with the unit's path,
// module path and version.
//
// This function is exported for use by external tools.
func ProcessReadme(ctx context.Context, u *internal.Unit) (_ *Readme, err error) {
	defer derrors.WrapAndReport(&err, "ProcessReadme(%q, %q, %q)", u.Path, u.ModulePath, u.Version)

	var processed *Readme
	processed, err = processReadme(ctx, u.Readme, u.SourceInfo)
	return processed, err
}
// processReadme turns readme into a frontend Readme.
//
// A nil readme, or one with empty contents, yields an empty Readme. A file
// that isMarkdown rejects is rendered through a <pre class="readme">
// template and gets no outline or links. Anything else is parsed and rendered
// with goldmark, then sanitized; its headings and links are collected by AST
// transformers during parsing.
//
// An error or panic raised while rendering is logged at debug level and
// results in an empty Readme with a nil error. The only error this function
// returns comes from executing the plain-text template.
func processReadme(ctx context.Context, readme *internal.Readme, sourceInfo *source.Info) (frontendReadme *Readme, err error) {
	if readme == nil || readme.Contents == "" {
		return &Readme{}, nil
	}

	if !isMarkdown(readme.Filepath) {
		plain := template.Must(template.New("").Parse(`<pre class="readme">{{.}}</pre>`))
		preformatted, execErr := plain.ExecuteToHTML(readme.Contents)
		if execErr != nil {
			return nil, execErr
		}
		return &Readme{HTML: preformatted}, nil
	}

	// Priority given to our transformers so that they always take precedence
	// over goldmark's default ones. The default values are in:
	// https://github.com/yuin/goldmark/blob/7b90f04af43131db79ec320be0bd4744079b346f/parser/parser.go#L567
	const astTransformerPriority = 10000

	var (
		// urlRewriter uses the readme and module info to apply
		// translateRelativeLink and translateHTML to the AST before it is
		// rendered.
		urlRewriter   = &astTransformer{info: sourceInfo, readme: readme}
		linkExtractor = &extractLinks{ctx: ctx}
		tocExtractor  = &extractTOC{ctx: ctx, removeTitle: true}
	)

	parserOpts := goldmark.WithParserOptions(
		// Heading attributes let us put extra attributes on heading tags,
		// which the aria-level implementation of increased heading rankings
		// relies on.
		parser.WithHeadingAttribute(),
		// Give every heading tag an id, as github does, so that a link with a
		// hash scrolls to it:
		// <h1 id="goldmark">goldmark</h1> => github.com/yuin/goldmark#goldmark
		parser.WithAutoHeadingID(),
		parser.WithASTTransformers(
			util.Prioritized(urlRewriter, astTransformerPriority),
			// The extractors are registered after the URL rewriting
			// transformer so that they see the transformed URLs.
			util.Prioritized(linkExtractor, astTransformerPriority+1),
			util.Prioritized(tocExtractor, astTransformerPriority+1),
		),
	)
	// Raw HTML written in the README is passed through by the renderer. This
	// is fine because the output is run through bluemonday afterwards.
	rendererOpts := goldmark.WithRendererOptions(goldmarkHtml.WithUnsafe(), goldmarkHtml.WithXHTML())
	extensions := goldmark.WithExtensions(
		extension.GFM, // Github Flavored Markdown.
		emoji.Emoji,   // Github markdown emoji markup.
	)
	md := goldmark.New(parserOpts, rendererOpts, extensions)

	htmlRenderer := md.Renderer()
	htmlRenderer.AddOptions(
		renderer.WithNodeRenderers(
			util.Prioritized(newHTMLRenderer(sourceInfo, readme), 100),
		),
	)

	src := []byte(readme.Contents)
	idCtx := parser.NewContext(parser.WithIDs(newIDs()))
	doc := md.Parser().Parse(gmtext.NewReader(src), parser.WithContext(idCtx))

	var out bytes.Buffer
	defer func() {
		// Render is known to panic on some inputs. For example,
		// https://pkg.go.dev/github.com/jinghzhu/k8scrd/pkg/crd/jinghzhu/v1
		// makes it index a slice out of bounds.
		//
		// Such a panic is handled exactly like an ordinary error from Render:
		// log it and hand back an empty Readme.
		recovered := recover()
		if recovered == nil {
			return
		}
		log.Debugf(ctx, "gdRenderer.Render: %v", recovered)
		frontendReadme, err = &Readme{}, nil
	}()

	if renderErr := htmlRenderer.Render(&out, src, doc); renderErr != nil {
		log.Debugf(ctx, "gdRenderer.Render: %v", renderErr)
		return &Readme{}, nil
	}

	return &Readme{
		HTML:    sanitizeHTML(&out),
		Outline: tocExtractor.Headings,
		Links:   linkExtractor.links,
	}, nil
}
// sanitizeHTML sanitizes HTML from a bytes.Buffer so that it is safe.
func sanitizeHTML(b *bytes.Buffer) safehtml.HTML {
p := bluemonday.UGCPolicy()
p.AllowAttrs("width", "align").OnElements("img")
p.AllowAttrs("width", "align").OnElements("div")
p.AllowAttrs("width", "align").OnElements("p")
// Allow accessible headings (i.e <div role="heading" aria-level="7">).
p.AllowAttrs("width", "align", "role", "aria-level").OnElements("div")
for _, h := range []string{"h1", "h2", "h3", "h4", "h5", "h6"} {
// Needed to preserve github styles heading font-sizes
p.AllowAttrs("class").OnElements(h)
}
s := string(p.SanitizeBytes(b.Bytes()))
return uncheckedconversions.HTMLFromStringKnownToSatisfyTypeContract(s)
}