internal/frontend/readme.go - pkgsite - Git at Google

 // Copyright 2020 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

 package frontend

 import (
 	"bytes"
 	"context"

 	"github.com/google/safehtml"
 	"github.com/google/safehtml/template"
 	"github.com/google/safehtml/uncheckedconversions"
 	"github.com/microcosm-cc/bluemonday"
 	"github.com/yuin/goldmark"
 	emoji "github.com/yuin/goldmark-emoji"
 	"github.com/yuin/goldmark/extension"
 	"github.com/yuin/goldmark/parser"
 	"github.com/yuin/goldmark/renderer"
 	goldmarkHtml "github.com/yuin/goldmark/renderer/html"
 	gmtext "github.com/yuin/goldmark/text"
 	"github.com/yuin/goldmark/util"
 	"golang.org/x/pkgsite/internal"
 	"golang.org/x/pkgsite/internal/derrors"
 	"golang.org/x/pkgsite/internal/log"
 	"golang.org/x/pkgsite/internal/source"
 )

 // Heading holds data about a heading and the headings nested within it in a
 // readme. This data is used in the sidebar template to render the readme
 // outline.
 type Heading struct {
 	// Level is the original level of the heading.
 	Level int
 	// Text is the content from the readme contained within the heading.
 	Text string
 	// ID corresponds to the id attribute of the heading element. It is also
 	// used in an href to the corresponding section within the readme
 	// outline. All IDs are prefixed with "readme-" to avoid name collisions.
 	ID string
 	// Children are the headings nested within this heading.
 	Children []*Heading
 	// parent is the heading this heading is nested within. It is nil for
 	// top-level headings.
 	parent *Heading
 }

 // Readme holds the result of processing a README file.
 type Readme struct {
 	HTML    safehtml.HTML // rendered HTML
 	Outline []*Heading    // document headings
 	Links   []link        // links from the "Links" section
 }

 // ProcessReadme processes the README attached to unit u, if there is one.
 //
 // Processing consists of rendering and sanitizing the HTML or Markdown and
 // extracting the headings and links it contains.
 //
 // Heading IDs are prefixed with "readme-", and heading levels are adjusted to
 // start at h3 so that they nest properly within the rest of the page. The
 // readme's original styling is preserved in the HTML by giving each heading a
 // CSS class styled identically to its original heading level.
 //
 // The extracted links are meant for display outside of the readme contents.
 //
 // Any error is passed through derrors.WrapAndReport together with the unit's
 // path, module path and version.
 //
 // This function is exported for use by external tools.
 func ProcessReadme(ctx context.Context, u *internal.Unit) (_ *Readme, err error) {
 	defer derrors.WrapAndReport(&err, "ProcessReadme(%q, %q, %q)", u.Path, u.ModulePath, u.Version)
 	readme, info := u.Readme, u.SourceInfo
 	return processReadme(ctx, readme, info)
 }

 // processReadme turns readme into a frontend Readme.
 //
 // A nil readme or one with empty contents yields an empty Readme. A readme
 // whose file path is not recognized by isMarkdown is emitted verbatim inside
 // a <pre class="readme"> element. Markdown readmes are parsed and rendered
 // with goldmark, sanitized with sanitizeHTML, and returned together with the
 // headings and links collected while the document was transformed.
 //
 // A render error, or a panic raised after parsing has finished, is logged at
 // debug level and results in an empty Readme with a nil error.
 func processReadme(ctx context.Context, readme *internal.Readme, sourceInfo *source.Info) (frontendReadme *Readme, err error) {
 	switch {
 	case readme == nil || readme.Contents == "":
 		return &Readme{}, nil
 	case !isMarkdown(readme.Filepath):
 		// Non-markdown readmes are shown as preformatted text.
 		preTmpl := template.Must(template.New("").Parse(`<pre class="readme">{{.}}</pre>`))
 		plain, execErr := preTmpl.ExecuteToHTML(readme.Contents)
 		if execErr != nil {
 			return nil, execErr
 		}
 		return &Readme{HTML: plain}, nil
 	}

 	// The priority is chosen so that our custom transformer is always used
 	// instead of the default ones. The default values are in:
 	// https://github.com/yuin/goldmark/blob/7b90f04af43131db79ec320be0bd4744079b346f/parser/parser.go#L567
 	const astTransformerPriority = 10000
 	linkCollector := &extractLinks{ctx: ctx}
 	tocCollector := &extractTOC{ctx: ctx, removeTitle: true}

 	parserOpts := goldmark.WithParserOptions(
 		// Lets heading tags carry additional attributes; used by the
 		// aria-level implementation of increasing heading rankings.
 		parser.WithHeadingAttribute(),
 		// Generates an id for every heading tag, as GitHub does, so that a
 		// link with a hash scrolls to the heading:
 		// <h1 id="goldmark">goldmark</h1> => github.com/yuin/goldmark#goldmark
 		parser.WithAutoHeadingID(),
 		parser.WithASTTransformers(
 			// Custom transformer that uses the readme and module info with
 			// translateRelativeLink and translateHTML to modify the AST
 			// before it is rendered.
 			util.Prioritized(&astTransformer{
 				info:   sourceInfo,
 				readme: readme,
 			}, astTransformerPriority),
 			// Links are extracted after the URLs have been transformed.
 			util.Prioritized(linkCollector, astTransformerPriority+1),
 			util.Prioritized(tocCollector, astTransformerPriority+1),
 		),
 	)
 	// These renderer options let users write HTML code in the README. This
 	// is fine because the output is processed by bluemonday afterwards.
 	rendererOpts := goldmark.WithRendererOptions(goldmarkHtml.WithUnsafe(), goldmarkHtml.WithXHTML())
 	extensionOpts := goldmark.WithExtensions(
 		extension.GFM, // GitHub Flavored Markdown.
 		emoji.Emoji,   // GitHub markdown emoji markup.
 	)

 	md := goldmark.New(parserOpts, rendererOpts, extensionOpts)
 	md.Renderer().AddOptions(
 		renderer.WithNodeRenderers(
 			util.Prioritized(newHTMLRenderer(sourceInfo, readme), 100),
 		),
 	)

 	src := []byte(readme.Contents)
 	doc := md.Parser().Parse(
 		gmtext.NewReader(src),
 		parser.WithContext(parser.NewContext(parser.WithIDs(newIDs()))),
 	)
 	mdRenderer := md.Renderer()

 	defer func() {
 		// It's possible for the renderer's Render method to panic. For
 		// example,
 		// https://pkg.go.dev/github.com/jinghzhu/k8scrd/pkg/crd/jinghzhu/v1
 		// results in a panic because Render tries to index a slice out of
 		// bounds.
 		//
 		// Such a panic is handled the same way as an ordinary error from
 		// Render: log it and hand back an empty Readme.
 		p := recover()
 		if p == nil {
 			return
 		}
 		log.Debugf(ctx, "gdRenderer.Render: %v", p)
 		frontendReadme, err = &Readme{}, nil
 	}()

 	var rendered bytes.Buffer
 	if renderErr := mdRenderer.Render(&rendered, src, doc); renderErr != nil {
 		log.Debugf(ctx, "gdRenderer.Render: %v", renderErr)
 		return &Readme{}, nil
 	}
 	return &Readme{
 		HTML:    sanitizeHTML(&rendered),
 		Outline: tocCollector.Headings,
 		Links:   linkCollector.links,
 	}, nil
 }

 // sanitizeHTML passes the HTML held in b through a bluemonday policy and
 // returns the sanitized result as a safehtml.HTML value.
 //
 // The policy starts from bluemonday.UGCPolicy and additionally allows the
 // attributes registered below.
 func sanitizeHTML(b *bytes.Buffer) safehtml.HTML {
 	policy := bluemonday.UGCPolicy()

 	// Permit width and align on images, divs and paragraphs.
 	for _, el := range []string{"img", "div", "p"} {
 		policy.AllowAttrs("width", "align").OnElements(el)
 	}
 	// Allow accessible headings (i.e. <div role="heading" aria-level="7">).
 	policy.AllowAttrs("width", "align", "role", "aria-level").OnElements("div")
 	// The class attribute on headings is needed to preserve GitHub-style
 	// heading font sizes.
 	policy.AllowAttrs("class").OnElements("h1", "h2", "h3", "h4", "h5", "h6")

 	cleaned := policy.SanitizeBytes(b.Bytes())
 	return uncheckedconversions.HTMLFromStringKnownToSatisfyTypeContract(string(cleaned))
 }
	// Copyright 2020 The Go Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style
	// license that can be found in the LICENSE file.

	package frontend

	import (
	"bytes"
	"context"

	"github.com/google/safehtml"
	"github.com/google/safehtml/template"
	"github.com/google/safehtml/uncheckedconversions"
	"github.com/microcosm-cc/bluemonday"
	"github.com/yuin/goldmark"
	emoji "github.com/yuin/goldmark-emoji"
	"github.com/yuin/goldmark/extension"
	"github.com/yuin/goldmark/parser"
	"github.com/yuin/goldmark/renderer"
	goldmarkHtml "github.com/yuin/goldmark/renderer/html"
	gmtext "github.com/yuin/goldmark/text"
	"github.com/yuin/goldmark/util"
	"golang.org/x/pkgsite/internal"
	"golang.org/x/pkgsite/internal/derrors"
	"golang.org/x/pkgsite/internal/log"
	"golang.org/x/pkgsite/internal/source"
	)

	// Heading holds data about a heading and the headings nested within it in a
	// readme. This data is used in the sidebar template to render the readme
	// outline.
	type Heading struct {
	// Level is the original level of the heading.
	Level int
	// Text is the content from the readme contained within the heading.
	Text string
	// ID corresponds to the id attribute of the heading element. It is also
	// used in an href to the corresponding section within the readme
	// outline. All IDs are prefixed with "readme-" to avoid name collisions.
	ID string
	// Children are the headings nested within this heading.
	Children []*Heading
	// parent is the heading this heading is nested within. It is nil for
	// top-level headings.
	parent *Heading
	}

	// Readme holds the result of processing a README file.
	type Readme struct {
	HTML safehtml.HTML // rendered HTML
	Outline []*Heading // document headings
	Links []link // links from the "Links" section
	}

	// ProcessReadme processes the README of unit u, if it has one.
	// Processing includes rendering and sanitizing the HTML or Markdown,
	// and extracting headings and links.
	//
	// Headings are prefixed with "readme-" and heading levels are adjusted to start
	// at h3 in order to nest them properly within the rest of the page. The
	// readme's original styling is preserved in the html by giving headings a css
	// class styled identical to their original heading level.
	//
	// The extracted links are for display outside of the readme contents.
	//
	// Errors are passed through derrors.WrapAndReport along with the unit's
	// path, module path and version.
	//
	// This function is exported for use by external tools.
	func ProcessReadme(ctx context.Context, u *internal.Unit) (_ *Readme, err error) {
		// The unit and the result are pointers: processReadme returns a
		// *Readme, so the result type here must match it.
		defer derrors.WrapAndReport(&err, "ProcessReadme(%q, %q, %q)", u.Path, u.ModulePath, u.Version)
		return processReadme(ctx, u.Readme, u.SourceInfo)
	}

	func processReadme(ctx context.Context, readme internal.Readme, sourceInfo source.Info) (frontendReadme *Readme, err error) {
	if readme == nil \|\| readme.Contents == "" {
	return &Readme{}, nil
	}
	if !isMarkdown(readme.Filepath) {
	t := template.Must(template.New("").Parse(`<pre class="readme">{{.}}</pre>`))
	h, err := t.ExecuteToHTML(readme.Contents)
	if err != nil {
	return nil, err
	}
	return &Readme{HTML: h}, nil
	}

	// Sets priority value so that we always use our custom transformer
	// instead of the default ones. The default values are in:
	// https://github.com/yuin/goldmark/blob/7b90f04af43131db79ec320be0bd4744079b346f/parser/parser.go#L567
	const astTransformerPriority = 10000
	el := &extractLinks{ctx: ctx}
	et := &extractTOC{ctx: ctx, removeTitle: true}
	gdMarkdown := goldmark.New(
	goldmark.WithParserOptions(
	// WithHeadingAttribute allows us to include other attributes in
	// heading tags. This is useful for our aria-level implementation of
	// increasing heading rankings.
	parser.WithHeadingAttribute(),
	// Generates an id in every heading tag. This is used in github in
	// order to generate a link with a hash that a user would scroll to
	// <h1 id="goldmark">goldmark</h1> => github.com/yuin/goldmark#goldmark
	parser.WithAutoHeadingID(),
	// Include custom ASTTransformer using the readme and module info to
	// use translateRelativeLink and translateHTML to modify the AST
	// before it is rendered.
	parser.WithASTTransformers(
	util.Prioritized(&astTransformer{
	info: sourceInfo,
	readme: readme,
	}, astTransformerPriority),
	// Extract links after we have transformed the URLs.
	util.Prioritized(el, astTransformerPriority+1),
	util.Prioritized(et, astTransformerPriority+1),
	),
	),
	// These extensions lets users write HTML code in the README. This is
	// fine since we process the contents using bluemonday after.
	goldmark.WithRendererOptions(goldmarkHtml.WithUnsafe(), goldmarkHtml.WithXHTML()),
	goldmark.WithExtensions(
	extension.GFM, // Support Github Flavored Markdown.
	emoji.Emoji, // Support Github markdown emoji markup.
	),
	)
	gdMarkdown.Renderer().AddOptions(
	renderer.WithNodeRenderers(
	util.Prioritized(newHTMLRenderer(sourceInfo, readme), 100),
	),
	)
	contents := []byte(readme.Contents)
	gdParser := gdMarkdown.Parser()
	reader := gmtext.NewReader(contents)
	pctx := parser.NewContext(parser.WithIDs(newIDs()))
	doc := gdParser.Parse(reader, parser.WithContext(pctx))
	gdRenderer := gdMarkdown.Renderer()

	var b bytes.Buffer
	defer func() {
	// It's possible for gdRenderer.Render to panic. For example,
	// https://pkg.go.dev/github.com/jinghzhu/k8scrd/pkg/crd/jinghzhu/v1
	// results in a panic because gdRenderer.Render tries to index a slice
	// out of bounds.
	//
	// In case of a panic from gdRenderer.Render, treat this as a normal
	// error from that function.
	if p := recover(); p != nil {
	log.Debugf(ctx, "gdRenderer.Render: %v", p)
	frontendReadme = &Readme{}
	err = nil
	}
	}()
	if err := gdRenderer.Render(&b, contents, doc); err != nil {
	log.Debugf(ctx, "gdRenderer.Render: %v", err)
	return &Readme{}, nil
	}
	return &Readme{
	HTML: sanitizeHTML(&b),
	Outline: et.Headings,
	Links: el.links,
	}, nil
	}

	// sanitizeHTML passes the HTML held in b through a bluemonday policy and
	// returns the sanitized result as a safehtml.HTML value.
	//
	// The policy starts from bluemonday.UGCPolicy and additionally allows the
	// attributes registered below.
	func sanitizeHTML(b *bytes.Buffer) safehtml.HTML {
		policy := bluemonday.UGCPolicy()

		// Permit width and align on images, divs and paragraphs.
		for _, el := range []string{"img", "div", "p"} {
			policy.AllowAttrs("width", "align").OnElements(el)
		}
		// Allow accessible headings (i.e. <div role="heading" aria-level="7">).
		policy.AllowAttrs("width", "align", "role", "aria-level").OnElements("div")
		// The class attribute on headings is needed to preserve GitHub-style
		// heading font sizes.
		policy.AllowAttrs("class").OnElements("h1", "h2", "h3", "h4", "h5", "h6")

		cleaned := policy.SanitizeBytes(b.Bytes())
		return uncheckedconversions.HTMLFromStringKnownToSatisfyTypeContract(string(cleaned))
	}