blob: 7f2b9d05fffaadbdea4347b4c161321368967f0b [file] [log] [blame]
// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package frontend
import (
"bytes"
"context"
"fmt"
"io"
"net/url"
"path"
"path/filepath"
"strings"
"github.com/google/safehtml"
"github.com/google/safehtml/template"
"github.com/google/safehtml/uncheckedconversions"
"github.com/microcosm-cc/bluemonday"
"github.com/russross/blackfriday/v2"
"golang.org/x/net/html"
"golang.org/x/net/html/atom"
"golang.org/x/pkgsite/internal"
"golang.org/x/pkgsite/internal/derrors"
"golang.org/x/pkgsite/internal/source"
)
// OverviewDetails contains all of the data that the readme template
// needs to populate.
type OverviewDetails struct {
// ModulePath is the path of the module being described.
ModulePath string
// ModuleURL is the link to the module page, as built by constructModuleURL.
ModuleURL string
// PackageSourceURL is the URL of the package directory in the source
// repository. It is filled in only by fetchPackageOverviewDetails.
PackageSourceURL string
// ReadMe is the rendered README HTML. It is left as the zero value unless
// the unit is redistributable and a readme is available.
ReadMe safehtml.HTML
// ReadMeSource is the value produced by fileSource for the readme file.
// It is set under the same conditions as ReadMe.
ReadMeSource string
// Redistributable records whether the content may be displayed.
Redistributable bool
// RepositoryURL is the repository URL reported by the source info.
RepositoryURL string
}
// fetchOverviewDetails loads the unit described by um (including its readme)
// from ds and builds an OverviewDetails from it.
//
// versionedLinks says whether the constructed URLs should have versions.
// Any error from the data source or from rendering the readme is returned
// unchanged.
func fetchOverviewDetails(ctx context.Context, ds internal.DataSource, um *internal.UnitMeta, versionedLinks bool) (*OverviewDetails, error) {
	unit, err := ds.GetUnit(ctx, um, internal.WithReadme)
	if err != nil {
		return nil, err
	}

	// Module-level fields come from the unit metadata; the source info comes
	// from the freshly fetched unit.
	modInfo := &internal.ModuleInfo{
		ModulePath:        um.ModulePath,
		Version:           um.Version,
		CommitTime:        um.CommitTime,
		IsRedistributable: um.IsRedistributable,
		SourceInfo:        unit.SourceInfo,
	}

	if unit.Readme == nil {
		return constructOverviewDetails(ctx, modInfo, nil, unit.IsRedistributable, versionedLinks)
	}
	// Hand over a copy holding only the path and contents of the readme.
	readmeCopy := &internal.Readme{
		Filepath: unit.Readme.Filepath,
		Contents: unit.Readme.Contents,
	}
	return constructOverviewDetails(ctx, modInfo, readmeCopy, unit.IsRedistributable, versionedLinks)
}
// constructOverviewDetails builds an OverviewDetails from the given module
// info and readme.
//
// versionedLinks says whether the constructed URLs should have versions; when
// it is false, internal.LatestVersion is used for the module link instead.
// The readme is rendered only when isRedistributable is true and readme is
// non-nil; otherwise ReadMe and ReadMeSource stay empty.
func constructOverviewDetails(ctx context.Context, mi *internal.ModuleInfo, readme *internal.Readme, isRedistributable bool, versionedLinks bool) (*OverviewDetails, error) {
	version := internal.LatestVersion
	if versionedLinks {
		version = linkVersion(mi.Version, mi.ModulePath)
	}

	details := &OverviewDetails{
		ModulePath:      mi.ModulePath,
		ModuleURL:       constructModuleURL(mi.ModulePath, version),
		RepositoryURL:   mi.SourceInfo.RepoURL(),
		Redistributable: isRedistributable,
	}

	// Nothing more to do when the readme may not, or cannot, be shown.
	if !details.Redistributable || readme == nil {
		return details, nil
	}

	details.ReadMeSource = fileSource(mi.ModulePath, mi.Version, readme.Filepath)
	rendered, err := ReadmeHTML(ctx, mi, readme)
	if err != nil {
		return nil, err
	}
	details.ReadMe = rendered
	return details, nil
}
// fetchPackageOverviewDetails returns the OverviewDetails for the unit
// described by um, with the repository and package-directory URLs taken from
// um's own source info.
//
// versionedLinks is passed through to fetchOverviewDetails.
func fetchPackageOverviewDetails(ctx context.Context, ds internal.DataSource, um *internal.UnitMeta, versionedLinks bool) (*OverviewDetails, error) {
	details, err := fetchOverviewDetails(ctx, ds, um, versionedLinks)
	if err != nil {
		return nil, err
	}

	details.RepositoryURL = um.SourceInfo.RepoURL()

	// The package directory is the part of the unit path below the module path.
	dir := internal.Suffix(um.Path, um.ModulePath)
	details.PackageSourceURL = um.SourceInfo.DirectoryURL(dir)
	return details, nil
}
// ReadmeHTML sanitizes readmeContents based on bluemondy.UGCPolicy and returns
// a safehtml.HTML. If readmeFilePath indicates that this is a markdown file,
// it will also render the markdown contents using blackfriday.
//
// It is exported to support external testing.
func ReadmeHTML(ctx context.Context, mi *internal.ModuleInfo, readme *internal.Readme) (_ safehtml.HTML, err error) {
defer derrors.Wrap(&err, "readmeHTML(%s@%s)", mi.ModulePath, mi.Version)
if readme == nil || readme.Contents == "" {
return safehtml.HTML{}, nil
}
if !isMarkdown(readme.Filepath) {
t := template.Must(template.New("").Parse(`<pre class="readme">{{.}}</pre>`))
h, err := t.ExecuteToHTML(readme.Contents)
if err != nil {
return safehtml.HTML{}, err
}
return h, nil
}
// blackfriday.Run() uses CommonHTMLFlags and CommonExtensions by default.
renderer := blackfriday.NewHTMLRenderer(blackfriday.HTMLRendererParameters{Flags: blackfriday.CommonHTMLFlags})
parser := blackfriday.New(blackfriday.WithExtensions(blackfriday.CommonExtensions | blackfriday.AutoHeadingIDs))
// Render HTML similar to blackfriday.Run(), but here we implement a custom
// Walk function in order to modify image paths in the rendered HTML.
b := &bytes.Buffer{}
contents := bytes.ReplaceAll([]byte(readme.Contents), []byte("\r"), nil)
rootNode := parser.Parse(contents)
var walkErr error
rootNode.Walk(func(node *blackfriday.Node, entering bool) blackfriday.WalkStatus {
switch node.Type {
case blackfriday.Image, blackfriday.Link:
useRaw := node.Type == blackfriday.Image
if d := translateRelativeLink(string(node.LinkData.Destination), mi.SourceInfo, useRaw, readme); d != "" {
node.LinkData.Destination = []byte(d)
}
case blackfriday.HTMLBlock, blackfriday.HTMLSpan:
d, err := translateHTML(node.Literal, mi.SourceInfo, readme)
if err != nil {
walkErr = fmt.Errorf("couldn't transform html block(%s): %w", node.Literal, err)
return blackfriday.Terminate
}
node.Literal = d
}
return renderer.RenderNode(b, node, entering)
})
if walkErr != nil {
return safehtml.HTML{}, walkErr
}
return sanitizeHTML(b), nil
}
// sanitizeHTML reads HTML from r, runs it through a bluemonday policy and
// returns the result as a safehtml.HTML.
func sanitizeHTML(r io.Reader) safehtml.HTML {
	// Start from bluemonday.UGCPolicy, which allows a broad selection of HTML
	// elements and attributes that are safe for user generated content. This
	// policy does not allow iframes, object, embed, styles, script, etc.
	policy := bluemonday.UGCPolicy()

	// Additionally allow width and align attributes on img, div, and p tags.
	// Readmes use these to center elements and to size images appropriately,
	// like the gin-gonic/logo/color.png image in the
	// github.com/gin-gonic/gin README.
	for _, element := range []string{"img", "div", "p"} {
		policy.AllowAttrs("width", "align").OnElements(element)
	}

	sanitized := policy.SanitizeReader(r).String()

	// Trust that bluemonday properly sanitizes the HTML.
	return uncheckedconversions.HTMLFromStringKnownToSatisfyTypeContract(sanitized)
}
// isMarkdown reports whether filename has a markdown file extension.
// The comparison ignores case.
func isMarkdown(filename string) bool {
	switch strings.ToLower(filepath.Ext(filename)) {
	// https://tools.ietf.org/html/rfc7763 mentions both extensions.
	case ".md", ".markdown":
		return true
	}
	return false
}
// translateRelativeLink converts a relative link or image destination found in
// a README into a URL built from the module's source info.
//
// README files sometimes use relative paths to files inside the repository.
// As the discovery site doesn't host the full repository content, in order
// for such an image to render (or such a link to resolve), we need to convert
// the relative path to an absolute URL to the hosted file.
//
// dest is resolved relative to the directory containing readme.Filepath. When
// useRaw is true the result comes from info.RawURL (used for images);
// otherwise it comes from info.FileURL.
//
// The empty string is returned when dest should be left as is: it cannot be
// parsed, it has a scheme, it names a host (a scheme-relative reference such
// as "//example.com/logo.png"), or it has no path (for example a bare
// fragment).
func translateRelativeLink(dest string, info *source.Info, useRaw bool, readme *internal.Readme) string {
	destURL, err := url.Parse(dest)
	// A URL with a host but no scheme is not IsAbs, yet it still points at
	// another site; treating it as a repository path would drop the host.
	if err != nil || destURL.IsAbs() || destURL.Host != "" {
		return ""
	}
	if destURL.Path == "" {
		// This is a fragment; leave it.
		return ""
	}
	// Paths are relative to the README location.
	destPath := path.Join(path.Dir(readme.Filepath), path.Clean(trimmedEscapedPath(destURL)))
	if useRaw {
		return info.RawURL(destPath)
	}
	return info.FileURL(destPath)
}
// trimmedEscapedPath strips leading and trailing whitespace from u.Path and
// returns the escaped form of the result.
//
// Note that u is modified: its Path field is replaced by the trimmed value.
func trimmedEscapedPath(u *url.URL) string {
	trimmed := strings.TrimSpace(u.Path)
	u.Path = trimmed
	return u.EscapedPath()
}
// translateHTML parses htmlText as an HTML fragment, rewrites the src
// attribute of any img elements so that relative paths point at the image in
// the source repository, and returns the resulting HTML.
//
// The original htmlText is returned unchanged when nothing was rewritten, or
// when the parsed fragment does not have the expected html/head/body shape
// (for example, an HTML comment). An error is returned if parsing or
// rendering fails, or if a top-level parsed node is not an html element.
func translateHTML(htmlText []byte, info *source.Info, readme *internal.Readme) (_ []byte, err error) {
	defer derrors.Wrap(&err, "translateHTML(readme.Filepath=%s)", readme.Filepath)

	fragments, err := html.ParseFragment(bytes.NewReader(htmlText), nil)
	if err != nil {
		return nil, err
	}

	var (
		out      bytes.Buffer
		modified bool
	)
	for _, top := range fragments {
		// We expect every parsed node to begin with <html><head></head><body>.
		if top.DataAtom != atom.Html {
			return nil, fmt.Errorf("top-level node is %q, expected 'html'", top.DataAtom)
		}

		// When the parsed html nodes don't have a valid structure
		// (i.e: an html comment), then just return the original text.
		first := top.FirstChild
		if first == nil {
			return htmlText, nil
		}
		body := first.NextSibling
		if body == nil || body.DataAtom != atom.Body {
			return htmlText, nil
		}

		// Rewrite and re-render each child of the body in order.
		for child := body.FirstChild; child != nil; child = child.NextSibling {
			modified = walkHTML(child, info, readme) || modified
			if err := html.Render(&out, child); err != nil {
				return nil, err
			}
		}
	}

	// If there were no changes, return the original.
	if !modified {
		return htmlText, nil
	}
	return out.Bytes(), nil
}
// walkHTML visits n and all of its descendants. For every img element it
// replaces a relative src attribute value with the URL that
// translateRelativeLink produces for the image in the source repository.
// It reports whether it made a change.
func walkHTML(n *html.Node, info *source.Info, readme *internal.Readme) bool {
	modified := false

	if isImg := n.Type == html.ElementNode && n.DataAtom == atom.Img; isImg {
		// Rebuild the attribute list, substituting translated src values.
		var rebuilt []html.Attribute
		for _, attr := range n.Attr {
			if attr.Key == "src" {
				// An empty result means the value should be left as is.
				if abs := translateRelativeLink(attr.Val, info, true, readme); abs != "" {
					attr.Val = abs
					modified = true
				}
			}
			rebuilt = append(rebuilt, attr)
		}
		n.Attr = rebuilt
	}

	for child := n.FirstChild; child != nil; child = child.NextSibling {
		// Always recurse, even when a change has already been recorded.
		modified = walkHTML(child, info, readme) || modified
	}
	return modified
}