blob: 0c076b046fe77df007d908276bd8f3fcb7f4169e [file] [log] [blame]
// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package frontend
import (
"bytes"
"context"
"fmt"
"html/template"
"net/url"
"path"
"path/filepath"
"strings"
"github.com/microcosm-cc/bluemonday"
"github.com/russross/blackfriday/v2"
"golang.org/x/net/html"
"golang.org/x/net/html/atom"
"golang.org/x/pkgsite/internal"
"golang.org/x/pkgsite/internal/experiment"
"golang.org/x/pkgsite/internal/log"
"golang.org/x/pkgsite/internal/stdlib"
)
// OverviewDetails carries everything the readme (overview) template needs
// in order to render.
type OverviewDetails struct {
	// ModulePath is the path of the module the overview belongs to.
	ModulePath string
	// ModuleURL is the link to the module, built by constructModuleURL.
	ModuleURL string
	// PackageSourceURL is the source-hosting URL of the package directory.
	// It is only set by the package-level fetch functions.
	PackageSourceURL string
	// ReadMe is the rendered readme, as produced by readmeHTML.
	ReadMe template.HTML
	// ReadMeSource identifies where the readme came from, as produced by
	// fileSource from the module path, version and readme file path.
	ReadMeSource string
	// Redistributable reports whether the content may be redistributed.
	// The readme fields are populated only when it is true.
	Redistributable bool
	// RepositoryURL is the URL of the source repository.
	RepositoryURL string
}
// constructOverviewDetails builds an OverviewDetails for the module described
// by mi.
//
// versionedLinks says whether the constructed URLs should have versions: when
// it is true the module URL uses the link version derived from mi.Version,
// otherwise it uses internal.LatestVersion. The readme fields are filled in
// only when isRedistributable is true and readme is non-nil.
func constructOverviewDetails(ctx context.Context, mi *internal.ModuleInfo, readme *internal.Readme, isRedistributable bool, versionedLinks bool) *OverviewDetails {
	version := internal.LatestVersion
	if versionedLinks {
		version = linkVersion(mi.Version, mi.ModulePath)
	}

	od := new(OverviewDetails)
	od.ModulePath = mi.ModulePath
	od.ModuleURL = constructModuleURL(mi.ModulePath, version)
	od.RepositoryURL = mi.SourceInfo.RepoURL()
	od.Redistributable = isRedistributable

	// Without permission to redistribute, or without a readme, there is
	// nothing more to populate.
	if !od.Redistributable || readme == nil {
		return od
	}
	od.ReadMeSource = fileSource(mi.ModulePath, mi.Version, readme.Filepath)
	od.ReadMe = readmeHTML(ctx, mi, readme)
	return od
}
// fetchPackageOverviewDetails returns the OverviewDetails for a legacy
// versioned package. The module-level fields come from
// constructOverviewDetails; the package source URL is added on top.
func fetchPackageOverviewDetails(ctx context.Context, pkg *internal.LegacyVersionedPackage, versionedLinks bool) *OverviewDetails {
	readme := &internal.Readme{
		Filepath: pkg.LegacyReadmeFilePath,
		Contents: pkg.LegacyReadmeContents,
	}
	details := constructOverviewDetails(ctx, &pkg.ModuleInfo, readme, pkg.LegacyPackage.IsRedistributable, versionedLinks)

	subdir := packageSubdir(pkg.Path, pkg.ModulePath)
	details.PackageSourceURL = pkg.SourceInfo.DirectoryURL(subdir)

	// Never report the overview as redistributable when the package is not.
	details.Redistributable = details.Redistributable && pkg.LegacyPackage.IsRedistributable
	return details
}
// fetchPackageOverviewDetailsNew returns the OverviewDetails for the given
// versioned directory.
//
// When versionedLinks is true the module URL uses the link version derived
// from vdir.Version; otherwise it uses internal.LatestVersion. The readme
// fields are filled in only when the directory is redistributable and it has
// a readme.
func fetchPackageOverviewDetailsNew(ctx context.Context, vdir *internal.VersionedDirectory, versionedLinks bool) *OverviewDetails {
	version := internal.LatestVersion
	if versionedLinks {
		version = linkVersion(vdir.Version, vdir.ModulePath)
	}

	od := new(OverviewDetails)
	od.ModulePath = vdir.ModulePath
	od.ModuleURL = constructModuleURL(vdir.ModulePath, version)
	od.RepositoryURL = vdir.SourceInfo.RepoURL()
	od.Redistributable = vdir.DirectoryNew.IsRedistributable
	od.PackageSourceURL = vdir.SourceInfo.DirectoryURL(packageSubdir(vdir.Path, vdir.ModulePath))

	if !od.Redistributable || vdir.Readme == nil {
		return od
	}
	od.ReadMeSource = fileSource(vdir.ModulePath, vdir.Version, vdir.Readme.Filepath)
	od.ReadMe = readmeHTML(ctx, &vdir.ModuleInfo, vdir.Readme)
	return od
}
// packageSubdir returns the directory of the package relative to the root of
// its module. It is empty for the module's root package, and the full package
// path for packages in the standard library module.
func packageSubdir(pkgPath, modulePath string) string {
	if pkgPath == modulePath {
		return ""
	}
	if modulePath == stdlib.ModulePath {
		return pkgPath
	}
	return strings.TrimPrefix(pkgPath, modulePath+"/")
}
// readmeHTML converts the contents of readme into HTML that is safe to embed
// in a page, and returns it as a template.HTML.
//
// A nil readme yields the empty string. If readme.Filepath does not indicate
// a markdown file, the contents are HTML-escaped and wrapped in a
// <pre class="readme"> element. Otherwise the markdown is rendered with
// blackfriday, relative link and image destinations are rewritten to point at
// the module's source, and the result is sanitized with a policy based on
// bluemonday.UGCPolicy.
//
// ctx is used to check whether the raw-HTML translation experiment is active
// and for logging.
func readmeHTML(ctx context.Context, mi *internal.ModuleInfo, readme *internal.Readme) template.HTML {
	if readme == nil {
		return ""
	}
	if !isMarkdown(readme.Filepath) {
		return template.HTML(fmt.Sprintf(`<pre class="readme">%s</pre>`, template.HTMLEscapeString(readme.Contents)))
	}

	// bluemonday.UGCPolicy allows a broad selection of HTML elements and
	// attributes that are safe for user generated content. This policy does
	// not allow iframes, object, embed, styles, script, etc.
	p := bluemonday.UGCPolicy()

	// Allow width and align attributes on img, div, and p tags.
	// This is used to center elements in a readme as well as to size its
	// images appropriately where used, like the gin-gonic/logo/color.png
	// image in the github.com/gin-gonic/gin README.
	p.AllowAttrs("width", "align").OnElements("img", "div", "p")

	// blackfriday.Run() uses CommonHTMLFlags and CommonExtensions by default.
	renderer := blackfriday.NewHTMLRenderer(blackfriday.HTMLRendererParameters{Flags: blackfriday.CommonHTMLFlags})
	parser := blackfriday.New(blackfriday.WithExtensions(blackfriday.CommonExtensions | blackfriday.AutoHeadingIDs))

	// Render HTML similar to blackfriday.Run(), but here we implement a custom
	// Walk function in order to modify image paths in the rendered HTML.
	b := &bytes.Buffer{}
	rootNode := parser.Parse([]byte(readme.Contents))
	rootNode.Walk(func(node *blackfriday.Node, entering bool) blackfriday.WalkStatus {
		switch node.Type {
		case blackfriday.Image, blackfriday.Link:
			// Link and image nodes are visited twice (entering and leaving).
			// The destination only needs to be rewritten once; the change
			// is stored on the node and is still there on the second visit.
			if entering {
				useRaw := node.Type == blackfriday.Image
				if d := translateRelativeLink(string(node.LinkData.Destination), mi, useRaw, readme); d != "" {
					node.LinkData.Destination = []byte(d)
				}
			}
		case blackfriday.HTMLBlock, blackfriday.HTMLSpan:
			if experiment.IsActive(ctx, internal.ExperimentTranslateHTML) {
				d, err := translateHTML(node.Literal, mi, readme)
				if err != nil {
					// Best effort: keep the original HTML and log with the
					// caller's context rather than a detached one.
					log.Errorf(ctx, "couldn't transform html block(%s): %v", node.Literal, err)
				} else {
					node.Literal = d
				}
			}
		}
		return renderer.RenderNode(b, node, entering)
	})
	return template.HTML(p.SanitizeReader(b).String())
}
// isMarkdown reports whether the extension of filename marks it as a
// markdown file. The comparison ignores case.
func isMarkdown(filename string) bool {
	// https://tools.ietf.org/html/rfc7763 mentions both extensions.
	switch strings.ToLower(filepath.Ext(filename)) {
	case ".md", ".markdown":
		return true
	default:
		return false
	}
}
// translateRelativeLink converts a relative link or image path found in a
// README into a URL on the module's source host.
//
// README files sometimes use relative paths to files inside the repository.
// As the discovery site doesn't host the full repository content, in order
// for such an image to render (or link to resolve), the relative path has to
// be converted to an absolute URL to the hosted file.
//
// dest is resolved relative to the directory of readme.Filepath. If useRaw is
// true the result comes from mi.SourceInfo.RawURL, otherwise from
// mi.SourceInfo.FileURL.
//
// It returns the empty string, meaning "leave dest unchanged", when dest
// cannot be parsed, already names a scheme or a host, or consists only of a
// fragment and/or query.
func translateRelativeLink(dest string, mi *internal.ModuleInfo, useRaw bool, readme *internal.Readme) string {
	destURL, err := url.Parse(dest)
	if err != nil || destURL.IsAbs() {
		return ""
	}
	if destURL.Host != "" {
		// A scheme-relative reference such as //example.com/logo.png has no
		// scheme, so IsAbs is false, but it still points at another host.
		// It is not a path inside the repository; leave it.
		return ""
	}
	if destURL.Path == "" {
		// This is a fragment; leave it.
		return ""
	}
	// Paths are relative to the README location.
	destPath := path.Join(path.Dir(readme.Filepath), path.Clean(destURL.Path))
	if useRaw {
		return mi.SourceInfo.RawURL(destPath)
	}
	return mi.SourceInfo.FileURL(destPath)
}
// translateHTML parses htmlText as an HTML fragment, rewrites the src
// attribute of every img element so that relative paths point at the image
// in the module's source repository, and returns the re-rendered HTML.
//
// The original htmlText is returned unchanged when the parsed result does not
// have the expected <html><head></head><body>...</body> shape (for example,
// when the text is only an HTML comment) or when the body is empty. An error
// is returned if parsing or rendering fails.
func translateHTML(htmlText []byte, mi *internal.ModuleInfo, readme *internal.Readme) ([]byte, error) {
	nodes, err := html.ParseFragment(bytes.NewReader(htmlText), nil)
	if err != nil {
		return nil, err
	}
	var buf bytes.Buffer
	for _, n := range nodes {
		// Every parsed node is expected to begin with
		// <html><head></head><body>. Anything else is left untouched.
		if n.DataAtom != atom.Html {
			return htmlText, nil
		}
		// When the parsed html nodes don't have a valid structure
		// (i.e: an html comment), then just return the original text.
		if n.FirstChild == nil || n.FirstChild.NextSibling == nil || n.FirstChild.NextSibling.DataAtom != atom.Body {
			return htmlText, nil
		}
		body := n.FirstChild.NextSibling
		// If <body> has no children (empty content), then just return the
		// original text.
		if body.FirstChild == nil {
			return htmlText, nil
		}
		// Translate and render every child of <body>, not just the first,
		// so that sibling elements and trailing text in the fragment are
		// preserved in the output.
		for c := body.FirstChild; c != nil; c = c.NextSibling {
			walkHTML(c, mi, readme)
			if err := html.Render(&buf, c); err != nil {
				return nil, err
			}
		}
	}
	return buf.Bytes(), nil
}
// walkHTML visits n and all of its descendants. For every img element it
// replaces the value of each src attribute with the link returned by
// translateRelativeLink, unless that link is empty, in which case the
// attribute is kept as it was.
func walkHTML(n *html.Node, mi *internal.ModuleInfo, readme *internal.Readme) {
	isImg := n.Type == html.ElementNode && n.DataAtom == atom.Img
	if isImg {
		var rewritten []html.Attribute
		for i := range n.Attr {
			attr := n.Attr[i]
			if attr.Key == "src" {
				if link := translateRelativeLink(attr.Val, mi, true, readme); link != "" {
					attr.Val = link
				}
			}
			rewritten = append(rewritten, attr)
		}
		n.Attr = rewritten
	}

	// Recurse into the children, in document order.
	child := n.FirstChild
	for child != nil {
		walkHTML(child, mi, readme)
		child = child.NextSibling
	}
}