| /* |
| * Copyright 2020 The Go Authors. All rights reserved. |
| * Use of this source code is governed by a BSD-style |
| * license that can be found in the LICENSE file. |
| */ |
| |
| package frontend |
| |
| import ( |
| "bytes" |
| "context" |
| "fmt" |
| "regexp" |
| "strings" |
| "unicode" |
| |
| "github.com/yuin/goldmark/ast" |
| "github.com/yuin/goldmark/parser" |
| "github.com/yuin/goldmark/renderer" |
| "github.com/yuin/goldmark/renderer/html" |
| "github.com/yuin/goldmark/text" |
| "github.com/yuin/goldmark/util" |
| "golang.org/x/pkgsite/internal" |
| "golang.org/x/pkgsite/internal/log" |
| "golang.org/x/pkgsite/internal/source" |
| ) |
| |
| // astTransformer is a default transformer of the goldmark tree. We pass in |
| // readme information to use for the link transformations. |
| type astTransformer struct { |
| info *source.Info |
| readme *internal.Readme |
| } |
| |
| // Transform transforms the given AST tree. |
| func (g *astTransformer) Transform(node *ast.Document, reader text.Reader, pc parser.Context) { |
| _ = ast.Walk(node, func(n ast.Node, entering bool) (ast.WalkStatus, error) { |
| if !entering { |
| return ast.WalkContinue, nil |
| } |
| switch v := n.(type) { |
| case *ast.Image: |
| if d := translateLink(string(v.Destination), g.info, true, g.readme); d != "" { |
| v.Destination = []byte(d) |
| } |
| case *ast.Link: |
| if d := translateLink(string(v.Destination), g.info, false, g.readme); d != "" { |
| v.Destination = []byte(d) |
| } |
| } |
| return ast.WalkContinue, nil |
| }) |
| } |
| |
| // htmlRenderer is a renderer.NodeRenderer implementation that renders |
| // pkg.go.dev readme features. |
| type htmlRenderer struct { |
| html.Config |
| info *source.Info |
| readme *internal.Readme |
| // firstHeading and offset are used to calculate the first heading tag's level in a readme. |
| firstHeading bool |
| offset int |
| } |
| |
| // newHTMLRenderer creates a new HTMLRenderer for a readme. |
| func newHTMLRenderer(info *source.Info, readme *internal.Readme, opts ...html.Option) renderer.NodeRenderer { |
| r := &htmlRenderer{ |
| info: info, |
| readme: readme, |
| Config: html.NewConfig(), |
| firstHeading: true, |
| offset: 0, |
| } |
| for _, opt := range opts { |
| opt.SetHTMLOption(&r.Config) |
| } |
| return r |
| } |
| |
| // RegisterFuncs implements renderer.NodeRenderer.RegisterFuncs. |
| func (r *htmlRenderer) RegisterFuncs(reg renderer.NodeRendererFuncRegisterer) { |
| reg.Register(ast.KindHeading, r.renderHeading) |
| reg.Register(ast.KindHTMLBlock, r.renderHTMLBlock) |
| reg.Register(ast.KindRawHTML, r.renderRawHTML) |
| } |
| |
| func (r *htmlRenderer) renderHeading(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) { |
| n := node.(*ast.Heading) |
| if r.firstHeading { |
| // The offset ensures the first heading is always an <h3>. |
| r.offset = 3 - n.Level |
| r.firstHeading = false |
| } |
| newLevel := n.Level + r.offset |
| if entering { |
| if n.Level > 6 { |
| _, _ = w.WriteString(fmt.Sprintf(`<div class="h%d" role="heading" aria-level="%d"`, newLevel, n.Level)) |
| } else { |
| _, _ = w.WriteString(fmt.Sprintf(`<h%d class="h%d"`, newLevel, n.Level)) |
| } |
| if n.Attributes() != nil { |
| html.RenderAttributes(w, node, html.HeadingAttributeFilter) |
| } |
| _ = w.WriteByte('>') |
| } else { |
| if n.Level > 6 { |
| _, _ = w.WriteString("</div>\n") |
| } else { |
| _, _ = w.WriteString(fmt.Sprintf("</h%d>\n", newLevel)) |
| } |
| } |
| return ast.WalkContinue, nil |
| } |
| |
| // renderHTMLBlock is copied directly from the goldmark source code and |
| // modified to call translateHTML in every block |
| func (r *htmlRenderer) renderHTMLBlock(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) { |
| n := node.(*ast.HTMLBlock) |
| if entering { |
| if r.Unsafe { |
| l := n.Lines().Len() |
| for i := 0; i < l; i++ { |
| line := n.Lines().At(i) |
| d, err := translateHTML(line.Value(source), r.info, r.readme) |
| if err != nil { |
| return ast.WalkStop, err |
| } |
| _, _ = w.Write(d) |
| } |
| } else { |
| _, _ = w.WriteString("<!-- raw HTML omitted -->\n") |
| } |
| } else { |
| if n.HasClosure() { |
| if r.Unsafe { |
| closure := n.ClosureLine |
| _, _ = w.Write(closure.Value(source)) |
| } else { |
| _, _ = w.WriteString("<!-- raw HTML omitted -->\n") |
| } |
| } |
| } |
| return ast.WalkContinue, nil |
| } |
| |
| func (r *htmlRenderer) renderRawHTML(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) { |
| if !entering { |
| return ast.WalkSkipChildren, nil |
| } |
| if r.Unsafe { |
| n := node.(*ast.RawHTML) |
| for i := 0; i < n.Segments.Len(); i++ { |
| segment := n.Segments.At(i) |
| d, err := translateHTML(segment.Value(source), r.info, r.readme) |
| if err != nil { |
| return ast.WalkStop, err |
| } |
| _, _ = w.Write(d) |
| } |
| return ast.WalkSkipChildren, nil |
| } |
| _, _ = w.WriteString("<!-- raw HTML omitted -->") |
| return ast.WalkSkipChildren, nil |
| } |
| |
// ids records the element ids that are already in use in a document.
type ids struct {
	// values holds each recorded id as a key mapped to true.
	values map[string]bool
}
| |
| // newIDs creates a collection of element ids in a document. |
| func newIDs() parser.IDs { |
| return &ids{ |
| values: map[string]bool{}, |
| } |
| } |
| |
| // Generate turns heading content from a markdown document into a heading id. |
| // First HTML markup and markdown images are stripped then unicode letters |
| // and numbers are used to generate the final result. Finally, all heading ids |
| // are prefixed with "readme-" to avoid name collisions with other ids on the |
| // unit page. Duplicated heading ids are given an incremental suffix. See |
| // readme_test.go for examples. |
| func (s *ids) Generate(value []byte, kind ast.NodeKind) []byte { |
| // Matches strings like `<tag attr="value">Text</tag>` or `[![Text](file.svg)](link.html)`. |
| r := regexp.MustCompile(`(<[^<>]+>|\[\!\[[^\]]+]\([^\)]+\)\]\([^\)]+\))`) |
| str := r.ReplaceAllString(string(value), "") |
| f := func(c rune) bool { |
| return !unicode.IsLetter(c) && !unicode.IsNumber(c) |
| } |
| str = strings.Join(strings.FieldsFunc(str, f), "-") |
| str = strings.ToLower(str) |
| if len(str) == 0 { |
| if kind == ast.KindHeading { |
| str = "heading" |
| } else { |
| str = "id" |
| } |
| } |
| key := str |
| for i := 1; ; i++ { |
| if _, ok := s.values[key]; !ok { |
| s.values[key] = true |
| break |
| } |
| key = fmt.Sprintf("%s-%d", str, i) |
| } |
| return []byte("readme-" + key) |
| } |
| |
| // Put implements Put from the goldmark parser IDs interface. |
| func (s *ids) Put(value []byte) { |
| s.values[string(value)] = true |
| } |
| |
| type extractLinks struct { |
| ctx context.Context |
| inLinksHeading bool |
| links []link |
| } |
| |
// linkHeadingText is the text of the heading whose section links are
// extracted from.
const linkHeadingText = "Links"

// linkHeadingBytes is linkHeadingText as a byte slice, so that it can be
// compared against node contents without converting on every comparison.
var linkHeadingBytes = []byte(linkHeadingText)
| |
| // Transform extracts links from the "Links" section of a README. |
| func (e *extractLinks) Transform(node *ast.Document, reader text.Reader, pc parser.Context) { |
| err := ast.Walk(node, func(n ast.Node, entering bool) (ast.WalkStatus, error) { |
| if !entering { |
| return ast.WalkContinue, nil |
| } |
| switch n := n.(type) { |
| |
| case *ast.Heading: |
| // We are in the links heading from the point we see a heading with |
| // linkHeadingText until the point we see the next heading. |
| if e.inLinksHeading { |
| return ast.WalkStop, nil |
| } |
| if bytes.Equal(n.Text(reader.Source()), linkHeadingBytes) { |
| e.inLinksHeading = true |
| } |
| |
| case *ast.ListItem: |
| // When in the links heading, extract links from list items. |
| if !e.inLinksHeading { |
| return ast.WalkSkipChildren, nil |
| } |
| // We expect the pattern: ListItem -> TextBlock -> Link, with no |
| // other children. |
| if tb, ok := n.FirstChild().(*ast.TextBlock); ok { |
| if l, ok := tb.FirstChild().(*ast.Link); ok && l.NextSibling() == nil { |
| // Record the link. |
| e.links = append(e.links, link{ |
| Href: string(l.Destination), |
| Body: string(l.Text(reader.Source())), |
| }) |
| } |
| } |
| return ast.WalkSkipChildren, nil |
| } |
| |
| return ast.WalkContinue, nil |
| }) |
| if err != nil { |
| log.Errorf(e.ctx, "extractLinks.Transform: %v", err) |
| } |
| } |
| |
| type extractTOC struct { |
| ctx context.Context |
| headings []*Heading |
| } |
| |
| // Transform collects the headings from a readme into an outline |
| // of the document. It nests the headings based on the h-level hierarchy. |
| // See tests for heading levels in TestReadme for behavior. |
| func (e *extractTOC) Transform(node *ast.Document, reader text.Reader, pc parser.Context) { |
| var headings []*Heading |
| err := ast.Walk(node, func(n ast.Node, entering bool) (ast.WalkStatus, error) { |
| if n.Kind() == ast.KindHeading && entering { |
| var buffer bytes.Buffer |
| for c := n.FirstChild(); c != nil; c = c.NextSibling() { |
| // We keep only text content from the headings in the first pass. |
| if c.Kind() == ast.KindText { |
| buffer.Write(c.Text(reader.Source())) |
| } |
| } |
| // If the buffer is empty we take the text content from non-text nodes. |
| if buffer.Len() == 0 { |
| for c := n.FirstChild(); c != nil; c = c.NextSibling() { |
| buffer.Write(c.Text(reader.Source())) |
| } |
| } |
| heading := n.(*ast.Heading) |
| section := &Heading{ |
| Level: heading.Level, |
| Text: buffer.String(), |
| } |
| if id, ok := heading.AttributeString("id"); ok { |
| section.ID = string(id.([]byte)) |
| } |
| headings = append(headings, section) |
| return ast.WalkSkipChildren, nil |
| } |
| return ast.WalkContinue, nil |
| }) |
| if err != nil { |
| log.Errorf(e.ctx, "extractTOC.Transform: %v", err) |
| } |
| |
| // We nest the headings by walking through the list we extracted and |
| // establishing parent child relationships based on heading levels. |
| var nested []*Heading |
| for i, h := range headings { |
| if i == 0 { |
| nested = append(nested, h) |
| continue |
| } |
| parent := headings[i-1] |
| for parent != nil && parent.Level >= h.Level { |
| parent = parent.parent |
| } |
| if parent == nil { |
| nested = append(nested, h) |
| } else { |
| h.parent = parent |
| parent.Children = append(parent.Children, h) |
| } |
| } |
| // If there is only one top tevel heading with 1 or more children we |
| // assume it is the title of the document and remove it from the TOC. |
| if len(nested) == 1 && len(nested[0].Children) > 0 { |
| nested = nested[0].Children |
| } |
| e.headings = nested |
| } |