| // Copyright 2021 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| package markdown |
| |
| import ( |
| "bytes" |
| "fmt" |
| "reflect" |
| "slices" |
| "strings" |
| ) |
| |
| /* |
| |
| list block itself does not appear on stack? |
| item does |
| end of item returns block, |
| new item continues previous block if possible? |
| |
| if close leaves lines or blocks behind, panic |
| |
| close(b a list item, parent) |
| if b's parent's last block is list && item can be added to it, do so |
| else return new list |
| |
| or maybe not parent but just current list of blocks |
| |
| preserve LinkRefDefs? |
| |
| */ |
| |
// A Block is a block-level node of a parsed Markdown document.
//
// Block is implemented by:
//
//	CodeBlock
//	Document
//	Empty
//	HTMLBlock
//	Heading
//	Item
//	List
//	Paragraph
//	Quote
//	Text
//	ThematicBreak
type Block interface {
	// Pos returns the source line range covered by the block.
	Pos() Position
	// PrintHTML appends the block's HTML rendering to buf.
	PrintHTML(buf *bytes.Buffer)
	// printMarkdown appends the block's Markdown rendering to buf,
	// using the line prefixes carried in s.
	printMarkdown(buf *bytes.Buffer, s mdState)
}
| |
// mdState is the formatting state handed down through printMarkdown calls.
// prefix is written at the start of output lines; when prefix1 is non-empty
// it replaces prefix on the first line only.
type mdState struct {
	prefix  string
	prefix1 string // for first line only
	bullet  rune   // for list items
	num     int    // for numbered list items
}
| |
// A Position records the range of input lines a block spans.
// The parser counts lines starting at 1.
type Position struct {
	StartLine int // line on which the block was opened
	EndLine   int // last line recorded as belonging to the block
}

// Pos returns p itself, so that any type embedding Position
// provides the Pos method required by Block.
func (p Position) Pos() Position {
	return p
}
| |
// buildState is the view of the parser that a blockBuilder's build method
// receives when its block is being closed. It is implemented by *parseState.
type buildState interface {
	// blocks returns the child blocks collected for the block being closed.
	blocks() []Block
	// pos returns the position recorded for the block being closed.
	pos() Position
	// last returns the most recently collected child block.
	last() Block
	// deleteLast removes the most recently collected child block.
	deleteLast()

	// link returns the link definition recorded for label, or nil.
	link(label string) *Link
	// defineLink records link as the definition for label.
	defineLink(label string, link *Link)
	// newText returns a Text block for text whose inline content
	// is parsed later, after all blocks have been built.
	newText(pos Position, text string) *Text
}
| |
// A blockBuilder holds the parsing logic for one kind of open block.
type blockBuilder interface {
	// extend is called for each new input line while the block is open.
	// It returns the line to hand to the next, more deeply nested block
	// and reports whether the block continues onto this line.
	extend(p *parseState, s line) (line, bool)
	// build is called when the block is closed and returns the finished Block.
	build(buildState) Block
}
| |
// An openBlock is one entry on the parser's stack of blocks
// that are still accepting input lines.
type openBlock struct {
	builder blockBuilder // logic used to extend and finally build the block
	inner   []Block      // completed child blocks, in order
	pos     Position     // lines covered so far
}
| |
// itemBuilder is the builder state for a list item.
// NOTE(review): the fields are not used in this part of the file; the
// per-field notes below are inferred from names only — confirm against
// the list-parsing code.
type itemBuilder struct {
	list        *listBuilder // presumably the builder of the enclosing list
	width       int          // presumably the indentation width of item content
	haveContent bool         // presumably whether the item has seen any content
}
| |
| func (p *parseState) last() Block { |
| ob := &p.stack[len(p.stack)-1] |
| return ob.inner[len(ob.inner)-1] |
| } |
| |
| func (p *parseState) deleteLast() { |
| ob := &p.stack[len(p.stack)-1] |
| ob.inner = ob.inner[:len(ob.inner)-1] |
| } |
| |
// A Text is a block holding a run of inline content.
type Text struct {
	Position
	Inline []Inline // parsed inline content; filled in from raw at the end of parsing
	raw    string   // unparsed source text
}
| |
| func (b *Text) PrintHTML(buf *bytes.Buffer) { |
| for _, x := range b.Inline { |
| x.PrintHTML(buf) |
| } |
| } |
| |
| func (b *Text) printMarkdown(buf *bytes.Buffer, s mdState) { |
| if s.prefix1 != "" { |
| buf.WriteString(s.prefix1) |
| } else { |
| buf.WriteString(s.prefix) |
| } |
| var prev Inline |
| for _, x := range b.Inline { |
| switch prev.(type) { |
| case *SoftBreak, *HardBreak: |
| buf.WriteString(s.prefix) |
| } |
| x.printMarkdown(buf) |
| prev = x |
| } |
| buf.WriteByte('\n') |
| } |
| |
| type rootBuilder struct{} |
| |
| func (b *rootBuilder) build(p buildState) Block { |
| return &Document{p.pos(), p.blocks(), p.(*parseState).links} |
| } |
| |
// A Document is the root Block of a parsed Markdown text.
type Document struct {
	Position
	Blocks []Block          // top-level blocks, in source order
	Links  map[string]*Link // link definitions keyed by label; nil if none were defined
}
| |
// A Parser is a Markdown parser.
// The exported fields in the struct can be filled in before calling
// [Parser.Parse] in order to customize the details of the parsing process.
// A Parser is safe for concurrent use by multiple goroutines.
type Parser struct {
	// HeadingIDs determines whether the parser accepts
	// the {#hdr} syntax for an HTML id="hdr" attribute on headings.
	// For example, if HeadingIDs is true then the Markdown
	//	## Overview {#overview}
	// will render as the HTML
	//	<h2 id="overview">Overview</h2>
	HeadingIDs bool

	// Strikethrough determines whether the parser accepts
	// ~abc~ and ~~abc~~ as strikethrough syntax, producing
	// <del>abc</del> in HTML.
	Strikethrough bool

	// TaskListItems determines whether the parser accepts
	// “task list items” as defined in GitHub Flavored Markdown.
	// When a list item begins with the plain text [ ] or [x]
	// that turns into an unchecked or checked check box.
	TaskListItems bool

	// TODO: document.
	// NOTE(review): these options are not consulted in this part of the file.
	AutoLinkText       bool
	AutoLinkAssumeHTTP bool

	// TODO: document.
	// NOTE(review): not consulted in this part of the file.
	Table bool

	// TODO: document.
	// NOTE(review): not consulted in this part of the file.
	Emoji bool

	// TODO: document.
	// NOTE(review): these options are not consulted in this part of the file.
	SmartDot   bool
	SmartDash  bool
	SmartQuote bool
}
| |
// parseState holds the state of a single parse.
// A fresh parseState is created for every call to Parser.parse.
type parseState struct {
	*Parser

	root      *Document        // finished document; set when the root block is closed
	links     map[string]*Link // link definitions by label; allocated on first defineLink
	lineno    int              // number of the line being processed, counting from 1
	stack     []openBlock      // currently open blocks, outermost (the root) first
	lineDepth int              // stack index of the deepest block matched by the current line; -1 before the first line

	corner bool // noticed corner case to ignore in cross-implementation testing

	// inlines
	s       string
	emitted int // s[:emitted] has been emitted into list
	list    []Inline

	// for fixup at end
	lists []*List // lists closed while TaskListItems is set
	texts []*Text // every Text made by newText; inline content is parsed after all blocks

	backticks backtickParser
}
| |
| func (p *parseState) newText(pos Position, text string) *Text { |
| b := &Text{Position: pos, raw: text} |
| p.texts = append(p.texts, b) |
| return b |
| } |
| |
| func (p *parseState) blocks() []Block { |
| b := &p.stack[len(p.stack)-1] |
| return b.inner |
| } |
| |
| func (p *parseState) pos() Position { |
| b := &p.stack[len(p.stack)-1] |
| return b.pos |
| } |
| |
| func (p *Parser) Parse(text string) *Document { |
| d, _ := p.parse(text) |
| return d |
| } |
| |
| func (p *Parser) parse(text string) (d *Document, corner bool) { |
| var ps parseState |
| ps.Parser = p |
| if strings.Contains(text, "\x00") { |
| text = strings.ReplaceAll(text, "\x00", "\uFFFD") |
| ps.corner = true // goldmark does not replace NUL |
| } |
| |
| ps.lineDepth = -1 |
| ps.addBlock(&rootBuilder{}) |
| for text != "" { |
| var ln string |
| i := strings.Index(text, "\n") |
| j := strings.Index(text, "\r") |
| var nl byte |
| switch { |
| case j >= 0 && (i < 0 || j < i): // have \r, maybe \r\n |
| ln = text[:j] |
| if i == j+1 { |
| text = text[j+2:] |
| nl = '\r' + '\n' |
| } else { |
| text = text[j+1:] |
| nl = '\r' |
| } |
| case i >= 0: |
| ln, text = text[:i], text[i+1:] |
| nl = '\n' |
| default: |
| ln, text = text, "" |
| } |
| ps.lineno++ |
| ps.addLine(line{text: ln, nl: nl}) |
| } |
| ps.trimStack(0) |
| |
| for _, t := range ps.texts { |
| t.Inline = ps.inline(t.raw) |
| } |
| |
| if p.TaskListItems { |
| for _, list := range ps.lists { |
| ps.taskList(list) |
| } |
| } |
| |
| return ps.root, ps.corner |
| } |
| |
| func (p *parseState) curB() blockBuilder { |
| if p.lineDepth < len(p.stack) { |
| return p.stack[p.lineDepth].builder |
| } |
| return nil |
| } |
| |
| func (p *parseState) nextB() blockBuilder { |
| if p.lineDepth+1 < len(p.stack) { |
| return p.stack[p.lineDepth+1].builder |
| } |
| return nil |
| } |
| func (p *parseState) trimStack(depth int) { |
| if len(p.stack) < depth { |
| panic("trimStack") |
| } |
| for len(p.stack) > depth { |
| p.closeBlock() |
| } |
| } |
| |
| func (p *parseState) addBlock(c blockBuilder) { |
| p.trimStack(p.lineDepth + 1) |
| p.stack = append(p.stack, openBlock{}) |
| ob := &p.stack[len(p.stack)-1] |
| ob.builder = c |
| ob.pos.StartLine = p.lineno |
| ob.pos.EndLine = p.lineno |
| } |
| |
| func (p *parseState) doneBlock(b Block) { |
| p.trimStack(p.lineDepth + 1) |
| ob := &p.stack[len(p.stack)-1] |
| ob.inner = append(ob.inner, b) |
| } |
| |
| func (p *parseState) para() *paraBuilder { |
| if b, ok := p.stack[len(p.stack)-1].builder.(*paraBuilder); ok { |
| return b |
| } |
| return nil |
| } |
| |
| func (p *parseState) closeBlock() Block { |
| b := &p.stack[len(p.stack)-1] |
| if b.builder == nil { |
| println("closeBlock", len(p.stack)-1) |
| } |
| blk := b.builder.build(p) |
| if list, ok := blk.(*List); ok { |
| p.corner = p.corner || listCorner(list) |
| if p.TaskListItems { |
| p.lists = append(p.lists, list) |
| } |
| } |
| p.stack = p.stack[:len(p.stack)-1] |
| if len(p.stack) > 0 { |
| b := &p.stack[len(p.stack)-1] |
| b.inner = append(b.inner, blk) |
| // _ = b |
| } else { |
| p.root = blk.(*Document) |
| } |
| return blk |
| } |
| |
| func (p *parseState) link(label string) *Link { |
| return p.links[label] |
| } |
| |
| func (p *parseState) defineLink(label string, link *Link) { |
| if p.links == nil { |
| p.links = make(map[string]*Link) |
| } |
| p.links[label] = link |
| } |
| |
// A line is a cursor over one line of input.
// Consuming a tab one column at a time leaves the remaining columns
// of the tab stop in spaces.
type line struct {
	spaces int    // virtual spaces still pending before text[i:], left over from a partly consumed tab
	i      int    // index in text of the next unread byte
	tab    int    // index just past the most recently consumed tab; base for tab-stop arithmetic
	text   string // the line's text, without its line ending
	nl     byte   // newline character ending this line: \r or \n, '\r'+'\n' for \r\n, or zero for EOF
}
| |
| func (p *parseState) addLine(s line) { |
| // Process continued prefixes. |
| p.lineDepth = 0 |
| for ; p.lineDepth+1 < len(p.stack); p.lineDepth++ { |
| old := s |
| var ok bool |
| s, ok = p.stack[p.lineDepth+1].builder.extend(p, s) |
| if !old.isBlank() && (ok || s != old) { |
| p.stack[p.lineDepth+1].pos.EndLine = p.lineno |
| } |
| if !ok { |
| break |
| } |
| } |
| |
| if s.isBlank() { |
| p.trimStack(p.lineDepth + 1) |
| return |
| } |
| |
| // Process new prefixes, if any. |
| Prefixes: |
| // Start new block inside p.stack[depth]. |
| for _, fn := range news { |
| if l, ok := fn(p, s); ok { |
| s = l |
| if s.isBlank() { |
| return |
| } |
| p.lineDepth++ |
| goto Prefixes |
| } |
| } |
| |
| newPara(p, s) |
| } |
| |
| func (c *rootBuilder) extend(p *parseState, s line) (line, bool) { |
| panic("root extend") |
| } |
| |
// news lists the functions that try to start a new block at the current
// position of a line. addLine tries them in this order and restarts from
// the top after each success, so the order is significant.
var news = []func(*parseState, line) (line, bool){
	newQuote,
	newATXHeading,
	newSetextHeading,
	newHR,
	newListItem,
	newHTML,
	newFence,
	newPre,
}
| |
| func (s *line) peek() byte { |
| if s.spaces > 0 { |
| return ' ' |
| } |
| if s.i >= len(s.text) { |
| return 0 |
| } |
| return s.text[s.i] |
| } |
| |
| func (s *line) skipSpace() { |
| s.spaces = 0 |
| for s.i < len(s.text) && (s.text[s.i] == ' ' || s.text[s.i] == '\t') { |
| s.i++ |
| } |
| } |
| |
| func (s *line) trimSpace(min, max int, eolOK bool) bool { |
| t := *s |
| for n := 0; n < max; n++ { |
| if t.spaces > 0 { |
| t.spaces-- |
| continue |
| } |
| if t.i >= len(t.text) && eolOK { |
| continue |
| } |
| if t.i < len(t.text) { |
| switch t.text[t.i] { |
| case '\t': |
| t.spaces = 4 - (t.i-t.tab)&3 - 1 |
| t.i++ |
| t.tab = t.i |
| continue |
| case ' ': |
| t.i++ |
| continue |
| } |
| } |
| if n >= min { |
| break |
| } |
| return false |
| } |
| *s = t |
| return true |
| } |
| |
| func (s *line) trim(c byte) bool { |
| if s.spaces > 0 { |
| if c == ' ' { |
| s.spaces-- |
| return true |
| } |
| return false |
| } |
| if s.i < len(s.text) && s.text[s.i] == c { |
| s.i++ |
| return true |
| } |
| return false |
| } |
| |
| func (s *line) string() string { |
| switch s.spaces { |
| case 0: |
| return s.text[s.i:] |
| case 1: |
| return " " + s.text[s.i:] |
| case 2: |
| return " " + s.text[s.i:] |
| case 3: |
| return " " + s.text[s.i:] |
| } |
| panic("bad spaces") |
| } |
| |
// trimLeftSpaceTab returns s without its leading spaces and tabs.
func trimLeftSpaceTab(s string) string {
	return strings.TrimLeft(s, " \t")
}
| |
// trimRightSpaceTab returns s without its trailing spaces and tabs.
func trimRightSpaceTab(s string) string {
	return strings.TrimRight(s, " \t")
}
| |
// trimSpaceTab returns s without its leading and trailing spaces and tabs.
func trimSpaceTab(s string) string {
	return strings.Trim(s, " \t")
}
| |
// trimSpace returns s without its leading and trailing spaces and tabs.
// Other white space, such as newlines, is left in place.
func trimSpace(s string) string {
	const blanks = " \t"
	return strings.TrimRight(strings.TrimLeft(s, blanks), blanks)
}
| |
// trimSpaceTabNewline returns s without its leading and trailing
// spaces, tabs, and newline ('\n') characters.
func trimSpaceTabNewline(s string) string {
	return strings.Trim(s, " \t\n")
}
| |
| func (s *line) isBlank() bool { |
| return trimLeftSpaceTab(s.text[s.i:]) == "" |
| } |
| |
| func (s *line) eof() bool { |
| return s.i >= len(s.text) |
| } |
| |
| func (s *line) trimSpaceString() string { |
| return trimLeftSpaceTab(s.text[s.i:]) |
| } |
| |
| func (s *line) trimString() string { |
| return trimSpaceTab(s.text[s.i:]) |
| } |
| |
| func ToHTML(b Block) string { |
| var buf bytes.Buffer |
| b.PrintHTML(&buf) |
| return buf.String() |
| } |
| |
| func ToMarkdown(b Block) string { |
| var buf bytes.Buffer |
| b.printMarkdown(&buf, mdState{}) |
| s := buf.String() |
| // Remove final extra newline. |
| if strings.HasSuffix(s, "\n\n") { |
| s = s[:len(s)-1] |
| } |
| return s |
| } |
| |
| func (b *Document) PrintHTML(buf *bytes.Buffer) { |
| for _, c := range b.Blocks { |
| c.PrintHTML(buf) |
| } |
| } |
| |
| func (b *Document) printMarkdown(buf *bytes.Buffer, s mdState) { |
| printMarkdownBlocks(b.Blocks, buf, s) |
| // Print links sorted by keys for deterministic output. |
| var keys []string |
| for k := range b.Links { |
| keys = append(keys, k) |
| } |
| slices.Sort(keys) |
| for _, k := range keys { |
| l := b.Links[k] |
| fmt.Fprintf(buf, "[%s]: %s", k, l.URL) |
| printLinkTitleMarkdown(buf, l.Title, l.TitleChar) |
| buf.WriteByte('\n') |
| } |
| } |
| |
| func printMarkdownBlocks(bs []Block, buf *bytes.Buffer, s mdState) { |
| prevEnd := 0 |
| for _, b := range bs { |
| // Preserve blank lines between blocks. |
| if prevEnd > 0 { |
| for i := prevEnd + 1; i < b.Pos().StartLine; i++ { |
| buf.WriteString(trimRightSpaceTab(s.prefix)) |
| buf.WriteByte('\n') |
| } |
| } |
| b.printMarkdown(buf, s) |
| prevEnd = b.Pos().EndLine |
| s.prefix1 = "" // item prefix only for first block |
| } |
| } |
| |
// Reflection types used by the debug printer (printb and printslice)
// to classify struct fields.
var (
	blockType   = reflect.TypeOf(new(Block)).Elem()    // the Block interface type
	blocksType  = reflect.TypeOf(new([]Block)).Elem()  // []Block
	inlinesType = reflect.TypeOf(new([]Inline)).Elem() // []Inline
)
| |
| func printb(buf *bytes.Buffer, b Block, prefix string) { |
| fmt.Fprintf(buf, "(%T", b) |
| v := reflect.ValueOf(b) |
| v = reflect.Indirect(v) |
| if v.Kind() != reflect.Struct { |
| fmt.Fprintf(buf, " %v", b) |
| } |
| t := v.Type() |
| for i := 0; i < t.NumField(); i++ { |
| tf := t.Field(i) |
| if !tf.IsExported() { |
| continue |
| } |
| if tf.Type == inlinesType { |
| printis(buf, v.Field(i).Interface().([]Inline)) |
| } else if tf.Type.Kind() == reflect.Slice && tf.Type.Elem().Kind() == reflect.String { |
| fmt.Fprintf(buf, " %s:%q", tf.Name, v.Field(i)) |
| } else if tf.Type != blocksType && !tf.Type.Implements(blockType) && tf.Type.Kind() != reflect.Slice { |
| fmt.Fprintf(buf, " %s:%v", tf.Name, v.Field(i)) |
| } |
| } |
| |
| prefix += "\t" |
| for i := 0; i < t.NumField(); i++ { |
| tf := t.Field(i) |
| if !tf.IsExported() { |
| continue |
| } |
| if tf.Type.Implements(blockType) { |
| fmt.Fprintf(buf, "\n%s", prefix) |
| printb(buf, v.Field(i).Interface().(Block), prefix) |
| } else if tf.Type == blocksType { |
| vf := v.Field(i) |
| for i := 0; i < vf.Len(); i++ { |
| fmt.Fprintf(buf, "\n%s", prefix) |
| printb(buf, vf.Index(i).Interface().(Block), prefix) |
| } |
| } else if tf.Type.Kind() == reflect.Slice && tf.Type != inlinesType && tf.Type.Elem().Kind() != reflect.String { |
| fmt.Fprintf(buf, "\n%s%s:", prefix, t.Field(i).Name) |
| printslice(buf, v.Field(i), prefix) |
| } |
| } |
| fmt.Fprintf(buf, ")") |
| } |
| |
| func printslice(buf *bytes.Buffer, v reflect.Value, prefix string) { |
| if v.Type().Elem().Kind() == reflect.Slice { |
| for i := 0; i < v.Len(); i++ { |
| fmt.Fprintf(buf, "\n%s#%d:", prefix, i) |
| printslice(buf, v.Index(i), prefix+"\t") |
| } |
| return |
| } |
| for i := 0; i < v.Len(); i++ { |
| fmt.Fprintf(buf, " ") |
| printb(buf, v.Index(i).Interface().(Block), prefix+"\t") |
| } |
| } |
| |
| func printi(buf *bytes.Buffer, in Inline) { |
| fmt.Fprintf(buf, "%T(", in) |
| v := reflect.ValueOf(in).Elem() |
| text := v.FieldByName("Text") |
| if text.IsValid() { |
| fmt.Fprintf(buf, "%q", text) |
| } |
| inner := v.FieldByName("Inner") |
| if inner.IsValid() { |
| printis(buf, inner.Interface().([]Inline)) |
| } |
| buf.WriteString(")") |
| } |
| |
| func printis(buf *bytes.Buffer, ins []Inline) { |
| for _, in := range ins { |
| buf.WriteByte(' ') |
| printi(buf, in) |
| } |
| } |
| |
| func dump(b Block) string { |
| var buf bytes.Buffer |
| printb(&buf, b, "") |
| return buf.String() |
| } |