blob: 014ae4a68a8d2500d8e4e7e9166c8409a7ca936c [file] [log] [blame]
// Copyright 2021 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package markdown
import (
	"bytes"
	"fmt"
	"reflect"
	"sort"
	"strings"
)
/*

list block itself does not appear on stack?
item does
end of item returns block,
new item continues previous block if possible?

if close leaves lines or blocks behind, panic

close(b a list item, parent)
	if b's parent's last block is list && item can be added to it, do so
	else return new list

or maybe not parent but just current list of blocks

preserve LinkRefDefs?

*/
// Block is implemented by:
// CodeBLock
// Document
// Empty
// HTMLBlock
// Heading
// Item
// List
// Paragraph
// Quote
// Text
// ThematicBreak
type Block interface {
Pos() Position
PrintHTML(buf *bytes.Buffer)
printMarkdown(buf *bytes.Buffer, s mdState)
// mdState is the formatting state handed down through printMarkdown calls.
type mdState struct {
	prefix  string // written at the start of each output line
	prefix1 string // for first line only
	bullet  rune   // for list items
	num     int    // for numbered list items
}
// A Position is a range of source lines, from StartLine through EndLine.
type Position struct {
	StartLine int
	EndLine   int
}
func (p Position) Pos() Position {
return p
type buildState interface {
blocks() []Block
pos() Position
last() Block
link(label string) *Link
defineLink(label string, link *Link)
newText(pos Position, text string) *Text
type blockBuilder interface {
extend(p *parseState, s line) (line, bool)
build(buildState) Block
type openBlock struct {
builder blockBuilder
inner []Block
pos Position
type itemBuilder struct {
list *listBuilder
width int
haveContent bool
func (p *parseState) last() Block {
ob := &p.stack[len(p.stack)-1]
return ob.inner[len(ob.inner)-1]
func (p *parseState) deleteLast() {
ob := &p.stack[len(p.stack)-1]
ob.inner = ob.inner[:len(ob.inner)-1]
type Text struct {
Inline []Inline
raw string
func (b *Text) PrintHTML(buf *bytes.Buffer) {
for _, x := range b.Inline {
// printMarkdown writes the Markdown form of b to buf using the prefixes in s.
// NOTE(review): this copy of the function has lost its closing braces and the
// bodies of its branches (both arms of the prefix1 test and the break case are
// empty, and nothing visible writes to buf); restore them from the canonical
// source before relying on it.
func (b *Text) printMarkdown(buf *bytes.Buffer, s mdState) {
// A non-empty prefix1 selects the first-line branch.
if s.prefix1 != "" {
} else {
// prev tracks the inline element handled on the previous iteration.
var prev Inline
for _, x := range b.Inline {
// Elements that follow a soft or hard line break get special handling.
switch prev.(type) {
case *SoftBreak, *HardBreak:
prev = x
type rootBuilder struct{}
func (b *rootBuilder) build(p buildState) Block {
return &Document{p.pos(), p.blocks(), p.(*parseState).links}
type Document struct {
Blocks []Block
Links map[string]*Link
// A Parser is a Markdown parser.
// The exported fields in the struct can be filled in before calling
// [Parser.Parse] in order to customize the details of the parsing process.
// A Parser is safe for concurrent use by multiple goroutines.
type Parser struct {
	// HeadingIDs determines whether the parser accepts
	// the {#hdr} syntax for an HTML id="hdr" attribute on headings.
	// For example, if HeadingIDs is true then the Markdown
	//
	//	## Overview {#overview}
	//
	// will render as the HTML
	//
	//	<h2 id="overview">Overview</h2>
	HeadingIDs bool

	// Strikethrough determines whether the parser accepts
	// ~abc~ and ~~abc~~ as strikethrough syntax, producing
	// <del>abc</del> in HTML.
	Strikethrough bool

	// TaskListItems determines whether the parser accepts
	// “task list items” as defined in GitHub Flavored Markdown.
	// When a list item begins with the plain text [ ] or [x]
	// that turns into an unchecked or checked check box.
	TaskListItems bool

	// TODO: document the remaining options. Their behavior is
	// implemented outside this part of the file.
	AutoLinkText       bool
	AutoLinkAssumeHTTP bool
	Table              bool
	Emoji              bool
	SmartDot           bool
	SmartDash          bool
	SmartQuote         bool
}
type parseState struct {
root *Document
links map[string]*Link
lineno int
stack []openBlock
lineDepth int
corner bool // noticed corner case to ignore in cross-implementation testing
// inlines
s string
emitted int // s[:emitted] has been emitted into list
list []Inline
// for fixup at end
lists []*List
texts []*Text
backticks backtickParser
func (p *parseState) newText(pos Position, text string) *Text {
b := &Text{Position: pos, raw: text}
p.texts = append(p.texts, b)
return b
func (p *parseState) blocks() []Block {
b := &p.stack[len(p.stack)-1]
return b.inner
func (p *parseState) pos() Position {
b := &p.stack[len(p.stack)-1]
return b.pos
func (p *Parser) Parse(text string) *Document {
d, _ := p.parse(text)
return d
// parse is the implementation of Parse. It splits text into lines, hands each
// line to ps.addLine, then runs the inline parser over every Text recorded in
// ps.texts, and returns ps.root together with the corner-case flag.
// NOTE(review): this copy has lost its closing braces and appears to be
// missing whole statements (flagged below); restore them from the canonical
// source before editing.
func (p *Parser) parse(text string) (d *Document, corner bool) {
var ps parseState
ps.Parser = p
// NUL bytes are replaced by U+FFFD before parsing.
if strings.Contains(text, "\x00") {
text = strings.ReplaceAll(text, "\x00", "\uFFFD")
ps.corner = true // goldmark does not replace NUL
// NOTE(review): nothing visible pushes a root block or advances ps.lineno,
// and nothing closes the stack after the loop, yet ps.root is only assigned
// by closeBlock — those statements seem to have been lost.
ps.lineDepth = -1
// Peel one line off the front of text per iteration.
for text != "" {
var ln string
i := strings.Index(text, "\n")
j := strings.Index(text, "\r")
// nl records which terminator ended the line; zero means none (end of input).
var nl byte
switch {
case j >= 0 && (i < 0 || j < i): // have \r, maybe \r\n
ln = text[:j]
if i == j+1 {
text = text[j+2:]
// A \r\n pair is encoded as the sum of the two bytes.
nl = '\r' + '\n'
} else {
text = text[j+1:]
nl = '\r'
case i >= 0:
ln, text = text[:i], text[i+1:]
nl = '\n'
// NOTE(review): the next assignment takes the rest of text as a final,
// unterminated line; the case label that should guard it is missing.
ln, text = text, ""
ps.addLine(line{text: ln, nl: nl})
// Inline parsing is deferred until all blocks have been seen.
for _, t := range ps.texts {
t.Inline = ps.inline(t.raw)
// NOTE(review): the body of the task-list loop is missing from this copy.
if p.TaskListItems {
for _, list := range ps.lists {
return ps.root, ps.corner
func (p *parseState) curB() blockBuilder {
if p.lineDepth < len(p.stack) {
return p.stack[p.lineDepth].builder
return nil
func (p *parseState) nextB() blockBuilder {
if p.lineDepth+1 < len(p.stack) {
return p.stack[p.lineDepth+1].builder
return nil
func (p *parseState) trimStack(depth int) {
if len(p.stack) < depth {
for len(p.stack) > depth {
func (p *parseState) addBlock(c blockBuilder) {
p.trimStack(p.lineDepth + 1)
p.stack = append(p.stack, openBlock{})
ob := &p.stack[len(p.stack)-1]
ob.builder = c
ob.pos.StartLine = p.lineno
ob.pos.EndLine = p.lineno
func (p *parseState) doneBlock(b Block) {
p.trimStack(p.lineDepth + 1)
ob := &p.stack[len(p.stack)-1]
ob.inner = append(ob.inner, b)
func (p *parseState) para() *paraBuilder {
if b, ok := p.stack[len(p.stack)-1].builder.(*paraBuilder); ok {
return b
return nil
func (p *parseState) closeBlock() Block {
b := &p.stack[len(p.stack)-1]
if b.builder == nil {
println("closeBlock", len(p.stack)-1)
blk :=
if list, ok := blk.(*List); ok {
p.corner = p.corner || listCorner(list)
if p.TaskListItems {
p.lists = append(p.lists, list)
p.stack = p.stack[:len(p.stack)-1]
if len(p.stack) > 0 {
b := &p.stack[len(p.stack)-1]
b.inner = append(b.inner, blk)
// _ = b
} else {
p.root = blk.(*Document)
return blk
func (p *parseState) link(label string) *Link {
return p.links[label]
func (p *parseState) defineLink(label string, link *Link) {
if p.links == nil {
p.links = make(map[string]*Link)
p.links[label] = link
// A line is one input line together with a read cursor into it.
type line struct {
	spaces int    // pending virtual spaces ahead of text[i:]; at most 3 (see string)
	i      int    // index of the next unread byte of text
	tab    int    // NOTE(review): presumably the offset used for tab-stop arithmetic; confirm
	text   string // the line, without its terminator
	nl     byte   // newline character ending this line: \r or \n or zero for EOF
}
// addLine feeds one input line to the parser: it first offers the line to each
// block already open, then tries to open new blocks on what is left, and
// finally treats the remainder as paragraph text.
// NOTE(review): this copy has lost its closing braces, the Prefixes label
// targeted by the goto, and the bodies of several branches; restore them from
// the canonical source before editing — the statement order here matters.
func (p *parseState) addLine(s line) {
// Process continued prefixes.
p.lineDepth = 0
// Walk down the stack below the root, letting each open block consume its
// share of the line.
for ; p.lineDepth+1 < len(p.stack); p.lineDepth++ {
old := s
var ok bool
s, ok = p.stack[p.lineDepth+1].builder.extend(p, s)
// A non-blank line that the block accepted or consumed from extends the
// block's position to the current line.
if !old.isBlank() && (ok || s != old) {
p.stack[p.lineDepth+1].pos.EndLine = p.lineno
if !ok {
// A blank remainder closes every block deeper than the matched depth.
if s.isBlank() {
p.trimStack(p.lineDepth + 1)
// Process new prefixes, if any.
// Start new block inside p.stack[depth].
for _, fn := range news {
if l, ok := fn(p, s); ok {
s = l
if s.isBlank() {
goto Prefixes
// No block opener matched: the rest of the line is paragraph text.
newPara(p, s)
func (c *rootBuilder) extend(p *parseState, s line) (line, bool) {
panic("root extend")
// news lists the functions that addLine tries, in order, to start a new block
// on the current line. Each returns the remaining line and whether it opened
// a block.
// NOTE(review): the slice's elements and closing brace are missing from this
// copy; restore them from the canonical source.
var news = []func(*parseState, line) (line, bool){
func (s *line) peek() byte {
if s.spaces > 0 {
return ' '
if s.i >= len(s.text) {
return 0
return s.text[s.i]
func (s *line) skipSpace() {
s.spaces = 0
for s.i < len(s.text) && (s.text[s.i] == ' ' || s.text[s.i] == '\t') {
// trimSpace tries to consume between min and max columns of leading white
// space. It works on a copy of the line and commits the copy to *s only when
// it returns true; on a false return *s is unchanged.
// NOTE(review): this copy is damaged — closing braces and the bodies of
// several branches are missing, and the two statements in the tab case are
// corrupted — so the exact tab-stop arithmetic cannot be read from here.
// Restore from the canonical source before editing.
func (s *line) trimSpace(min, max int, eolOK bool) bool {
t := *s
for n := 0; n < max; n++ {
// Pending virtual spaces are considered before the text itself.
if t.spaces > 0 {
// eolOK selects special handling once the cursor is at end of line.
if t.i >= len(t.text) && eolOK {
if t.i < len(t.text) {
switch t.text[t.i] {
case '\t':
// NOTE(review): the next two lines are garbled and are not valid Go.
t.spaces = 4 - ( - 1
t.i++ = t.i
case ' ':
// n counts the columns handled so far; min is the required minimum.
if n >= min {
return false
*s = t
return true
func (s *line) trim(c byte) bool {
if s.spaces > 0 {
if c == ' ' {
return true
return false
if s.i < len(s.text) && s.text[s.i] == c {
return true
return false
func (s *line) string() string {
switch s.spaces {
case 0:
return s.text[s.i:]
case 1:
return " " + s.text[s.i:]
case 2:
return " " + s.text[s.i:]
case 3:
return " " + s.text[s.i:]
panic("bad spaces")
// trimLeftSpaceTab returns s with all leading spaces and tabs removed.
func trimLeftSpaceTab(s string) string {
	i := 0
	for i < len(s) && (s[i] == ' ' || s[i] == '\t') {
		i++
	}
	return s[i:]
}
// trimRightSpaceTab returns s with all trailing spaces and tabs removed.
func trimRightSpaceTab(s string) string {
	j := len(s)
	for j > 0 && (s[j-1] == ' ' || s[j-1] == '\t') {
		j--
	}
	return s[:j]
}
// trimSpaceTab returns s with all leading and trailing spaces and tabs
// removed. Other white space, such as newlines, is left alone.
func trimSpaceTab(s string) string {
	return strings.Trim(s, " \t")
}
// trimSpace returns s with all leading and trailing spaces and tabs removed.
// It trims exactly the same characters as trimSpaceTab.
func trimSpace(s string) string {
	return strings.Trim(s, " \t")
}
// trimSpaceTabNewline returns s with all leading and trailing spaces,
// tabs, and newlines (\n) removed.
func trimSpaceTabNewline(s string) string {
	return strings.Trim(s, " \t\n")
}
func (s *line) isBlank() bool {
return trimLeftSpaceTab(s.text[s.i:]) == ""
func (s *line) eof() bool {
return s.i >= len(s.text)
func (s *line) trimSpaceString() string {
return trimLeftSpaceTab(s.text[s.i:])
func (s *line) trimString() string {
return trimSpaceTab(s.text[s.i:])
func ToHTML(b Block) string {
var buf bytes.Buffer
return buf.String()
func ToMarkdown(b Block) string {
var buf bytes.Buffer
b.printMarkdown(&buf, mdState{})
s := buf.String()
// Remove final extra newline.
if strings.HasSuffix(s, "\n\n") {
s = s[:len(s)-1]
return s
func (b *Document) PrintHTML(buf *bytes.Buffer) {
for _, c := range b.Blocks {
func (b *Document) printMarkdown(buf *bytes.Buffer, s mdState) {
printMarkdownBlocks(b.Blocks, buf, s)
// Print links sorted by keys for deterministic output.
var keys []string
for k := range b.Links {
keys = append(keys, k)
for _, k := range keys {
l := b.Links[k]
fmt.Fprintf(buf, "[%s]: %s", k, l.URL)
printLinkTitleMarkdown(buf, l.Title, l.TitleChar)
// printMarkdownBlocks writes the blocks bs to buf in order, passing the state
// s to each block's printMarkdown method.
// NOTE(review): this copy has lost its closing braces and the body of the
// blank-line loop; restore them from the canonical source.
func printMarkdownBlocks(bs []Block, buf *bytes.Buffer, s mdState) {
// prevEnd is the last line of the previous block; 0 means no block printed yet.
prevEnd := 0
for _, b := range bs {
// Preserve blank lines between blocks.
if prevEnd > 0 {
// One iteration per source line strictly between the two blocks.
for i := prevEnd + 1; i < b.Pos().StartLine; i++ {
b.printMarkdown(buf, s)
prevEnd = b.Pos().EndLine
s.prefix1 = "" // item prefix only for first block
var (
blockType = reflect.TypeOf(new(Block)).Elem()
blocksType = reflect.TypeOf(new([]Block)).Elem()
inlinesType = reflect.TypeOf(new([]Inline)).Elem()
func printb(buf *bytes.Buffer, b Block, prefix string) {
fmt.Fprintf(buf, "(%T", b)
v := reflect.ValueOf(b)
v = reflect.Indirect(v)
if v.Kind() != reflect.Struct {
fmt.Fprintf(buf, " %v", b)
t := v.Type()
for i := 0; i < t.NumField(); i++ {
tf := t.Field(i)
if !tf.IsExported() {
if tf.Type == inlinesType {
printis(buf, v.Field(i).Interface().([]Inline))
} else if tf.Type.Kind() == reflect.Slice && tf.Type.Elem().Kind() == reflect.String {
fmt.Fprintf(buf, " %s:%q", tf.Name, v.Field(i))
} else if tf.Type != blocksType && !tf.Type.Implements(blockType) && tf.Type.Kind() != reflect.Slice {
fmt.Fprintf(buf, " %s:%v", tf.Name, v.Field(i))
prefix += "\t"
for i := 0; i < t.NumField(); i++ {
tf := t.Field(i)
if !tf.IsExported() {
if tf.Type.Implements(blockType) {
fmt.Fprintf(buf, "\n%s", prefix)
printb(buf, v.Field(i).Interface().(Block), prefix)
} else if tf.Type == blocksType {
vf := v.Field(i)
for i := 0; i < vf.Len(); i++ {
fmt.Fprintf(buf, "\n%s", prefix)
printb(buf, vf.Index(i).Interface().(Block), prefix)
} else if tf.Type.Kind() == reflect.Slice && tf.Type != inlinesType && tf.Type.Elem().Kind() != reflect.String {
fmt.Fprintf(buf, "\n%s%s:", prefix, t.Field(i).Name)
printslice(buf, v.Field(i), prefix)
fmt.Fprintf(buf, ")")
func printslice(buf *bytes.Buffer, v reflect.Value, prefix string) {
if v.Type().Elem().Kind() == reflect.Slice {
for i := 0; i < v.Len(); i++ {
fmt.Fprintf(buf, "\n%s#%d:", prefix, i)
printslice(buf, v.Index(i), prefix+"\t")
for i := 0; i < v.Len(); i++ {
fmt.Fprintf(buf, " ")
printb(buf, v.Index(i).Interface().(Block), prefix+"\t")
func printi(buf *bytes.Buffer, in Inline) {
fmt.Fprintf(buf, "%T(", in)
v := reflect.ValueOf(in).Elem()
text := v.FieldByName("Text")
if text.IsValid() {
fmt.Fprintf(buf, "%q", text)
inner := v.FieldByName("Inner")
if inner.IsValid() {
printis(buf, inner.Interface().([]Inline))
func printis(buf *bytes.Buffer, ins []Inline) {
for _, in := range ins {
buf.WriteByte(' ')
printi(buf, in)
func dump(b Block) string {
var buf bytes.Buffer
printb(&buf, b, "")
return buf.String()