diff options
Diffstat (limited to 'src/cmd/vendor/rsc.io/markdown/parse.go')
| -rw-r--r-- | src/cmd/vendor/rsc.io/markdown/parse.go | 713 |
1 files changed, 713 insertions, 0 deletions
diff --git a/src/cmd/vendor/rsc.io/markdown/parse.go b/src/cmd/vendor/rsc.io/markdown/parse.go new file mode 100644 index 0000000000..014ae4a68a --- /dev/null +++ b/src/cmd/vendor/rsc.io/markdown/parse.go @@ -0,0 +1,713 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package markdown + +import ( + "bytes" + "fmt" + "reflect" + "slices" + "strings" +) + +/* + +list block itself does not appear on stack? +item does +end of item returns block, +new item continues previous block if possible? + +if close leaves lines or blocks behind, panic + +close(b a list item, parent) + if b's parent's last block is list && item can be added to it, do so + else return new list + +or maybe not parent but just current list of blocks + +preserve LinkRefDefs? + +*/ + +// Block is implemented by: +// +// CodeBLock +// Document +// Empty +// HTMLBlock +// Heading +// Item +// List +// Paragraph +// Quote +// Text +// ThematicBreak +type Block interface { + Pos() Position + PrintHTML(buf *bytes.Buffer) + printMarkdown(buf *bytes.Buffer, s mdState) +} + +type mdState struct { + prefix string + prefix1 string // for first line only + bullet rune // for list items + num int // for numbered list items +} + +type Position struct { + StartLine int + EndLine int +} + +func (p Position) Pos() Position { + return p +} + +type buildState interface { + blocks() []Block + pos() Position + last() Block + deleteLast() + + link(label string) *Link + defineLink(label string, link *Link) + newText(pos Position, text string) *Text +} + +type blockBuilder interface { + extend(p *parseState, s line) (line, bool) + build(buildState) Block +} + +type openBlock struct { + builder blockBuilder + inner []Block + pos Position +} + +type itemBuilder struct { + list *listBuilder + width int + haveContent bool +} + +func (p *parseState) last() Block { + ob := &p.stack[len(p.stack)-1] + return ob.inner[len(ob.inner)-1] +} + +func (p *parseState) deleteLast() { + ob := &p.stack[len(p.stack)-1] + ob.inner = ob.inner[:len(ob.inner)-1] +} + +type Text struct { + Position + Inline []Inline + raw string +} + +func (b *Text) PrintHTML(buf *bytes.Buffer) { + for _, x := range b.Inline { + x.PrintHTML(buf) + } +} + +func (b *Text) printMarkdown(buf *bytes.Buffer, s mdState) { + if s.prefix1 != "" { + buf.WriteString(s.prefix1) + } else { + buf.WriteString(s.prefix) + } + var prev Inline + for _, x := range b.Inline { + switch prev.(type) { + case *SoftBreak, *HardBreak: + buf.WriteString(s.prefix) + } + x.printMarkdown(buf) + prev = x + } + buf.WriteByte('\n') +} + +type rootBuilder struct{} + +func (b *rootBuilder) build(p buildState) Block { + return &Document{p.pos(), p.blocks(), p.(*parseState).links} +} + +type Document struct { + Position + Blocks []Block + Links map[string]*Link +} + +// A Parser is a Markdown parser. +// The exported fields in the struct can be filled in before calling +// [Parser.Parse] in order to customize the details of the parsing process. +// A Parser is safe for concurrent use by multiple goroutines. +type Parser struct { + // HeadingIDs determines whether the parser accepts + // the {#hdr} syntax for an HTML id="hdr" attribute on headings. + // For example, if HeadingIDs is true then the Markdown + // ## Overview {#overview} + // will render as the HTML + // <h2 id="overview">Overview</h2> + HeadingIDs bool + + // Strikethrough determines whether the parser accepts + // ~abc~ and ~~abc~~ as strikethrough syntax, producing + // <del>abc</del> in HTML. + Strikethrough bool + + // TaskListItems determines whether the parser accepts + // “task list items” as defined in GitHub Flavored Markdown. + // When a list item begins with the plain text [ ] or [x] + // that turns into an unchecked or checked check box. + TaskListItems bool + + // TODO + AutoLinkText bool + AutoLinkAssumeHTTP bool + + // TODO + Table bool + + // TODO + Emoji bool + + // TODO + SmartDot bool + SmartDash bool + SmartQuote bool +} + +type parseState struct { + *Parser + + root *Document + links map[string]*Link + lineno int + stack []openBlock + lineDepth int + + corner bool // noticed corner case to ignore in cross-implementation testing + + // inlines + s string + emitted int // s[:emitted] has been emitted into list + list []Inline + + // for fixup at end + lists []*List + texts []*Text + + backticks backtickParser +} + +func (p *parseState) newText(pos Position, text string) *Text { + b := &Text{Position: pos, raw: text} + p.texts = append(p.texts, b) + return b +} + +func (p *parseState) blocks() []Block { + b := &p.stack[len(p.stack)-1] + return b.inner +} + +func (p *parseState) pos() Position { + b := &p.stack[len(p.stack)-1] + return b.pos +} + +func (p *Parser) Parse(text string) *Document { + d, _ := p.parse(text) + return d +} + +func (p *Parser) parse(text string) (d *Document, corner bool) { + var ps parseState + ps.Parser = p + if strings.Contains(text, "\x00") { + text = strings.ReplaceAll(text, "\x00", "\uFFFD") + ps.corner = true // goldmark does not replace NUL + } + + ps.lineDepth = -1 + ps.addBlock(&rootBuilder{}) + for text != "" { + var ln string + i := strings.Index(text, "\n") + j := strings.Index(text, "\r") + var nl byte + switch { + case j >= 0 && (i < 0 || j < i): // have \r, maybe \r\n + ln = text[:j] + if i == j+1 { + text = text[j+2:] + nl = '\r' + '\n' + } else { + text = text[j+1:] + nl = '\r' + } + case i >= 0: + ln, text = text[:i], text[i+1:] + nl = '\n' + default: + ln, text = text, "" + } + ps.lineno++ + ps.addLine(line{text: ln, nl: nl}) + } + ps.trimStack(0) + + for _, t := range ps.texts { + t.Inline = ps.inline(t.raw) + } + + if p.TaskListItems { + for _, list := range ps.lists { + ps.taskList(list) + } + } + + return ps.root, ps.corner +} + +func (p *parseState) curB() blockBuilder { + if p.lineDepth < len(p.stack) { + return p.stack[p.lineDepth].builder + } + return nil +} + +func (p *parseState) nextB() blockBuilder { + if p.lineDepth+1 < len(p.stack) { + return p.stack[p.lineDepth+1].builder + } + return nil +} +func (p *parseState) trimStack(depth int) { + if len(p.stack) < depth { + panic("trimStack") + } + for len(p.stack) > depth { + p.closeBlock() + } +} + +func (p *parseState) addBlock(c blockBuilder) { + p.trimStack(p.lineDepth + 1) + p.stack = append(p.stack, openBlock{}) + ob := &p.stack[len(p.stack)-1] + ob.builder = c + ob.pos.StartLine = p.lineno + ob.pos.EndLine = p.lineno +} + +func (p *parseState) doneBlock(b Block) { + p.trimStack(p.lineDepth + 1) + ob := &p.stack[len(p.stack)-1] + ob.inner = append(ob.inner, b) +} + +func (p *parseState) para() *paraBuilder { + if b, ok := p.stack[len(p.stack)-1].builder.(*paraBuilder); ok { + return b + } + return nil +} + +func (p *parseState) closeBlock() Block { + b := &p.stack[len(p.stack)-1] + if b.builder == nil { + println("closeBlock", len(p.stack)-1) + } + blk := b.builder.build(p) + if list, ok := blk.(*List); ok { + p.corner = p.corner || listCorner(list) + if p.TaskListItems { + p.lists = append(p.lists, list) + } + } + p.stack = p.stack[:len(p.stack)-1] + if len(p.stack) > 0 { + b := &p.stack[len(p.stack)-1] + b.inner = append(b.inner, blk) + // _ = b + } else { + p.root = blk.(*Document) + } + return blk +} + +func (p *parseState) link(label string) *Link { + return p.links[label] +} + +func (p *parseState) defineLink(label string, link *Link) { + if p.links == nil { + p.links = make(map[string]*Link) + } + p.links[label] = link +} + +type line struct { + spaces int + i int + tab int + text string + nl byte // newline character ending this line: \r or \n or zero for EOF +} + +func (p *parseState) addLine(s line) { + // Process continued prefixes. + p.lineDepth = 0 + for ; p.lineDepth+1 < len(p.stack); p.lineDepth++ { + old := s + var ok bool + s, ok = p.stack[p.lineDepth+1].builder.extend(p, s) + if !old.isBlank() && (ok || s != old) { + p.stack[p.lineDepth+1].pos.EndLine = p.lineno + } + if !ok { + break + } + } + + if s.isBlank() { + p.trimStack(p.lineDepth + 1) + return + } + + // Process new prefixes, if any. +Prefixes: + // Start new block inside p.stack[depth]. + for _, fn := range news { + if l, ok := fn(p, s); ok { + s = l + if s.isBlank() { + return + } + p.lineDepth++ + goto Prefixes + } + } + + newPara(p, s) +} + +func (c *rootBuilder) extend(p *parseState, s line) (line, bool) { + panic("root extend") +} + +var news = []func(*parseState, line) (line, bool){ + newQuote, + newATXHeading, + newSetextHeading, + newHR, + newListItem, + newHTML, + newFence, + newPre, +} + +func (s *line) peek() byte { + if s.spaces > 0 { + return ' ' + } + if s.i >= len(s.text) { + return 0 + } + return s.text[s.i] +} + +func (s *line) skipSpace() { + s.spaces = 0 + for s.i < len(s.text) && (s.text[s.i] == ' ' || s.text[s.i] == '\t') { + s.i++ + } +} + +func (s *line) trimSpace(min, max int, eolOK bool) bool { + t := *s + for n := 0; n < max; n++ { + if t.spaces > 0 { + t.spaces-- + continue + } + if t.i >= len(t.text) && eolOK { + continue + } + if t.i < len(t.text) { + switch t.text[t.i] { + case '\t': + t.spaces = 4 - (t.i-t.tab)&3 - 1 + t.i++ + t.tab = t.i + continue + case ' ': + t.i++ + continue + } + } + if n >= min { + break + } + return false + } + *s = t + return true +} + +func (s *line) trim(c byte) bool { + if s.spaces > 0 { + if c == ' ' { + s.spaces-- + return true + } + return false + } + if s.i < len(s.text) && s.text[s.i] == c { + s.i++ + return true + } + return false +} + +func (s *line) string() string { + switch s.spaces { + case 0: + return s.text[s.i:] + case 1: + return " " + s.text[s.i:] + case 2: + return " " + s.text[s.i:] + case 3: + return " " + s.text[s.i:] + } + panic("bad spaces") +} + +func trimLeftSpaceTab(s string) string { + i := 0 + for i < len(s) && (s[i] == ' ' || s[i] == '\t') { + i++ + } + return s[i:] +} + +func trimRightSpaceTab(s string) string { + j := len(s) + for j > 0 && (s[j-1] == ' ' || s[j-1] == '\t') { + j-- + } + return s[:j] +} + +func trimSpaceTab(s string) string { + i := 0 + for i < len(s) && (s[i] == ' ' || s[i] == '\t') { + i++ + } + s = s[i:] + j := len(s) + for j > 0 && (s[j-1] == ' ' || s[j-1] == '\t') { + j-- + } + return s[:j] +} + +func trimSpace(s string) string { + i := 0 + for i < len(s) && (s[i] == ' ' || s[i] == '\t') { + i++ + } + s = s[i:] + j := len(s) + for j > 0 && (s[j-1] == ' ' || s[j-1] == '\t') { + j-- + } + return s[:j] +} + +func trimSpaceTabNewline(s string) string { + i := 0 + for i < len(s) && (s[i] == ' ' || s[i] == '\t' || s[i] == '\n') { + i++ + } + s = s[i:] + j := len(s) + for j > 0 && (s[j-1] == ' ' || s[j-1] == '\t' || s[j-1] == '\n') { + j-- + } + return s[:j] +} + +func (s *line) isBlank() bool { + return trimLeftSpaceTab(s.text[s.i:]) == "" +} + +func (s *line) eof() bool { + return s.i >= len(s.text) +} + +func (s *line) trimSpaceString() string { + return trimLeftSpaceTab(s.text[s.i:]) +} + +func (s *line) trimString() string { + return trimSpaceTab(s.text[s.i:]) +} + +func ToHTML(b Block) string { + var buf bytes.Buffer + b.PrintHTML(&buf) + return buf.String() +} + +func ToMarkdown(b Block) string { + var buf bytes.Buffer + b.printMarkdown(&buf, mdState{}) + s := buf.String() + // Remove final extra newline. + if strings.HasSuffix(s, "\n\n") { + s = s[:len(s)-1] + } + return s +} + +func (b *Document) PrintHTML(buf *bytes.Buffer) { + for _, c := range b.Blocks { + c.PrintHTML(buf) + } +} + +func (b *Document) printMarkdown(buf *bytes.Buffer, s mdState) { + printMarkdownBlocks(b.Blocks, buf, s) + // Print links sorted by keys for deterministic output. + var keys []string + for k := range b.Links { + keys = append(keys, k) + } + slices.Sort(keys) + for _, k := range keys { + l := b.Links[k] + fmt.Fprintf(buf, "[%s]: %s", k, l.URL) + printLinkTitleMarkdown(buf, l.Title, l.TitleChar) + buf.WriteByte('\n') + } +} + +func printMarkdownBlocks(bs []Block, buf *bytes.Buffer, s mdState) { + prevEnd := 0 + for _, b := range bs { + // Preserve blank lines between blocks. + if prevEnd > 0 { + for i := prevEnd + 1; i < b.Pos().StartLine; i++ { + buf.WriteString(trimRightSpaceTab(s.prefix)) + buf.WriteByte('\n') + } + } + b.printMarkdown(buf, s) + prevEnd = b.Pos().EndLine + s.prefix1 = "" // item prefix only for first block + } +} + +var ( + blockType = reflect.TypeOf(new(Block)).Elem() + blocksType = reflect.TypeOf(new([]Block)).Elem() + inlinesType = reflect.TypeOf(new([]Inline)).Elem() +) + +func printb(buf *bytes.Buffer, b Block, prefix string) { + fmt.Fprintf(buf, "(%T", b) + v := reflect.ValueOf(b) + v = reflect.Indirect(v) + if v.Kind() != reflect.Struct { + fmt.Fprintf(buf, " %v", b) + } + t := v.Type() + for i := 0; i < t.NumField(); i++ { + tf := t.Field(i) + if !tf.IsExported() { + continue + } + if tf.Type == inlinesType { + printis(buf, v.Field(i).Interface().([]Inline)) + } else if tf.Type.Kind() == reflect.Slice && tf.Type.Elem().Kind() == reflect.String { + fmt.Fprintf(buf, " %s:%q", tf.Name, v.Field(i)) + } else if tf.Type != blocksType && !tf.Type.Implements(blockType) && tf.Type.Kind() != reflect.Slice { + fmt.Fprintf(buf, " %s:%v", tf.Name, v.Field(i)) + } + } + + prefix += "\t" + for i := 0; i < t.NumField(); i++ { + tf := t.Field(i) + if !tf.IsExported() { + continue + } + if tf.Type.Implements(blockType) { + fmt.Fprintf(buf, "\n%s", prefix) + printb(buf, v.Field(i).Interface().(Block), prefix) + } else if tf.Type == blocksType { + vf := v.Field(i) + for i := 0; i < vf.Len(); i++ { + fmt.Fprintf(buf, "\n%s", prefix) + printb(buf, vf.Index(i).Interface().(Block), prefix) + } + } else if tf.Type.Kind() == reflect.Slice && tf.Type != inlinesType && tf.Type.Elem().Kind() != reflect.String { + fmt.Fprintf(buf, "\n%s%s:", prefix, t.Field(i).Name) + printslice(buf, v.Field(i), prefix) + } + } + fmt.Fprintf(buf, ")") +} + +func printslice(buf *bytes.Buffer, v reflect.Value, prefix string) { + if v.Type().Elem().Kind() == reflect.Slice { + for i := 0; i < v.Len(); i++ { + fmt.Fprintf(buf, "\n%s#%d:", prefix, i) + printslice(buf, v.Index(i), prefix+"\t") + } + return + } + for i := 0; i < v.Len(); i++ { + fmt.Fprintf(buf, " ") + printb(buf, v.Index(i).Interface().(Block), prefix+"\t") + } +} + +func printi(buf *bytes.Buffer, in Inline) { + fmt.Fprintf(buf, "%T(", in) + v := reflect.ValueOf(in).Elem() + text := v.FieldByName("Text") + if text.IsValid() { + fmt.Fprintf(buf, "%q", text) + } + inner := v.FieldByName("Inner") + if inner.IsValid() { + printis(buf, inner.Interface().([]Inline)) + } + buf.WriteString(")") +} + +func printis(buf *bytes.Buffer, ins []Inline) { + for _, in := range ins { + buf.WriteByte(' ') + printi(buf, in) + } +} + +func dump(b Block) string { + var buf bytes.Buffer + printb(&buf, b, "") + return buf.String() +} |
