diff options
| author | Jonathan Amsterdam <jba@google.com> | 2023-11-15 09:50:01 -0500 |
|---|---|---|
| committer | Jonathan Amsterdam <jba@google.com> | 2024-01-24 13:01:26 +0000 |
| commit | 8b23b7b04234424791e26b8d2d26f61ef1311a9f (patch) | |
| tree | d0cebc42f94025d062d5669bf27046d3025b67a6 /src/cmd/vendor/rsc.io/markdown/html.go | |
| parent | b4e7d630bc6fbf654a20a4bebda94a8150811bea (diff) | |
| download | go-8b23b7b04234424791e26b8d2d26f61ef1311a9f.tar.xz | |
src/cmd/relnote, doc/next: add release note check
Add a test that every file in api/next has corresponding
release note fragments.
Vendor in golang.org/x/build/relnote, which brings along some other
things.
Modify dist/test.go to configure the test to run on some trybots.
For #64169.
Change-Id: If87d11350ea6b2605bc3ab31c491fa28f1d6ee7d
Cq-Include-Trybots: luci.golang.try:gotip-linux-amd64-longtest
Reviewed-on: https://go-review.googlesource.com/c/go/+/556995
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Dmitri Shuralyov <dmitshur@golang.org>
Diffstat (limited to 'src/cmd/vendor/rsc.io/markdown/html.go')
| -rw-r--r-- | src/cmd/vendor/rsc.io/markdown/html.go | 507 |
1 files changed, 507 insertions, 0 deletions
diff --git a/src/cmd/vendor/rsc.io/markdown/html.go b/src/cmd/vendor/rsc.io/markdown/html.go new file mode 100644 index 0000000000..dc126b788a --- /dev/null +++ b/src/cmd/vendor/rsc.io/markdown/html.go @@ -0,0 +1,507 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package markdown + +import ( + "bytes" + "strconv" + "strings" + "unicode" +) + +type HTMLBlock struct { + Position + Text []string +} + +func (b *HTMLBlock) PrintHTML(buf *bytes.Buffer) { + for _, s := range b.Text { + buf.WriteString(s) + buf.WriteString("\n") + } +} + +func (b *HTMLBlock) printMarkdown(buf *bytes.Buffer, s mdState) { + if s.prefix1 != "" { + buf.WriteString(s.prefix1) + } else { + buf.WriteString(s.prefix) + } + b.PrintHTML(buf) +} + +type htmlBuilder struct { + endBlank bool + text []string + endFunc func(string) bool +} + +func (c *htmlBuilder) extend(p *parseState, s line) (line, bool) { + if c.endBlank && s.isBlank() { + return s, false + } + t := s.string() + c.text = append(c.text, t) + if c.endFunc != nil && c.endFunc(t) { + return line{}, false + } + return line{}, true +} + +func (c *htmlBuilder) build(p buildState) Block { + return &HTMLBlock{ + p.pos(), + c.text, + } +} + +func newHTML(p *parseState, s line) (line, bool) { + peek := s + if p.startHTML(&peek) { + return line{}, true + } + return s, false +} + +func (p *parseState) startHTML(s *line) bool { + tt := *s + tt.trimSpace(0, 3, false) + if tt.peek() != '<' { + return false + } + t := tt.string() + + var end string + switch { + case strings.HasPrefix(t, "<!--"): + end = "-->" + case strings.HasPrefix(t, "<?"): + end = "?>" + case strings.HasPrefix(t, "<![CDATA["): + end = "]]>" + case strings.HasPrefix(t, "<!") && len(t) >= 3 && isLetter(t[2]): + if 'a' <= t[2] && t[2] <= 'z' { + // Goldmark and the Dingus only accept <!UPPER> not <!lower>. + p.corner = true + } + end = ">" + } + if end != "" { + b := &htmlBuilder{endFunc: func(s string) bool { return strings.Contains(s, end) }} + p.addBlock(b) + b.text = append(b.text, s.string()) + if b.endFunc(t) { + p.closeBlock() + } + return true + } + + // case 6 + i := 1 + if i < len(t) && t[i] == '/' { + i++ + } + buf := make([]byte, 0, 16) + for ; i < len(t) && len(buf) < 16; i++ { + c := t[i] + if 'A' <= c && c <= 'Z' { + c += 'a' - 'A' + } + if !('a' <= c && c <= 'z') && !('0' <= c && c <= '9') { + break + } + buf = append(buf, c) + } + var sep byte + if i < len(t) { + switch t[i] { + default: + goto Next + case ' ', '\t', '>': + // ok + sep = t[i] + case '/': + if i+1 >= len(t) || t[i+1] != '>' { + goto Next + } + } + } + + if len(buf) == 0 { + goto Next + } + { + c := buf[0] + var ok bool + for _, name := range htmlTags { + if name[0] == c && len(name) == len(buf) && name == string(buf) { + if sep == '\t' { + // Goldmark recognizes space here but not tab. + // testdata/extra.txt 143.md + p.corner = true + } + ok = true + break + } + } + if !ok { + goto Next + } + } + + { + b := &htmlBuilder{endBlank: true} + p.addBlock(b) + b.text = append(b.text, s.string()) + return true + } + +Next: + // case 1 + if len(t) > 1 && t[1] != '/' && (i >= len(t) || t[i] == ' ' || t[i] == '\t' || t[i] == '>') { + switch string(buf) { + case "pre", "script", "style", "textarea": + b := &htmlBuilder{endFunc: hasEndPre} + p.addBlock(b) + b.text = append(b.text, s.string()) + if hasEndPre(t) { + p.closeBlock() + } + return true + } + } + + // case 7 + if p.para() == nil { + if _, e, ok := parseHTMLOpenTag(p, t, 0); ok && skipSpace(t, e) == len(t) { + if e != len(t) { + // Goldmark disallows trailing space + p.corner = true + } + b := &htmlBuilder{endBlank: true} + p.addBlock(b) + b.text = append(b.text, s.string()) + return true + } + if _, e, ok := parseHTMLClosingTag(p, t, 0); ok && skipSpace(t, e) == len(t) { + b := &htmlBuilder{endBlank: true} + p.addBlock(b) + b.text = append(b.text, s.string()) + return true + } + } + + return false +} + +func hasEndPre(s string) bool { + for i := 0; i < len(s); i++ { + if s[i] == '<' && i+1 < len(s) && s[i+1] == '/' { + buf := make([]byte, 0, 8) + for i += 2; i < len(s) && len(buf) < 8; i++ { + c := s[i] + if 'A' <= c && c <= 'Z' { + c += 'a' - 'A' + } + if c < 'a' || 'z' < c { + break + } + buf = append(buf, c) + } + if i < len(s) && s[i] == '>' { + switch string(buf) { + case "pre", "script", "style", "textarea": + return true + } + } + } + } + return false +} + +func parseHTMLTag(p *parseState, s string, i int) (Inline, int, bool) { + // “An HTML tag consists of an open tag, a closing tag, an HTML comment, + // a processing instruction, a declaration, or a CDATA section.” + if i+3 <= len(s) && s[i] == '<' { + switch s[i+1] { + default: + return parseHTMLOpenTag(p, s, i) + case '/': + return parseHTMLClosingTag(p, s, i) + case '!': + switch s[i+2] { + case '-': + return parseHTMLComment(s, i) + case '[': + return parseHTMLCDATA(s, i) + default: + return parseHTMLDecl(p, s, i) + } + case '?': + return parseHTMLProcInst(s, i) + } + } + return nil, 0, false +} + +func parseHTMLOpenTag(p *parseState, s string, i int) (Inline, int, bool) { + if i >= len(s) || s[i] != '<' { + return nil, 0, false + } + // “An open tag consists of a < character, a tag name, zero or more attributes, + // optional spaces, tabs, and up to one line ending, an optional / character, and a > character.” + if name, j, ok := parseTagName(s, i+1); ok { + switch name { + case "pre", "script", "style", "textarea": + // Goldmark treats these as starting a new HTMLBlock + // and ending the paragraph they appear in. + p.corner = true + } + for { + if j >= len(s) || s[j] != ' ' && s[j] != '\t' && s[j] != '\n' && s[j] != '/' && s[j] != '>' { + return nil, 0, false + } + _, k, ok := parseAttr(p, s, j) + if !ok { + break + } + j = k + } + k := skipSpace(s, j) + if k != j { + // Goldmark mishandles spaces before >. + p.corner = true + } + j = k + if j < len(s) && s[j] == '/' { + j++ + } + if j < len(s) && s[j] == '>' { + return &HTMLTag{s[i : j+1]}, j + 1, true + } + } + return nil, 0, false +} + +func parseHTMLClosingTag(p *parseState, s string, i int) (Inline, int, bool) { + // “A closing tag consists of the string </, a tag name, + // optional spaces, tabs, and up to one line ending, and the character >.” + if i+2 >= len(s) || s[i] != '<' || s[i+1] != '/' { + return nil, 0, false + } + if skipSpace(s, i+2) != i+2 { + // Goldmark allows spaces here but the spec and the Dingus do not. + p.corner = true + } + + if _, j, ok := parseTagName(s, i+2); ok { + j = skipSpace(s, j) + if j < len(s) && s[j] == '>' { + return &HTMLTag{s[i : j+1]}, j + 1, true + } + } + return nil, 0, false +} + +func parseTagName(s string, i int) (string, int, bool) { + // “A tag name consists of an ASCII letter followed by zero or more ASCII letters, digits, or hyphens (-).” + if i < len(s) && isLetter(s[i]) { + j := i + 1 + for j < len(s) && isLDH(s[j]) { + j++ + } + return s[i:j], j, true + } + return "", 0, false +} + +func parseAttr(p *parseState, s string, i int) (string, int, bool) { + // “An attribute consists of spaces, tabs, and up to one line ending, + // an attribute name, and an optional attribute value specification.” + i = skipSpace(s, i) + if _, j, ok := parseAttrName(s, i); ok { + if _, k, ok := parseAttrValueSpec(p, s, j); ok { + j = k + } + return s[i:j], j, true + } + return "", 0, false +} + +func parseAttrName(s string, i int) (string, int, bool) { + // “An attribute name consists of an ASCII letter, _, or :, + // followed by zero or more ASCII letters, digits, _, ., :, or -.” + if i+1 < len(s) && (isLetter(s[i]) || s[i] == '_' || s[i] == ':') { + j := i + 1 + for j < len(s) && (isLDH(s[j]) || s[j] == '_' || s[j] == '.' || s[j] == ':') { + j++ + } + return s[i:j], j, true + } + return "", 0, false +} + +func parseAttrValueSpec(p *parseState, s string, i int) (string, int, bool) { + // “An attribute value specification consists of + // optional spaces, tabs, and up to one line ending, + // a = character, + // optional spaces, tabs, and up to one line ending, + // and an attribute value.” + i = skipSpace(s, i) + if i+1 < len(s) && s[i] == '=' { + i = skipSpace(s, i+1) + if _, j, ok := parseAttrValue(s, i); ok { + p.corner = p.corner || strings.Contains(s[i:j], "\ufffd") + return s[i:j], j, true + } + } + return "", 0, false +} + +func parseAttrValue(s string, i int) (string, int, bool) { + // “An attribute value consists of + // an unquoted attribute value, + // a single-quoted attribute value, + // or a double-quoted attribute value.” + // TODO: No escaping??? + if i < len(s) && (s[i] == '\'' || s[i] == '"') { + // “A single-quoted attribute value consists of ', + // zero or more characters not including ', and a final '.” + // “A double-quoted attribute value consists of ", + // zero or more characters not including ", and a final ".” + if j := strings.IndexByte(s[i+1:], s[i]); j >= 0 { + end := i + 1 + j + 1 + return s[i:end], end, true + } + } + + // “An unquoted attribute value is a nonempty string of characters + // not including spaces, tabs, line endings, ", ', =, <, >, or `.” + j := i + for j < len(s) && strings.IndexByte(" \t\n\"'=<>`", s[j]) < 0 { + j++ + } + if j > i { + return s[i:j], j, true + } + return "", 0, false +} + +func parseHTMLComment(s string, i int) (Inline, int, bool) { + // “An HTML comment consists of <!-- + text + -->, + // where text does not start with > or ->, + // does not end with -, and does not contain --.” + if !strings.HasPrefix(s[i:], "<!-->") && + !strings.HasPrefix(s[i:], "<!--->") { + if x, end, ok := parseHTMLMarker(s, i, "<!--", "-->"); ok { + if t := x.(*HTMLTag).Text; !strings.Contains(t[len("<!--"):len(t)-len("->")], "--") { + return x, end, ok + } + } + } + return nil, 0, false +} + +func parseHTMLCDATA(s string, i int) (Inline, int, bool) { + // “A CDATA section consists of the string <![CDATA[, + // a string of characters not including the string ]]>, and the string ]]>.” + return parseHTMLMarker(s, i, "<![CDATA[", "]]>") +} + +func parseHTMLDecl(p *parseState, s string, i int) (Inline, int, bool) { + // “A declaration consists of the string <!, an ASCII letter, + // zero or more characters not including the character >, and the character >.” + if i+2 < len(s) && isLetter(s[i+2]) { + if 'a' <= s[i+2] && s[i+2] <= 'z' { + p.corner = true // goldmark requires uppercase + } + return parseHTMLMarker(s, i, "<!", ">") + } + return nil, 0, false +} + +func parseHTMLProcInst(s string, i int) (Inline, int, bool) { + // “A processing instruction consists of the string <?, + // a string of characters not including the string ?>, and the string ?>.” + return parseHTMLMarker(s, i, "<?", "?>") +} + +func parseHTMLMarker(s string, i int, prefix, suffix string) (Inline, int, bool) { + if strings.HasPrefix(s[i:], prefix) { + if j := strings.Index(s[i+len(prefix):], suffix); j >= 0 { + end := i + len(prefix) + j + len(suffix) + return &HTMLTag{s[i:end]}, end, true + } + } + return nil, 0, false +} + +func parseHTMLEntity(_ *parseState, s string, i int) (Inline, int, int, bool) { + start := i + if i+1 < len(s) && s[i+1] == '#' { + i += 2 + var r, end int + if i < len(s) && (s[i] == 'x' || s[i] == 'X') { + // hex + i++ + j := i + for j < len(s) && isHexDigit(s[j]) { + j++ + } + if j-i < 1 || j-i > 6 || j >= len(s) || s[j] != ';' { + return nil, 0, 0, false + } + r64, _ := strconv.ParseInt(s[i:j], 16, 0) + r = int(r64) + end = j + 1 + } else { + // decimal + j := i + for j < len(s) && isDigit(s[j]) { + j++ + } + if j-i < 1 || j-i > 7 || j >= len(s) || s[j] != ';' { + return nil, 0, 0, false + } + r, _ = strconv.Atoi(s[i:j]) + end = j + 1 + } + if r > unicode.MaxRune || r == 0 { + r = unicode.ReplacementChar + } + return &Plain{string(rune(r))}, start, end, true + } + + // Max name in list is 32 bytes. Try for 64 for good measure. + for j := i + 1; j < len(s) && j-i < 64; j++ { + if s[j] == '&' { // Stop possible quadratic search on &&&&&&&. + break + } + if s[j] == ';' { + if r, ok := htmlEntity[s[i:j+1]]; ok { + return &Plain{r}, start, j + 1, true + } + break + } + } + + return nil, 0, 0, false +} + +type HTMLTag struct { + Text string +} + +func (*HTMLTag) Inline() {} + +func (x *HTMLTag) PrintHTML(buf *bytes.Buffer) { + buf.WriteString(x.Text) +} + +func (x *HTMLTag) printMarkdown(buf *bytes.Buffer) { + x.PrintHTML(buf) +} + +func (x *HTMLTag) PrintText(buf *bytes.Buffer) {} |
