aboutsummaryrefslogtreecommitdiff
path: root/html_backend.go
diff options
context:
space:
mode:
authorShulhan <ms@kilabit.info>2022-11-21 01:20:03 +0700
committerShulhan <ms@kilabit.info>2022-11-27 21:14:18 +0700
commit5c7bfc04dc3d2cd60e84c80229804fdcd615709e (patch)
tree63b9749699090421b55355d0f3e1f5c1b99875a1 /html_backend.go
parent3d1caba74842d7dd0800cd0351ddb4c47364a541 (diff)
downloadasciidoctor-go-5c7bfc04dc3d2cd60e84c80229804fdcd615709e.tar.xz
all: implement inline macro for passthrough ("pass:")
The inline passthrough "pass:" can be used to control the substitutions applied to a run of text. Ref: https://docs.asciidoctor.org/asciidoc/latest/pass/pass-macro/
Diffstat (limited to 'html_backend.go')
-rw-r--r--html_backend.go521
1 files changed, 521 insertions, 0 deletions
diff --git a/html_backend.go b/html_backend.go
index 4c2e467..1bbefcb 100644
--- a/html_backend.go
+++ b/html_backend.go
@@ -9,6 +9,7 @@ import (
"io"
"strings"
+ libascii "github.com/shuLhan/share/lib/ascii"
libstrings "github.com/shuLhan/share/lib/strings"
)
@@ -60,6 +61,519 @@ const (
htmlSymbolZeroWidthSpace = `&#8203;`
)
+// htmlSubs apply the text substitutions to element.raw based on applySubs in
+// the following order: c, q, a, r, m, p.
+// If applySubs is 0, it will return element.raw as is.
+func htmlSubs(doc *Document, el *element) []byte {
+ var (
+ input = el.raw
+ )
+ if el.applySubs == 0 {
+ return input
+ }
+ if el.applySubs&passSubChar != 0 {
+ input = htmlSubsChar(input)
+ }
+ if el.applySubs&passSubQuote != 0 {
+ input = htmlSubsQuote(input)
+ }
+ if el.applySubs&passSubAttr != 0 {
+ input = htmlSubsAttr(doc, input)
+ }
+ if el.applySubs&passSubRepl != 0 {
+ input = htmlSubsRepl(input)
+ }
+ if el.applySubs&passSubMacro != 0 {
+ input = htmlSubsMacro(doc, input, el.kind == elKindInlinePass)
+ }
+ return input
+}
+
// htmlSubsChar replaces the special characters '<', '>', and '&' in input
// with "&lt;", "&gt;", and "&amp;" respectively.
// All other bytes are copied unchanged.
// The result is written into a new buffer; input is not modified.
//
// Ref: https://docs.asciidoctor.org/asciidoc/latest/subs/special-characters/
func htmlSubsChar(input []byte) []byte {
	var (
		bb bytes.Buffer
		c  byte
	)

	// The output is at least as long as the input, so reserve that much
	// up front instead of growing the buffer piece by piece.
	bb.Grow(len(input))

	for _, c = range input {
		switch c {
		case '<':
			bb.WriteString(`&lt;`)
		case '>':
			bb.WriteString(`&gt;`)
		case '&':
			bb.WriteString(`&amp;`)
		default:
			bb.WriteByte(c)
		}
	}
	return bb.Bytes()
}
+
+// htmlSubsQuote replace inline markup with its HTML markup.
+// The following inline markup ara parsed and substitutes,
+//
+// - emphasis: _word_ with "<em>word</em>".
+// - strong: *word* with "<strong>word</strong>".
+// - monospace: `word` with "<code>word</code>".
+// - superscript: ^word^ with "<sup>word</sup>".
+// - subscript: ~word~ with "<sub>word</sub>".
+// - double curved quotes: "`word`" with "&#8220;word&#8221;"
+// - single curved quotes: '`word`' with "&#8216;word&#8217;"
+//
+// Ref: https://docs.asciidoctor.org/asciidoc/latest/subs/quotes/
+func htmlSubsQuote(input []byte) []byte {
+ var (
+ bb bytes.Buffer
+ x int
+ idx int
+ text []byte
+ c1 byte
+ nextc byte
+ )
+ for x < len(input) {
+ c1 = input[x]
+
+ x++
+ if x == len(input) {
+ // Nothing left to parsed.
+ bb.WriteByte(c1)
+ break
+ }
+ nextc = input[x]
+
+ if c1 == '_' {
+ text, idx = indexByteUnescape(input[x:], c1)
+ if text == nil {
+ bb.WriteByte(c1)
+ continue
+ }
+ bb.WriteString(`<em>`)
+ bb.Write(text)
+ bb.WriteString(`</em>`)
+ x = x + idx + 1
+ continue
+ }
+ if c1 == '*' {
+ text, idx = indexByteUnescape(input[x:], c1)
+ if text == nil {
+ bb.WriteByte(c1)
+ continue
+ }
+ bb.WriteString(`<strong>`)
+ bb.Write(text)
+ bb.WriteString(`</strong>`)
+ x = x + idx + 1
+ continue
+ }
+ if c1 == '`' {
+ text, idx = indexByteUnescape(input[x:], c1)
+ if text == nil {
+ bb.WriteByte(c1)
+ continue
+ }
+ bb.WriteString(`<code>`)
+ bb.Write(text)
+ bb.WriteString(`</code>`)
+ x = x + idx + 1
+ continue
+ }
+ if c1 == '^' {
+ text, idx = indexByteUnescape(input[x:], c1)
+ if text == nil {
+ bb.WriteByte(c1)
+ continue
+ }
+ bb.WriteString(`<sup>`)
+ bb.Write(text)
+ bb.WriteString(`</sup>`)
+ x = x + idx + 1
+ continue
+ }
+ if c1 == '~' {
+ text, idx = indexByteUnescape(input[x:], c1)
+ if text == nil {
+ bb.WriteByte(c1)
+ continue
+ }
+ bb.WriteString(`<sub>`)
+ bb.Write(text)
+ bb.WriteString(`</sub>`)
+ x = x + idx + 1
+ continue
+ }
+ if c1 == '"' {
+ if nextc != '`' {
+ bb.WriteByte(c1)
+ continue
+ }
+ if x+1 == len(input) {
+ bb.WriteByte(c1)
+ continue
+ }
+
+ text, idx = indexUnescape(input[x+1:], []byte("`\""))
+ if text == nil {
+ bb.WriteByte(c1)
+ continue
+ }
+ bb.WriteString(htmlSymbolLeftDoubleQuote)
+ bb.Write(text)
+ bb.WriteString(htmlSymbolRightDoubleQuote)
+ x = x + idx + 3
+ continue
+ }
+ if c1 == '\'' {
+ if nextc != '`' {
+ bb.WriteByte(c1)
+ continue
+ }
+ if x+1 == len(input) {
+ bb.WriteByte(c1)
+ continue
+ }
+
+ text, idx = indexUnescape(input[x+1:], []byte("`'"))
+ if text == nil {
+ bb.WriteByte(c1)
+ continue
+ }
+ bb.WriteString(htmlSymbolLeftSingleQuote)
+ bb.Write(text)
+ bb.WriteString(htmlSymbolRightSingleQuote)
+ x = x + idx + 3
+ continue
+ }
+ bb.WriteByte(c1)
+ }
+ return bb.Bytes()
+}
+
+// htmlSubsAttr replace attribute (the `{...}`) with its values.
+//
+// Ref: https://docs.asciidoctor.org/asciidoc/latest/subs/attributes/
+func htmlSubsAttr(doc *Document, input []byte) []byte {
+ var (
+ bb bytes.Buffer
+ key string
+ val string
+ vbytes []byte
+ idx int
+ x int
+ c byte
+ ok bool
+ )
+
+ for x < len(input) {
+ c = input[x]
+ x++
+ if c != '{' {
+ bb.WriteByte(c)
+ continue
+ }
+
+ vbytes, idx = indexByteUnescape(input[x:], '}')
+ if vbytes == nil {
+ bb.WriteByte(c)
+ continue
+ }
+ vbytes = bytes.TrimSpace(vbytes)
+ vbytes = bytes.ToLower(vbytes)
+
+ key = string(vbytes)
+ val, ok = _attrRef[key]
+ if ok {
+ bb.WriteString(val)
+ x = x + idx + 1
+ continue
+ }
+
+ val, ok = doc.Attributes[key]
+ if !ok {
+ bb.WriteByte(c)
+ continue
+ }
+
+ // Add prefix "mailto:" if the ref name start with email, so
+ // it can be parsed by caller as macro link.
+ if key == `email` || strings.HasPrefix(key, `email_`) {
+ val = `mailto:` + val + `[` + val + `]`
+ }
+
+ bb.WriteString(val)
+ x = x + idx + 1
+ }
+
+ return bb.Bytes()
+}
+
+// htmlSubsRepl substitutes special characters with HTML unicode.
+//
+// The special characters are,
+//
+// - (C) replaced with &#169;
+// - (R) : &#174;
+// - (TM) : &#8482;
+// - -- : &#8212; Only replaced if between two word characters, between a
+// word character and a line boundary, or flanked by spaces.
+// When flanked by space characters (e.g., a -- b), the normal spaces are
+// replaced by thin spaces (&#8201;).
+// - ... : &#8230;
+// - -> : &#8594;
+// - => : &#8658;
+// - <- : &#8592;
+// - <= : &#8656;
+// - ' : &#8217;
+//
+// According to [the documentation], this substitution step also recognizes
+// HTML and XML character references as well as decimal and hexadecimal
+// Unicode code points, but we only cover the above right now.
+//
+// [the documentation]: https://docs.asciidoctor.org/asciidoc/latest/subs/replacements/
+func htmlSubsRepl(input []byte) (out []byte) {
+ var (
+ text []byte
+ x int
+ idx int
+ c1 byte
+ nextc byte
+ prevc byte
+ )
+
+ out = make([]byte, 0, len(input))
+
+ for x < len(input) {
+ prevc = c1
+ c1 = input[x]
+
+ x++
+ if x == len(input) {
+ out = append(out, c1)
+ break
+ }
+ nextc = input[x]
+
+ if c1 == '(' {
+ text, idx = indexByteUnescape(input[x:], ')')
+ if len(text) == 1 {
+ if text[0] == 'C' {
+ out = append(out, []byte(htmlSymbolCopyright)...)
+ x = x + idx + 1
+ c1 = ')'
+ continue
+ }
+ if text[0] == 'R' {
+ out = append(out, []byte(htmlSymbolRegistered)...)
+ x = x + idx + 1
+ c1 = ')'
+ continue
+ }
+ } else if len(text) == 2 {
+ if text[0] == 'T' && text[1] == 'M' {
+ out = append(out, []byte(htmlSymbolTrademark)...)
+ x = x + idx + 1
+ c1 = ')'
+ continue
+ }
+ }
+
+ out = append(out, c1)
+ continue
+ }
+ if c1 == '-' {
+ if nextc == '>' {
+ out = append(out, []byte(htmlSymbolSingleRightArrow)...)
+ x++
+ c1 = nextc
+ continue
+ }
+ if nextc == '-' {
+ if x+1 >= len(input) {
+ out = append(out, c1)
+ continue
+ }
+ // set c1 to the third character after '--'.
+ c1 = input[x+1]
+ if libascii.IsSpace(prevc) && libascii.IsSpace(c1) {
+ out = out[:len(out)-1]
+ out = append(out, []byte(htmlSymbolThinSpace)...)
+ out = append(out, []byte(htmlSymbolEmdash)...)
+ out = append(out, []byte(htmlSymbolThinSpace)...)
+ x += 2
+ continue
+ }
+ if libascii.IsAlpha(prevc) && libascii.IsAlpha(c1) {
+ out = append(out, []byte(htmlSymbolEmdash)...)
+ x++
+ continue
+ }
+ }
+ out = append(out, c1)
+ continue
+ }
+ if c1 == '=' {
+ if nextc == '>' {
+ out = append(out, []byte(htmlSymbolDoubleRightArrow)...)
+ x++
+ c1 = nextc
+ continue
+ }
+ out = append(out, c1)
+ continue
+ }
+ if c1 == '<' {
+ if nextc == '-' {
+ out = append(out, []byte(htmlSymbolSingleLeftArrow)...)
+ x++
+ continue
+ }
+ if nextc == '=' {
+ out = append(out, []byte(htmlSymbolDoubleLeftArrow)...)
+ x++
+ continue
+ }
+ out = append(out, c1)
+ continue
+ }
+ if c1 == '.' {
+ if nextc != '.' {
+ out = append(out, c1)
+ continue
+ }
+ if x+1 >= len(input) {
+ out = append(out, c1)
+ continue
+ }
+ // Set c1 to the third character.
+ c1 = input[x+1]
+ if c1 == '.' {
+ out = append(out, []byte(htmlSymbolEllipsis)...)
+ x += 2
+ continue
+ }
+ out = append(out, c1)
+ continue
+ }
+ if c1 == '\'' {
+ if libascii.IsAlpha(prevc) {
+ out = append(out, []byte(htmlSymbolApostrophe)...)
+ continue
+ }
+ out = append(out, c1)
+ continue
+ }
+ out = append(out, c1)
+ }
+ return out
+}
+
+// htmlSubsMacro substitutes macro with its HTML markup.
+func htmlSubsMacro(doc *Document, input []byte, isInlinePass bool) (out []byte) {
+ var (
+ el *element
+ bb bytes.Buffer
+ macroName string
+ x int
+ n int
+ c byte
+ )
+
+ for x < len(input) {
+ c = input[x]
+ if c != ':' {
+ out = append(out, c)
+ x++
+ continue
+ }
+
+ macroName = parseMacroName(input[:x])
+ if len(macroName) == 0 {
+ out = append(out, c)
+ x++
+ continue
+ }
+
+ switch macroName {
+ case macroFootnote:
+ el, n = parseMacroFootnote(doc, input[x+1:])
+ if el == nil {
+ out = append(out, c)
+ x++
+ continue
+ }
+ x += n
+ n = len(out)
+ out = out[:n-len(macroName)] // Undo the macro name
+ bb.Reset()
+ htmlWriteFootnote(el, &bb)
+ out = append(out, bb.Bytes()...)
+
+ case macroFTP, macroHTTPS, macroHTTP, macroIRC, macroLink, macroMailto:
+ el, n = parseURL(doc, macroName, input[x+1:])
+ if el == nil {
+ out = append(out, c)
+ x++
+ continue
+ }
+ x += n
+ n = len(out)
+ out = out[:n-len(macroName)]
+ bb.Reset()
+ htmlWriteURLBegin(el, &bb)
+ if el.child != nil {
+ el.child.toHTML(doc, &bb)
+ }
+ htmlWriteURLEnd(&bb)
+ out = append(out, bb.Bytes()...)
+
+ case macroImage:
+ el, n = parseInlineImage(doc, input[x+1:])
+ if el == nil {
+ out = append(out, c)
+ x++
+ continue
+ }
+ x += n
+ n = len(out)
+ out = out[:n-len(macroName)]
+ bb.Reset()
+ htmlWriteInlineImage(el, &bb)
+ out = append(out, bb.Bytes()...)
+
+ case macroPass:
+ if isInlinePass {
+ // Prevent recursive substitutions.
+ out = append(out, c)
+ x++
+ continue
+ }
+ el, n = parseMacroPass(input[x+1:])
+ if el == nil {
+ out = append(out, c)
+ x++
+ continue
+ }
+ x += n
+ n = len(out)
+ out = out[:n-len(macroName)]
+ bb.Reset()
+ htmlWriteInlinePass(doc, el, &bb)
+ out = append(out, bb.Bytes()...)
+
+ default:
+ out = append(out, c)
+ x++
+ }
+ }
+ return out
+}
+
func htmlWriteBlockBegin(el *element, out io.Writer, addClass string) {
fmt.Fprint(out, "\n<div")
@@ -560,6 +1074,13 @@ func htmlWriteInlineImage(el *element, out io.Writer) {
fmt.Fprint(out, `</span>`)
}
+func htmlWriteInlinePass(doc *Document, el *element, out io.Writer) {
+ var (
+ text []byte = htmlSubs(doc, el)
+ )
+ fmt.Fprint(out, string(text))
+}
+
func htmlWriteListDescription(el *element, out io.Writer) {
var openTag string
if el.isStyleQandA() {