diff options
Diffstat (limited to 'html_backend.go')
| -rw-r--r-- | html_backend.go | 521 |
1 files changed, 521 insertions, 0 deletions
diff --git a/html_backend.go b/html_backend.go index 4c2e467..1bbefcb 100644 --- a/html_backend.go +++ b/html_backend.go @@ -9,6 +9,7 @@ import ( "io" "strings" + libascii "github.com/shuLhan/share/lib/ascii" libstrings "github.com/shuLhan/share/lib/strings" ) @@ -60,6 +61,519 @@ const ( htmlSymbolZeroWidthSpace = `​` ) +// htmlSubs apply the text substitutions to element.raw based on applySubs in +// the following order: c, q, a, r, m, p. +// If applySubs is 0, it will return element.raw as is. +func htmlSubs(doc *Document, el *element) []byte { + var ( + input = el.raw + ) + if el.applySubs == 0 { + return input + } + if el.applySubs&passSubChar != 0 { + input = htmlSubsChar(input) + } + if el.applySubs&passSubQuote != 0 { + input = htmlSubsQuote(input) + } + if el.applySubs&passSubAttr != 0 { + input = htmlSubsAttr(doc, input) + } + if el.applySubs&passSubRepl != 0 { + input = htmlSubsRepl(input) + } + if el.applySubs&passSubMacro != 0 { + input = htmlSubsMacro(doc, input, el.kind == elKindInlinePass) + } + return input +} + +// htmlSubsChar replace character '<', '>', and '&' with "<", ">", and +// "&". +// +// Ref: https://docs.asciidoctor.org/asciidoc/latest/subs/special-characters/ +func htmlSubsChar(input []byte) []byte { + var ( + bb bytes.Buffer + c byte + ) + for _, c = range input { + if c == '<' { + bb.WriteString(`<`) + continue + } + if c == '>' { + bb.WriteString(`>`) + continue + } + if c == '&' { + bb.WriteString(`&`) + continue + } + bb.WriteByte(c) + } + return bb.Bytes() +} + +// htmlSubsQuote replace inline markup with its HTML markup. +// The following inline markup ara parsed and substitutes, +// +// - emphasis: _word_ with "<em>word</em>". +// - strong: *word* with "<strong>word</strong>". +// - monospace: `word` with "<code>word</code>". +// - superscript: ^word^ with "<sup>word</sup>". +// - subscript: ~word~ with "<sub>word</sub>". +// - double curved quotes: "`word`" with "“word”" +// - single curved quotes: '`word`' with "‘word’" +// +// Ref: https://docs.asciidoctor.org/asciidoc/latest/subs/quotes/ +func htmlSubsQuote(input []byte) []byte { + var ( + bb bytes.Buffer + x int + idx int + text []byte + c1 byte + nextc byte + ) + for x < len(input) { + c1 = input[x] + + x++ + if x == len(input) { + // Nothing left to parsed. + bb.WriteByte(c1) + break + } + nextc = input[x] + + if c1 == '_' { + text, idx = indexByteUnescape(input[x:], c1) + if text == nil { + bb.WriteByte(c1) + continue + } + bb.WriteString(`<em>`) + bb.Write(text) + bb.WriteString(`</em>`) + x = x + idx + 1 + continue + } + if c1 == '*' { + text, idx = indexByteUnescape(input[x:], c1) + if text == nil { + bb.WriteByte(c1) + continue + } + bb.WriteString(`<strong>`) + bb.Write(text) + bb.WriteString(`</strong>`) + x = x + idx + 1 + continue + } + if c1 == '`' { + text, idx = indexByteUnescape(input[x:], c1) + if text == nil { + bb.WriteByte(c1) + continue + } + bb.WriteString(`<code>`) + bb.Write(text) + bb.WriteString(`</code>`) + x = x + idx + 1 + continue + } + if c1 == '^' { + text, idx = indexByteUnescape(input[x:], c1) + if text == nil { + bb.WriteByte(c1) + continue + } + bb.WriteString(`<sup>`) + bb.Write(text) + bb.WriteString(`</sup>`) + x = x + idx + 1 + continue + } + if c1 == '~' { + text, idx = indexByteUnescape(input[x:], c1) + if text == nil { + bb.WriteByte(c1) + continue + } + bb.WriteString(`<sub>`) + bb.Write(text) + bb.WriteString(`</sub>`) + x = x + idx + 1 + continue + } + if c1 == '"' { + if nextc != '`' { + bb.WriteByte(c1) + continue + } + if x+1 == len(input) { + bb.WriteByte(c1) + continue + } + + text, idx = indexUnescape(input[x+1:], []byte("`\"")) + if text == nil { + bb.WriteByte(c1) + continue + } + bb.WriteString(htmlSymbolLeftDoubleQuote) + bb.Write(text) + bb.WriteString(htmlSymbolRightDoubleQuote) + x = x + idx + 3 + continue + } + if c1 == '\'' { + if nextc != '`' { + bb.WriteByte(c1) + continue + } + if x+1 == len(input) { + bb.WriteByte(c1) + continue + } + + text, idx = indexUnescape(input[x+1:], []byte("`'")) + if text == nil { + bb.WriteByte(c1) + continue + } + bb.WriteString(htmlSymbolLeftSingleQuote) + bb.Write(text) + bb.WriteString(htmlSymbolRightSingleQuote) + x = x + idx + 3 + continue + } + bb.WriteByte(c1) + } + return bb.Bytes() +} + +// htmlSubsAttr replace attribute (the `{...}`) with its values. +// +// Ref: https://docs.asciidoctor.org/asciidoc/latest/subs/attributes/ +func htmlSubsAttr(doc *Document, input []byte) []byte { + var ( + bb bytes.Buffer + key string + val string + vbytes []byte + idx int + x int + c byte + ok bool + ) + + for x < len(input) { + c = input[x] + x++ + if c != '{' { + bb.WriteByte(c) + continue + } + + vbytes, idx = indexByteUnescape(input[x:], '}') + if vbytes == nil { + bb.WriteByte(c) + continue + } + vbytes = bytes.TrimSpace(vbytes) + vbytes = bytes.ToLower(vbytes) + + key = string(vbytes) + val, ok = _attrRef[key] + if ok { + bb.WriteString(val) + x = x + idx + 1 + continue + } + + val, ok = doc.Attributes[key] + if !ok { + bb.WriteByte(c) + continue + } + + // Add prefix "mailto:" if the ref name start with email, so + // it can be parsed by caller as macro link. + if key == `email` || strings.HasPrefix(key, `email_`) { + val = `mailto:` + val + `[` + val + `]` + } + + bb.WriteString(val) + x = x + idx + 1 + } + + return bb.Bytes() +} + +// htmlSubsRepl substitutes special characters with HTML unicode. +// +// The special characters are, +// +// - (C) replaced with © +// - (R) : ® +// - (TM) : ™ +// - -- : — Only replaced if between two word characters, between a +// word character and a line boundary, or flanked by spaces. +// When flanked by space characters (e.g., a -- b), the normal spaces are +// replaced by thin spaces ( ). +// - ... : … +// - -> : → +// - => : ⇒ +// - <- : ← +// - <= : ⇐ +// - ' : ’ +// +// According to [the documentation], this substitution step also recognizes +// HTML and XML character references as well as decimal and hexadecimal +// Unicode code points, but we only cover the above right now. +// +// [the documentation]: https://docs.asciidoctor.org/asciidoc/latest/subs/replacements/ +func htmlSubsRepl(input []byte) (out []byte) { + var ( + text []byte + x int + idx int + c1 byte + nextc byte + prevc byte + ) + + out = make([]byte, 0, len(input)) + + for x < len(input) { + prevc = c1 + c1 = input[x] + + x++ + if x == len(input) { + out = append(out, c1) + break + } + nextc = input[x] + + if c1 == '(' { + text, idx = indexByteUnescape(input[x:], ')') + if len(text) == 1 { + if text[0] == 'C' { + out = append(out, []byte(htmlSymbolCopyright)...) + x = x + idx + 1 + c1 = ')' + continue + } + if text[0] == 'R' { + out = append(out, []byte(htmlSymbolRegistered)...) + x = x + idx + 1 + c1 = ')' + continue + } + } else if len(text) == 2 { + if text[0] == 'T' && text[1] == 'M' { + out = append(out, []byte(htmlSymbolTrademark)...) + x = x + idx + 1 + c1 = ')' + continue + } + } + + out = append(out, c1) + continue + } + if c1 == '-' { + if nextc == '>' { + out = append(out, []byte(htmlSymbolSingleRightArrow)...) + x++ + c1 = nextc + continue + } + if nextc == '-' { + if x+1 >= len(input) { + out = append(out, c1) + continue + } + // set c1 to the third character after '--'. + c1 = input[x+1] + if libascii.IsSpace(prevc) && libascii.IsSpace(c1) { + out = out[:len(out)-1] + out = append(out, []byte(htmlSymbolThinSpace)...) + out = append(out, []byte(htmlSymbolEmdash)...) + out = append(out, []byte(htmlSymbolThinSpace)...) + x += 2 + continue + } + if libascii.IsAlpha(prevc) && libascii.IsAlpha(c1) { + out = append(out, []byte(htmlSymbolEmdash)...) + x++ + continue + } + } + out = append(out, c1) + continue + } + if c1 == '=' { + if nextc == '>' { + out = append(out, []byte(htmlSymbolDoubleRightArrow)...) + x++ + c1 = nextc + continue + } + out = append(out, c1) + continue + } + if c1 == '<' { + if nextc == '-' { + out = append(out, []byte(htmlSymbolSingleLeftArrow)...) + x++ + continue + } + if nextc == '=' { + out = append(out, []byte(htmlSymbolDoubleLeftArrow)...) + x++ + continue + } + out = append(out, c1) + continue + } + if c1 == '.' { + if nextc != '.' { + out = append(out, c1) + continue + } + if x+1 >= len(input) { + out = append(out, c1) + continue + } + // Set c1 to the third character. + c1 = input[x+1] + if c1 == '.' { + out = append(out, []byte(htmlSymbolEllipsis)...) + x += 2 + continue + } + out = append(out, c1) + continue + } + if c1 == '\'' { + if libascii.IsAlpha(prevc) { + out = append(out, []byte(htmlSymbolApostrophe)...) + continue + } + out = append(out, c1) + continue + } + out = append(out, c1) + } + return out +} + +// htmlSubsMacro substitutes macro with its HTML markup. +func htmlSubsMacro(doc *Document, input []byte, isInlinePass bool) (out []byte) { + var ( + el *element + bb bytes.Buffer + macroName string + x int + n int + c byte + ) + + for x < len(input) { + c = input[x] + if c != ':' { + out = append(out, c) + x++ + continue + } + + macroName = parseMacroName(input[:x]) + if len(macroName) == 0 { + out = append(out, c) + x++ + continue + } + + switch macroName { + case macroFootnote: + el, n = parseMacroFootnote(doc, input[x+1:]) + if el == nil { + out = append(out, c) + x++ + continue + } + x += n + n = len(out) + out = out[:n-len(macroName)] // Undo the macro name + bb.Reset() + htmlWriteFootnote(el, &bb) + out = append(out, bb.Bytes()...) + + case macroFTP, macroHTTPS, macroHTTP, macroIRC, macroLink, macroMailto: + el, n = parseURL(doc, macroName, input[x+1:]) + if el == nil { + out = append(out, c) + x++ + continue + } + x += n + n = len(out) + out = out[:n-len(macroName)] + bb.Reset() + htmlWriteURLBegin(el, &bb) + if el.child != nil { + el.child.toHTML(doc, &bb) + } + htmlWriteURLEnd(&bb) + out = append(out, bb.Bytes()...) + + case macroImage: + el, n = parseInlineImage(doc, input[x+1:]) + if el == nil { + out = append(out, c) + x++ + continue + } + x += n + n = len(out) + out = out[:n-len(macroName)] + bb.Reset() + htmlWriteInlineImage(el, &bb) + out = append(out, bb.Bytes()...) + + case macroPass: + if isInlinePass { + // Prevent recursive substitutions. + out = append(out, c) + x++ + continue + } + el, n = parseMacroPass(input[x+1:]) + if el == nil { + out = append(out, c) + x++ + continue + } + x += n + n = len(out) + out = out[:n-len(macroName)] + bb.Reset() + htmlWriteInlinePass(doc, el, &bb) + out = append(out, bb.Bytes()...) + + default: + out = append(out, c) + x++ + } + } + return out +} + func htmlWriteBlockBegin(el *element, out io.Writer, addClass string) { fmt.Fprint(out, "\n<div") @@ -560,6 +1074,13 @@ func htmlWriteInlineImage(el *element, out io.Writer) { fmt.Fprint(out, `</span>`) } +func htmlWriteInlinePass(doc *Document, el *element, out io.Writer) { + var ( + text []byte = htmlSubs(doc, el) + ) + fmt.Fprint(out, string(text)) +} + func htmlWriteListDescription(el *element, out io.Writer) { var openTag string if el.isStyleQandA() { |
