From d9582373ece2f5da42725e22bb90c5daa93316df Mon Sep 17 00:00:00 2001 From: Shulhan Date: Mon, 19 Dec 2022 22:52:09 +0700 Subject: all: support multi line attribute values If the attribute value ends with a backslash '\', the value continues on the next line. --- _doc/SPECS.adoc | 2 +- document_parser.go | 78 +++++++++++++++++++++++++++++++++++++++++-- document_test.go | 47 ++++++++++++++++++++++++++ parser.go | 46 ------------------------- testdata/html/header.adoc | 12 +++++++ testdata/html/header.exp.html | 34 +++++++++++++++++++ testdata/html/to-html.sh | 5 +++ 7 files changed, 175 insertions(+), 49 deletions(-) create mode 100644 testdata/html/header.adoc create mode 100644 testdata/html/header.exp.html create mode 100755 testdata/html/to-html.sh diff --git a/_doc/SPECS.adoc b/_doc/SPECS.adoc index 6a5244f..7bfae81 100644 --- a/_doc/SPECS.adoc +++ b/_doc/SPECS.adoc @@ -128,7 +128,7 @@ DOC_REV_DATE = 1*2DIGIT 3*ALPHA 4*DIGIT There are also metadata which affect how the document rendered, ---- -DOC_ATTRIBUTE = ":" DOC_ATTR_KEY ":" *STRING LF +DOC_ATTRIBUTE = ":" DOC_ATTR_KEY ":" *LINE ("\" *LINE) LF DOC_ATTR_KEY = ( "toc" / "sectanchors" / "sectlinks" / "imagesdir" / "data-uri" / *META_KEY ) LF diff --git a/document_parser.go b/document_parser.go index e00dd81..1117b99 100644 --- a/document_parser.go +++ b/document_parser.go @@ -6,7 +6,9 @@ package asciidoctor import ( "bytes" "fmt" + "io" + "github.com/shuLhan/share/lib/ascii" "github.com/shuLhan/share/lib/debug" ) @@ -196,6 +198,78 @@ func (docp *documentParser) line(logp string) (spaces, line []byte, ok bool) { return spaces, line, true } +// parseAttribute parses a document attribute and returns its key and optional +// value.
+func (docp *documentParser) parseAttribute(line []byte, strict bool) (key, value string, ok bool) { + var ( + bb bytes.Buffer + p int + x int + ) + + if len(line) < 2 || !(ascii.IsAlnum(line[1]) || line[1] == '_') { // len check: a bare ":" has no line[1] + return ``, ``, false + } + + bb.WriteByte(line[1]) + x = 2 + for ; x < len(line); x++ { + if line[x] == ':' { + break + } + if ascii.IsAlnum(line[x]) || line[x] == '_' || + line[x] == '-' || line[x] == '!' { + bb.WriteByte(line[x]) + continue + } + if strict { + return ``, ``, false + } + } + if x == len(line) { + return ``, ``, false + } + + key = bb.String() + + line = line[x+1:] + p = len(line) + if p > 0 && line[p-1] == '\\' { + bb.Reset() + line = line[:p-1] + bb.Write(line) + docp.parseMultiline(&bb) + line = bb.Bytes() + } + + line = bytes.TrimSpace(line) + value = string(line) + + return key, value, true +} + +// parseMultiline writes the continuation lines of a multiline value to out, starting at docp.lineNum and advancing it past every line consumed; the trailing `\` of each line is dropped, and the first line without one (including an empty line) ends the value. +func (docp *documentParser) parseMultiline(out io.Writer) { + var ( + isMultiline = true + + line []byte + p int + ) + for isMultiline && docp.lineNum < len(docp.lines) { + line = docp.lines[docp.lineNum] + p = len(line) + + if p > 0 && line[p-1] == '\\' { // p > 0: an empty line must not be indexed + line = line[:p-1] + } else { + isMultiline = false + } + _, _ = out.Write(line) + docp.lineNum++ + } +} + func (docp *documentParser) parseBlock(parent *element, term int) { var ( logp = `parseBlock` @@ -297,7 +371,7 @@ func (docp *documentParser) parseBlock(parent *element, term int) { case lineKindAttribute: var ( - key, value, ok = parseAttribute(line, false) + key, value, ok = docp.parseAttribute(line, false) ) if ok { if key == attrNameIcons { @@ -635,7 +709,7 @@ func (docp *documentParser) parseHeader() { continue } if line[0] == ':' { - key, value, ok = parseAttribute(line, false) + key, value, ok = docp.parseAttribute(line, false) if ok { docp.doc.Attributes.apply(key, value) } diff --git a/document_test.go b/document_test.go index a6f60e7..42e9b03 100644 --- a/document_test.go +++ b/document_test.go @@ -4,6 +4,7 @@ package asciidoctor
import ( + "bytes" "os" "testing" @@ -94,3 +95,49 @@ func TestParse_document_title(t *testing.T) { test.Assert(t, `String`, c.expString, got.Title.String()) } } + +func TestDocument_ToHTML(t *testing.T) { + type testCase struct { + name string + fileAdoc string + fileExpHtml string + } + + var ( + cases = []testCase{{ + name: `header`, + fileAdoc: `testdata/html/header.adoc`, + fileExpHtml: `testdata/html/header.exp.html`, + }} + + c testCase + doc *Document + err error + got bytes.Buffer + exp []byte + ) + + for _, c = range cases { + doc, err = Open(c.fileAdoc) + if err != nil { + t.Fatal(err) + } + + // The generator value in the expected output cannot be changed, + // so override our own generator attribute to match it. + doc.Attributes[MetaNameGenerator] = `Asciidoctor 2.0.18` + + got.Reset() + + err = doc.ToHTML(&got) + if err != nil { + t.Fatal(err) + } + + exp, err = os.ReadFile(c.fileExpHtml) + if err != nil { + t.Fatal(err) + } + test.Assert(t, c.name, string(exp), got.String()) + } +} diff --git a/parser.go b/parser.go index fe6e0ff..1eb53c4 100644 --- a/parser.go +++ b/parser.go @@ -553,52 +553,6 @@ func isValidID(id []byte) bool { return true } -// parseAttribute parse document attribute and return its key and optional -// value. -// -// DOC_ATTRIBUTE = ":" DOC_ATTR_KEY ":" *STRING LF -// -// DOC_ATTR_KEY = ( "toc" / "sectanchors" / "sectlinks" -// / "imagesdir" / "data-uri" / *META_KEY ) LF -// -// META_KEY_CHAR = (A..Z | a..z | 0..9 | '_') -// -// META_KEY = 1META_KEY_CHAR *(META_KEY_CHAR | '-') -func parseAttribute(line []byte, strict bool) (key, value string, ok bool) { - var ( - sb strings.Builder - valb []byte - x int - ) - - if !(ascii.IsAlnum(line[1]) || line[1] == '_') { - return ``, ``, false - } - - sb.WriteByte(line[1]) - x = 2 - for ; x < len(line); x++ { - if line[x] == ':' { - break - } - if ascii.IsAlnum(line[x]) || line[x] == '_' || - line[x] == '-' || line[x] == '!'
{ - sb.WriteByte(line[x]) - continue - } - if strict { - return ``, ``, false - } - } - if x == len(line) { - return ``, ``, false - } - - valb = bytes.TrimSpace(line[x+1:]) - - return sb.String(), string(valb), true -} - // parseAttrRef parse the attribute reference, an attribute key wrapped by // "{" "}". If the attribute reference exist, replace the content with the // attribute value and reset the parser state to zero. diff --git a/testdata/html/header.adoc b/testdata/html/header.adoc new file mode 100644 index 0000000..d807033 --- /dev/null +++ b/testdata/html/header.adoc @@ -0,0 +1,12 @@ += Title +John Doe +v1.0, 15 Dec 2022 +:description: Multiline \ +description \ +with backslash +:keywords: multiline, \ +key, \ +words +:last-update-label!: + +Document body. diff --git a/testdata/html/header.exp.html b/testdata/html/header.exp.html new file mode 100644 index 0000000..54ebd62 --- /dev/null +++ b/testdata/html/header.exp.html @@ -0,0 +1,34 @@ + + + + + + + + + + +Title + + + +
+
+

Document body.

+
+
+ + + \ No newline at end of file diff --git a/testdata/html/to-html.sh b/testdata/html/to-html.sh new file mode 100755 index 0000000..7d45b15 --- /dev/null +++ b/testdata/html/to-html.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +## Script to convert all adoc files to HTML without stylesheet. + +asciidoctor -a stylesheet! -o header.exp.html header.adoc -- cgit v1.3