diff options
| author | Shulhan <ms@kilabit.info> | 2024-08-12 23:40:43 +0700 |
|---|---|---|
| committer | Shulhan <ms@kilabit.info> | 2024-08-12 23:40:43 +0700 |
| commit | e98aea8108d267fc5f8364d0e611698f413095c5 (patch) | |
| tree | 4d57df6f2b6af99e0c798079f6347728101b99c8 | |
| parent | 9a2bcad4d62c7fd93a3411e0a60cfa07c2b04051 (diff) | |
| download | asciidoctor-go-e98aea8108d267fc5f8364d0e611698f413095c5.tar.xz | |
all: use strict document header format
Previously, an empty line before the Document Title caused the parser to
stop parsing the document header; now empty lines are skipped.
Also, a document attribute could previously be placed anywhere, either before or
after the title, and in between attributes; now it can only be placed
after the revision, author, or title.
| -rw-r--r-- | _doc/SPECS.adoc | 18 | ||||
| -rw-r--r-- | document_parser.go | 115 | ||||
| -rw-r--r-- | document_test.go | 5 | ||||
| -rw-r--r-- | testdata/document_title_test.txt | 2 | ||||
| -rw-r--r-- | testdata/header_with_empty_line_test.txt | 4 | ||||
| -rw-r--r-- | testdata/test.adoc | 4 | ||||
| -rw-r--r-- | testdata/test.got.html | 2 |
7 files changed, 87 insertions, 63 deletions
diff --git a/_doc/SPECS.adoc b/_doc/SPECS.adoc index 3ceb8ae..82f81ba 100644 --- a/_doc/SPECS.adoc +++ b/_doc/SPECS.adoc @@ -67,21 +67,19 @@ REF_ID = 1*ALPHA *("-" / "_" / ALPHA / DIGIT) {url_ref}/document/header/[Reference^]. Document header consist of title and optional authors, a revision, and zero or -more metadata. -The document metadata can be in any order, before or after title, but the -author and revision MUST be after title and in order. +more attributes. +The author and revision MUST be after title and in order. +The document attributes can be in any order, after title, author or +revision. ---- -DOC_HEADER = *(DOC_ATTRIBUTE / COMMENTS) - "=" SP DOC_TITLE LF - (*DOC_ATTRIBUTE) - DOC_AUTHORS LF - (*DOC_ATTRIBUTE) - DOC_REVISION LF +DOC_HEADER = [ "=" SP DOC_TITLE LF + [ DOC_AUTHORS LF + [ DOC_REVISION LF ]]] (*DOC_ATTRIBUTE) + LF ---- -There are no empty line before and after the document header. An empty line mark as the end of document header. === Title diff --git a/document_parser.go b/document_parser.go index 5dc4a2e..3b529fc 100644 --- a/document_parser.go +++ b/document_parser.go @@ -686,72 +686,76 @@ func (docp *documentParser) parseBlock(parent *element, term int) { // The document attributes can be in any order, but the author and revision // MUST be in order. 
// -// DOC_HEADER = *(DOC_ATTRIBUTE / COMMENTS) -// "=" SP *ADOC_WORD LF -// (*DOC_ATTRIBUTE) -// DOC_AUTHORS LF -// (*DOC_ATTRIBUTE) -// DOC_REVISION LF +// DOC_HEADER = [ "=" SP *ADOC_WORD LF +// [ DOC_AUTHORS LF +// [ DOC_REVISION LF ]]] // (*DOC_ATTRIBUTE) +// LF func (docp *documentParser) parseHeader() { - const ( - stateBegin int = iota - stateTitle - stateAuthor - stateRevision + var ( + logp = `parseHeader` + line []byte + ok bool ) - var ( - logp = `parseHeader` - state = stateBegin + line, ok = docp.skipCommentAndEmptyLine() + if !ok { + return + } + if docp.kind == lineKindText && isTitle(line) { + docp.doc.header.Write(bytes.TrimSpace(line[2:])) + docp.doc.Title.raw = string(docp.doc.header.raw) - key string - value string - line []byte - ok bool - ) - for { _, line, ok = docp.line(logp) if !ok { return } - if len(line) == 0 { + if docp.kind == lineKindText { + docp.doc.rawAuthors = string(line) + + _, line, ok = docp.line(logp) + if !ok { + return + } + if docp.kind == lineKindText { + docp.doc.rawRevision = string(line) + line = nil + } + } + } + + // Parse the rest of attributes until we found an empty line or + // line with non-attribute. 
+ for { + if line == nil { + _, line, ok = docp.line(logp) + if !ok { + return + } + } + if docp.kind == lineKindEmpty { return } - if bytes.HasPrefix(line, []byte(`////`)) { + if docp.kind == lineKindBlockComment { docp.parseIgnoreCommentBlock() + line = nil continue } - if bytes.HasPrefix(line, []byte(`//`)) { + if docp.kind == lineKindComment { + line = nil continue } - if line[0] == ':' { + if docp.kind == lineKindAttribute { + var key, value string key, value, ok = docp.parseAttribute(line, false) if ok { docp.doc.Attributes.apply(key, value) } + line = nil continue } - if state == stateBegin { - if isTitle(line) { - docp.doc.header.Write(bytes.TrimSpace(line[2:])) - docp.doc.Title.raw = string(docp.doc.header.raw) - state = stateTitle - } else { - docp.doc.rawAuthors = string(line) - state = stateAuthor - } - continue - } - switch state { - case stateTitle: - docp.doc.rawAuthors = string(line) - state = stateAuthor - - case stateAuthor: - docp.doc.rawRevision = string(line) - state = stateRevision - } + docp.lineNum-- + break } } @@ -1572,3 +1576,26 @@ func (docp *documentParser) parseParagraph(parent, el *element, line []byte, ter el.parseInlineMarkup(docp.doc, elKindText) return line } + +func (docp *documentParser) skipCommentAndEmptyLine() (line []byte, ok bool) { + var logp = `skipCommentAndEmptyLine` + + for { + _, line, ok = docp.line(logp) + if !ok { + return nil, false + } + if docp.kind == lineKindEmpty { + continue + } + if docp.kind == lineKindBlockComment { + docp.parseIgnoreCommentBlock() + continue + } + if docp.kind == lineKindComment { + continue + } + break + } + return line, true +} diff --git a/document_test.go b/document_test.go index b15781c..c4cc995 100644 --- a/document_test.go +++ b/document_test.go @@ -71,8 +71,9 @@ func TestParse_document_title(t *testing.T) { expString: `a: b: c`, }, { // With custom separator. 
- content: `:title-separator: x -= Mainx sub`, + content: ` += Mainx sub +:title-separator: x`, exp: DocumentTitle{ Main: `Main`, Sub: `sub`, diff --git a/testdata/document_title_test.txt b/testdata/document_title_test.txt index 9856204..95d1e35 100644 --- a/testdata/document_title_test.txt +++ b/testdata/document_title_test.txt @@ -25,8 +25,8 @@ output_call: htmlWriteHeader </div> >>> With custom separator -:title-separator: x = Mainx sub +:title-separator: x <<< With custom separator <div id="header"> diff --git a/testdata/header_with_empty_line_test.txt b/testdata/header_with_empty_line_test.txt index c164769..30c7e7c 100644 --- a/testdata/header_with_empty_line_test.txt +++ b/testdata/header_with_empty_line_test.txt @@ -12,11 +12,9 @@ Below is empty line with spaces. <<< <div id="header"> +<h1>Title</h1> </div> <div id="content"> -<div class="paragraph"> -<p>= Title</p> -</div> </div> <div id="footer"> <div id="footer-text"> diff --git a/testdata/test.adoc b/testdata/test.adoc index ee99271..e5c4109 100644 --- a/testdata/test.adoc +++ b/testdata/test.adoc @@ -1,10 +1,10 @@ // SPDX-FileCopyrightText: 2020 M. Shulhan <ms@kilabit.info> // SPDX-License-Identifier: GPL-3.0-or-later = _Example `Document` **title**_ -:metadata key: value Author A <a@a.com>; Author mid_dle B <b@b.com> -:unclosed metadata: v1.1.1, 18 July 2020: remark +:metadata key: value +:unclosed metadata: :sectnums: :sectlinks: :sectanchors: diff --git a/testdata/test.got.html b/testdata/test.got.html index b567740..2299f90 100644 --- a/testdata/test.got.html +++ b/testdata/test.got.html @@ -3056,7 +3056,7 @@ this sidebar.</p> <div id="footer"> <div id="footer-text"> 1.1.1<br> -Last updated 2024-04-04 21:22:35 +0700 +Last updated 2024-08-12 23:31:24 +0700 </div> </div> </body> |
