diff options
| -rw-r--r-- | _doc/SPECS.adoc | 18 | ||||
| -rw-r--r-- | document_parser.go | 115 | ||||
| -rw-r--r-- | document_test.go | 5 | ||||
| -rw-r--r-- | testdata/document_title_test.txt | 2 | ||||
| -rw-r--r-- | testdata/header_with_empty_line_test.txt | 4 | ||||
| -rw-r--r-- | testdata/test.adoc | 4 | ||||
| -rw-r--r-- | testdata/test.got.html | 2 |
7 files changed, 87 insertions, 63 deletions
diff --git a/_doc/SPECS.adoc b/_doc/SPECS.adoc index 3ceb8ae..82f81ba 100644 --- a/_doc/SPECS.adoc +++ b/_doc/SPECS.adoc @@ -67,21 +67,19 @@ REF_ID = 1*ALPHA *("-" / "_" / ALPHA / DIGIT) {url_ref}/document/header/[Reference^]. Document header consist of title and optional authors, a revision, and zero or -more metadata. -The document metadata can be in any order, before or after title, but the -author and revision MUST be after title and in order. +more attributes. +The author and revision MUST be after title and in order. +The document attributes can be in any order, after title, author or +revision. ---- -DOC_HEADER = *(DOC_ATTRIBUTE / COMMENTS) - "=" SP DOC_TITLE LF - (*DOC_ATTRIBUTE) - DOC_AUTHORS LF - (*DOC_ATTRIBUTE) - DOC_REVISION LF +DOC_HEADER = [ "=" SP DOC_TITLE LF + [ DOC_AUTHORS LF + [ DOC_REVISION LF ]]] (*DOC_ATTRIBUTE) + LF ---- -There are no empty line before and after the document header. An empty line mark as the end of document header. === Title diff --git a/document_parser.go b/document_parser.go index 5dc4a2e..3b529fc 100644 --- a/document_parser.go +++ b/document_parser.go @@ -686,72 +686,76 @@ func (docp *documentParser) parseBlock(parent *element, term int) { // The document attributes can be in any order, but the author and revision // MUST be in order. // -// DOC_HEADER = *(DOC_ATTRIBUTE / COMMENTS) -// "=" SP *ADOC_WORD LF -// (*DOC_ATTRIBUTE) -// DOC_AUTHORS LF -// (*DOC_ATTRIBUTE) -// DOC_REVISION LF +// DOC_HEADER = [ "=" SP *ADOC_WORD LF +// [ DOC_AUTHORS LF +// [ DOC_REVISION LF ]]] // (*DOC_ATTRIBUTE) +// LF func (docp *documentParser) parseHeader() { - const ( - stateBegin int = iota - stateTitle - stateAuthor - stateRevision + var ( + logp = `parseHeader` + line []byte + ok bool ) - var ( - logp = `parseHeader` - state = stateBegin + line, ok = docp.skipCommentAndEmptyLine() + if !ok { + return + } + if docp.kind == lineKindText && isTitle(line) { + docp.doc.header.Write(bytes.TrimSpace(line[2:])) + docp.doc.Title.raw = string(docp.doc.header.raw) - key string - value string - line []byte - ok bool - ) - for { _, line, ok = docp.line(logp) if !ok { return } - if len(line) == 0 { + if docp.kind == lineKindText { + docp.doc.rawAuthors = string(line) + + _, line, ok = docp.line(logp) + if !ok { + return + } + if docp.kind == lineKindText { + docp.doc.rawRevision = string(line) + line = nil + } + } + } + + // Parse the rest of attributes until we found an empty line or + // line with non-attribute. + for { + if line == nil { + _, line, ok = docp.line(logp) + if !ok { + return + } + } + if docp.kind == lineKindEmpty { return } - if bytes.HasPrefix(line, []byte(`////`)) { + if docp.kind == lineKindBlockComment { docp.parseIgnoreCommentBlock() + line = nil continue } - if bytes.HasPrefix(line, []byte(`//`)) { + if docp.kind == lineKindComment { + line = nil continue } - if line[0] == ':' { + if docp.kind == lineKindAttribute { + var key, value string key, value, ok = docp.parseAttribute(line, false) if ok { docp.doc.Attributes.apply(key, value) } + line = nil continue } - if state == stateBegin { - if isTitle(line) { - docp.doc.header.Write(bytes.TrimSpace(line[2:])) - docp.doc.Title.raw = string(docp.doc.header.raw) - state = stateTitle - } else { - docp.doc.rawAuthors = string(line) - state = stateAuthor - } - continue - } - switch state { - case stateTitle: - docp.doc.rawAuthors = string(line) - state = stateAuthor - - case stateAuthor: - docp.doc.rawRevision = string(line) - state = stateRevision - } + docp.lineNum-- + break } } @@ -1572,3 +1576,26 @@ func (docp *documentParser) parseParagraph(parent, el *element, line []byte, ter el.parseInlineMarkup(docp.doc, elKindText) return line } + +func (docp *documentParser) skipCommentAndEmptyLine() (line []byte, ok bool) { + var logp = `skipCommentAndEmptyLine` + + for { + _, line, ok = docp.line(logp) + if !ok { + return nil, false + } + if docp.kind == lineKindEmpty { + continue + } + if docp.kind == lineKindBlockComment { + docp.parseIgnoreCommentBlock() + continue + } + if docp.kind == lineKindComment { + continue + } + break + } + return line, true +} diff --git a/document_test.go b/document_test.go index b15781c..c4cc995 100644 --- a/document_test.go +++ b/document_test.go @@ -71,8 +71,9 @@ func TestParse_document_title(t *testing.T) { expString: `a: b: c`, }, { // With custom separator. - content: `:title-separator: x -= Mainx sub`, + content: ` += Mainx sub +:title-separator: x`, exp: DocumentTitle{ Main: `Main`, Sub: `sub`, diff --git a/testdata/document_title_test.txt b/testdata/document_title_test.txt index 9856204..95d1e35 100644 --- a/testdata/document_title_test.txt +++ b/testdata/document_title_test.txt @@ -25,8 +25,8 @@ output_call: htmlWriteHeader </div> >>> With custom separator -:title-separator: x = Mainx sub +:title-separator: x <<< With custom separator <div id="header"> diff --git a/testdata/header_with_empty_line_test.txt b/testdata/header_with_empty_line_test.txt index c164769..30c7e7c 100644 --- a/testdata/header_with_empty_line_test.txt +++ b/testdata/header_with_empty_line_test.txt @@ -12,11 +12,9 @@ Below is empty line with spaces. <<< <div id="header"> +<h1>Title</h1> </div> <div id="content"> -<div class="paragraph"> -<p>= Title</p> -</div> </div> <div id="footer"> <div id="footer-text"> diff --git a/testdata/test.adoc b/testdata/test.adoc index ee99271..e5c4109 100644 --- a/testdata/test.adoc +++ b/testdata/test.adoc @@ -1,10 +1,10 @@ // SPDX-FileCopyrightText: 2020 M. Shulhan <ms@kilabit.info> // SPDX-License-Identifier: GPL-3.0-or-later = _Example `Document` **title**_ -:metadata key: value Author A <a@a.com>; Author mid_dle B <b@b.com> -:unclosed metadata: v1.1.1, 18 July 2020: remark +:metadata key: value +:unclosed metadata: :sectnums: :sectlinks: :sectanchors: diff --git a/testdata/test.got.html b/testdata/test.got.html index b567740..2299f90 100644 --- a/testdata/test.got.html +++ b/testdata/test.got.html @@ -3056,7 +3056,7 @@ this sidebar.</p> <div id="footer"> <div id="footer-text"> 1.1.1<br> -Last updated 2024-04-04 21:22:35 +0700 +Last updated 2024-08-12 23:31:24 +0700 </div> </div> </body> |
