From e98aea8108d267fc5f8364d0e611698f413095c5 Mon Sep 17 00:00:00 2001 From: Shulhan Date: Mon, 12 Aug 2024 23:40:43 +0700 Subject: all: use strict document header format Previously, an empty line before the Document Title caused the parser to stop parsing the document header; now empty lines are skipped. Also, a document attribute could previously be placed anywhere, either before or after the title, and in between the title, author, and revision lines; now it can only be placed after the title, author, or revision. --- _doc/SPECS.adoc | 18 +++-- document_parser.go | 115 +++++++++++++++++++------------ document_test.go | 5 +- testdata/document_title_test.txt | 2 +- testdata/header_with_empty_line_test.txt | 4 +- testdata/test.adoc | 4 +- testdata/test.got.html | 2 +- 7 files changed, 87 insertions(+), 63 deletions(-) diff --git a/_doc/SPECS.adoc b/_doc/SPECS.adoc index 3ceb8ae..82f81ba 100644 --- a/_doc/SPECS.adoc +++ b/_doc/SPECS.adoc @@ -67,21 +67,19 @@ REF_ID = 1*ALPHA *("-" / "_" / ALPHA / DIGIT) {url_ref}/document/header/[Reference^]. Document header consist of title and optional authors, a revision, and zero or -more metadata. -The document metadata can be in any order, before or after title, but the -author and revision MUST be after title and in order. +more attributes. +The author and revision MUST be after title and in order. +The document attributes can be in any order, after title, author or +revision. ---- -DOC_HEADER = *(DOC_ATTRIBUTE / COMMENTS) - "=" SP DOC_TITLE LF - (*DOC_ATTRIBUTE) - DOC_AUTHORS LF - (*DOC_ATTRIBUTE) - DOC_REVISION LF +DOC_HEADER = [ "=" SP DOC_TITLE LF + [ DOC_AUTHORS LF + [ DOC_REVISION LF ]]] (*DOC_ATTRIBUTE) + LF ---- -There are no empty line before and after the document header. An empty line mark as the end of document header. 
=== Title diff --git a/document_parser.go b/document_parser.go index 5dc4a2e..3b529fc 100644 --- a/document_parser.go +++ b/document_parser.go @@ -686,72 +686,76 @@ func (docp *documentParser) parseBlock(parent *element, term int) { // The document attributes can be in any order, but the author and revision // MUST be in order. // -// DOC_HEADER = *(DOC_ATTRIBUTE / COMMENTS) -// "=" SP *ADOC_WORD LF -// (*DOC_ATTRIBUTE) -// DOC_AUTHORS LF -// (*DOC_ATTRIBUTE) -// DOC_REVISION LF +// DOC_HEADER = [ "=" SP *ADOC_WORD LF +// [ DOC_AUTHORS LF +// [ DOC_REVISION LF ]]] // (*DOC_ATTRIBUTE) +// LF func (docp *documentParser) parseHeader() { - const ( - stateBegin int = iota - stateTitle - stateAuthor - stateRevision + var ( + logp = `parseHeader` + line []byte + ok bool ) - var ( - logp = `parseHeader` - state = stateBegin + line, ok = docp.skipCommentAndEmptyLine() + if !ok { + return + } + if docp.kind == lineKindText && isTitle(line) { + docp.doc.header.Write(bytes.TrimSpace(line[2:])) + docp.doc.Title.raw = string(docp.doc.header.raw) - key string - value string - line []byte - ok bool - ) - for { _, line, ok = docp.line(logp) if !ok { return } - if len(line) == 0 { + if docp.kind == lineKindText { + docp.doc.rawAuthors = string(line) + + _, line, ok = docp.line(logp) + if !ok { + return + } + if docp.kind == lineKindText { + docp.doc.rawRevision = string(line) + line = nil + } + } + } + + // Parse the rest of attributes until we found an empty line or + // line with non-attribute. 
+ for { + if line == nil { + _, line, ok = docp.line(logp) + if !ok { + return + } + } + if docp.kind == lineKindEmpty { return } - if bytes.HasPrefix(line, []byte(`////`)) { + if docp.kind == lineKindBlockComment { docp.parseIgnoreCommentBlock() + line = nil continue } - if bytes.HasPrefix(line, []byte(`//`)) { + if docp.kind == lineKindComment { + line = nil continue } - if line[0] == ':' { + if docp.kind == lineKindAttribute { + var key, value string key, value, ok = docp.parseAttribute(line, false) if ok { docp.doc.Attributes.apply(key, value) } + line = nil continue } - if state == stateBegin { - if isTitle(line) { - docp.doc.header.Write(bytes.TrimSpace(line[2:])) - docp.doc.Title.raw = string(docp.doc.header.raw) - state = stateTitle - } else { - docp.doc.rawAuthors = string(line) - state = stateAuthor - } - continue - } - switch state { - case stateTitle: - docp.doc.rawAuthors = string(line) - state = stateAuthor - - case stateAuthor: - docp.doc.rawRevision = string(line) - state = stateRevision - } + docp.lineNum-- + break } } @@ -1572,3 +1576,26 @@ func (docp *documentParser) parseParagraph(parent, el *element, line []byte, ter el.parseInlineMarkup(docp.doc, elKindText) return line } + +func (docp *documentParser) skipCommentAndEmptyLine() (line []byte, ok bool) { + var logp = `skipCommentAndEmptyLine` + + for { + _, line, ok = docp.line(logp) + if !ok { + return nil, false + } + if docp.kind == lineKindEmpty { + continue + } + if docp.kind == lineKindBlockComment { + docp.parseIgnoreCommentBlock() + continue + } + if docp.kind == lineKindComment { + continue + } + break + } + return line, true +} diff --git a/document_test.go b/document_test.go index b15781c..c4cc995 100644 --- a/document_test.go +++ b/document_test.go @@ -71,8 +71,9 @@ func TestParse_document_title(t *testing.T) { expString: `a: b: c`, }, { // With custom separator. 
- content: `:title-separator: x -= Mainx sub`, + content: ` += Mainx sub +:title-separator: x`, exp: DocumentTitle{ Main: `Main`, Sub: `sub`, diff --git a/testdata/document_title_test.txt b/testdata/document_title_test.txt index 9856204..95d1e35 100644 --- a/testdata/document_title_test.txt +++ b/testdata/document_title_test.txt @@ -25,8 +25,8 @@ output_call: htmlWriteHeader >>> With custom separator -:title-separator: x = Mainx sub +:title-separator: x <<< With custom separator