summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorShulhan <ms@kilabit.info>2024-08-12 23:40:43 +0700
committerShulhan <ms@kilabit.info>2024-08-12 23:40:43 +0700
commite98aea8108d267fc5f8364d0e611698f413095c5 (patch)
tree4d57df6f2b6af99e0c798079f6347728101b99c8
parent9a2bcad4d62c7fd93a3411e0a60cfa07c2b04051 (diff)
downloadasciidoctor-go-e98aea8108d267fc5f8364d0e611698f413095c5.tar.xz
all: use strict document header format
Previously, an empty line before the Document Title caused the parser to stop parsing the document header; now empty lines are skipped. Also, document attributes could previously be placed anywhere, either before or after the title, and in between the title, author, and revision lines; now they can only be placed after the title, author, or revision.
-rw-r--r--_doc/SPECS.adoc18
-rw-r--r--document_parser.go115
-rw-r--r--document_test.go5
-rw-r--r--testdata/document_title_test.txt2
-rw-r--r--testdata/header_with_empty_line_test.txt4
-rw-r--r--testdata/test.adoc4
-rw-r--r--testdata/test.got.html2
7 files changed, 87 insertions, 63 deletions
diff --git a/_doc/SPECS.adoc b/_doc/SPECS.adoc
index 3ceb8ae..82f81ba 100644
--- a/_doc/SPECS.adoc
+++ b/_doc/SPECS.adoc
@@ -67,21 +67,19 @@ REF_ID = 1*ALPHA *("-" / "_" / ALPHA / DIGIT)
{url_ref}/document/header/[Reference^].
Document header consist of title and optional authors, a revision, and zero or
-more metadata.
-The document metadata can be in any order, before or after title, but the
-author and revision MUST be after title and in order.
+more attributes.
+The author and revision MUST be after title and in order.
+The document attributes can be in any order, after title, author or
+revision.
----
-DOC_HEADER = *(DOC_ATTRIBUTE / COMMENTS)
- "=" SP DOC_TITLE LF
- (*DOC_ATTRIBUTE)
- DOC_AUTHORS LF
- (*DOC_ATTRIBUTE)
- DOC_REVISION LF
+DOC_HEADER = [ "=" SP DOC_TITLE LF
+ [ DOC_AUTHORS LF
+ [ DOC_REVISION LF ]]]
(*DOC_ATTRIBUTE)
+ LF
----
-There are no empty line before and after the document header.
An empty line mark as the end of document header.
=== Title
diff --git a/document_parser.go b/document_parser.go
index 5dc4a2e..3b529fc 100644
--- a/document_parser.go
+++ b/document_parser.go
@@ -686,72 +686,76 @@ func (docp *documentParser) parseBlock(parent *element, term int) {
// The document attributes can be in any order, but the author and revision
// MUST be in order.
//
-// DOC_HEADER = *(DOC_ATTRIBUTE / COMMENTS)
-// "=" SP *ADOC_WORD LF
-// (*DOC_ATTRIBUTE)
-// DOC_AUTHORS LF
-// (*DOC_ATTRIBUTE)
-// DOC_REVISION LF
+// DOC_HEADER = [ "=" SP *ADOC_WORD LF
+// [ DOC_AUTHORS LF
+// [ DOC_REVISION LF ]]]
// (*DOC_ATTRIBUTE)
+// LF
func (docp *documentParser) parseHeader() {
- const (
- stateBegin int = iota
- stateTitle
- stateAuthor
- stateRevision
+ var (
+ logp = `parseHeader`
+ line []byte
+ ok bool
)
- var (
- logp = `parseHeader`
- state = stateBegin
+ line, ok = docp.skipCommentAndEmptyLine()
+ if !ok {
+ return
+ }
+ if docp.kind == lineKindText && isTitle(line) {
+ docp.doc.header.Write(bytes.TrimSpace(line[2:]))
+ docp.doc.Title.raw = string(docp.doc.header.raw)
- key string
- value string
- line []byte
- ok bool
- )
- for {
_, line, ok = docp.line(logp)
if !ok {
return
}
- if len(line) == 0 {
+ if docp.kind == lineKindText {
+ docp.doc.rawAuthors = string(line)
+
+ _, line, ok = docp.line(logp)
+ if !ok {
+ return
+ }
+ if docp.kind == lineKindText {
+ docp.doc.rawRevision = string(line)
+ line = nil
+ }
+ }
+ }
+
+ // Parse the rest of attributes until we found an empty line or
+ // line with non-attribute.
+ for {
+ if line == nil {
+ _, line, ok = docp.line(logp)
+ if !ok {
+ return
+ }
+ }
+ if docp.kind == lineKindEmpty {
return
}
- if bytes.HasPrefix(line, []byte(`////`)) {
+ if docp.kind == lineKindBlockComment {
docp.parseIgnoreCommentBlock()
+ line = nil
continue
}
- if bytes.HasPrefix(line, []byte(`//`)) {
+ if docp.kind == lineKindComment {
+ line = nil
continue
}
- if line[0] == ':' {
+ if docp.kind == lineKindAttribute {
+ var key, value string
key, value, ok = docp.parseAttribute(line, false)
if ok {
docp.doc.Attributes.apply(key, value)
}
+ line = nil
continue
}
- if state == stateBegin {
- if isTitle(line) {
- docp.doc.header.Write(bytes.TrimSpace(line[2:]))
- docp.doc.Title.raw = string(docp.doc.header.raw)
- state = stateTitle
- } else {
- docp.doc.rawAuthors = string(line)
- state = stateAuthor
- }
- continue
- }
- switch state {
- case stateTitle:
- docp.doc.rawAuthors = string(line)
- state = stateAuthor
-
- case stateAuthor:
- docp.doc.rawRevision = string(line)
- state = stateRevision
- }
+ docp.lineNum--
+ break
}
}
@@ -1572,3 +1576,26 @@ func (docp *documentParser) parseParagraph(parent, el *element, line []byte, ter
el.parseInlineMarkup(docp.doc, elKindText)
return line
}
+
+func (docp *documentParser) skipCommentAndEmptyLine() (line []byte, ok bool) {
+ var logp = `skipCommentAndEmptyLine`
+
+ for {
+ _, line, ok = docp.line(logp)
+ if !ok {
+ return nil, false
+ }
+ if docp.kind == lineKindEmpty {
+ continue
+ }
+ if docp.kind == lineKindBlockComment {
+ docp.parseIgnoreCommentBlock()
+ continue
+ }
+ if docp.kind == lineKindComment {
+ continue
+ }
+ break
+ }
+ return line, true
+}
diff --git a/document_test.go b/document_test.go
index b15781c..c4cc995 100644
--- a/document_test.go
+++ b/document_test.go
@@ -71,8 +71,9 @@ func TestParse_document_title(t *testing.T) {
expString: `a: b: c`,
}, {
// With custom separator.
- content: `:title-separator: x
-= Mainx sub`,
+ content: `
+= Mainx sub
+:title-separator: x`,
exp: DocumentTitle{
Main: `Main`,
Sub: `sub`,
diff --git a/testdata/document_title_test.txt b/testdata/document_title_test.txt
index 9856204..95d1e35 100644
--- a/testdata/document_title_test.txt
+++ b/testdata/document_title_test.txt
@@ -25,8 +25,8 @@ output_call: htmlWriteHeader
</div>
>>> With custom separator
-:title-separator: x
= Mainx sub
+:title-separator: x
<<< With custom separator
<div id="header">
diff --git a/testdata/header_with_empty_line_test.txt b/testdata/header_with_empty_line_test.txt
index c164769..30c7e7c 100644
--- a/testdata/header_with_empty_line_test.txt
+++ b/testdata/header_with_empty_line_test.txt
@@ -12,11 +12,9 @@ Below is empty line with spaces.
<<<
<div id="header">
+<h1>Title</h1>
</div>
<div id="content">
-<div class="paragraph">
-<p>= Title</p>
-</div>
</div>
<div id="footer">
<div id="footer-text">
diff --git a/testdata/test.adoc b/testdata/test.adoc
index ee99271..e5c4109 100644
--- a/testdata/test.adoc
+++ b/testdata/test.adoc
@@ -1,10 +1,10 @@
// SPDX-FileCopyrightText: 2020 M. Shulhan <ms@kilabit.info>
// SPDX-License-Identifier: GPL-3.0-or-later
= _Example `Document` **title**_
-:metadata key: value
Author A <a@a.com>; Author mid_dle B <b@b.com>
-:unclosed metadata:
v1.1.1, 18 July 2020: remark
+:metadata key: value
+:unclosed metadata:
:sectnums:
:sectlinks:
:sectanchors:
diff --git a/testdata/test.got.html b/testdata/test.got.html
index b567740..2299f90 100644
--- a/testdata/test.got.html
+++ b/testdata/test.got.html
@@ -3056,7 +3056,7 @@ this sidebar.</p>
<div id="footer">
<div id="footer-text">
1.1.1<br>
-Last updated 2024-04-04 21:22:35 +0700
+Last updated 2024-08-12 23:31:24 +0700
</div>
</div>
</body>