aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorShulhan <ms@kilabit.info>2026-04-04 16:38:06 +0700
committerShulhan <ms@kilabit.info>2026-04-04 16:38:06 +0700
commit45526ab31b93debffd51e1aaad43c9acf9d61ab6 (patch)
treea23ccc730093cf8d251c1d6e43a3c9679f42112c
parenta9e626c6306697a64af87b1b7fa4f61b0580d5a8 (diff)
downloadasciidoctor-go-45526ab31b93debffd51e1aaad43c9acf9d61ab6.tar.xz
all: fix parsing literal paragraph
A line that starts with one or more spaces is considered a literal paragraph. Once a literal line is found, the paragraph continues until an empty line.
-rw-r--r--README.md5
-rw-r--r--document_parser.go71
-rw-r--r--testdata/literal_paragraph_test.txt51
-rw-r--r--testdata/test.adoc24
-rw-r--r--testdata/test.exp.html46
-rw-r--r--testdata/test.got.html46
6 files changed, 217 insertions, 26 deletions
diff --git a/README.md b/README.md
index 908244a..d05f186 100644
--- a/README.md
+++ b/README.md
@@ -118,7 +118,10 @@ Each supported feature is linked to official
- Attributes (reference)
- Replacements
- Preventing Substitutions
-- Listing Blocks
+- Verbatim and Source Blocks
+ - ❌ [Source Code Blocks](https://docs.asciidoctor.org/asciidoc/latest/verbatim/source-blocks/)
+ - ✅ [Listing Blocks](https://docs.asciidoctor.org/asciidoc/latest/verbatim/listing-blocks/)
+ - ✅ [Literal Blocks](https://docs.asciidoctor.org/asciidoc/latest/verbatim/literal-blocks/)
- Passthroughs
- Passthrough Blocks
- Open Blocks
diff --git a/document_parser.go b/document_parser.go
index 8de91cc..18c0817 100644
--- a/document_parser.go
+++ b/document_parser.go
@@ -11,6 +11,13 @@ import (
"git.sr.ht/~shulhan/pakakeh.go/lib/ascii"
)
+// Starting from [Document.parse], all parsing begins at parseBlock.
+//
+// -> parseBlock()
+// -> line()
+// -> whatKindOfLine()
+// The parsing branches here based on docp.kind.
+
const debugLevel = 0
type documentParser struct {
@@ -133,6 +140,48 @@ func (docp *documentParser) consumeLinesUntil(el *element, term int, terms []int
return line
}
+// consumeLiteralParagraph appends each following line to el, stripping the origSpaces indent,
+// until a line with empty spaces whose docp.kind is in terms; that line is returned.
+func (docp *documentParser) consumeLiteralParagraph(el *element,
+ origSpaces []byte, terms ...int,
+) (line []byte) {
+ logp := `consumeLiteralParagraph`
+ lenOrigSpaces := len(origSpaces) // Number of leading bytes removed from lines that share the indent.
+ var spaces []byte
+ var ok bool
+ for {
+ spaces, line, ok = docp.line(logp)
+ if !ok {
+ break // No line was returned; execution still continues at the "out" label.
+ }
+ if bytes.HasPrefix(spaces, origSpaces) { // NOTE(review): always true for empty origSpaces; confirm callers never pass that.
+ el.Write(spaces[lenOrigSpaces:])
+ el.Write(line)
+ el.WriteByte('\n')
+ continue
+ }
+ if len(spaces) != 0 {
+ // Indented, but not with the origSpaces prefix: drop only the
+ // first byte of spaces and keep the remainder of the indent.
+ el.Write(spaces[1:])
+ el.Write(line)
+ el.WriteByte('\n')
+ continue
+ }
+ for _, kind := range terms { // Only lines whose spaces is empty reach this check.
+ if kind == docp.kind {
+ goto out
+ }
+ }
+ // Not indented and not a terminator: keep it as part of the paragraph.
+ el.Write(line)
+ el.WriteByte('\n')
+ }
+out:
+ el.raw = applySubstitutions(docp.doc, el.raw) // Reached on both exits: terminator matched or docp.line returned !ok.
+ return line
+}
+
// hasPreamble will return true if the contents contains preamble, indicated
// by the first section that found after current line.
func (docp *documentParser) hasPreamble() bool {
@@ -285,12 +334,13 @@ func (docp *documentParser) parseBlock(parent *element, term int) {
el = &element{}
line []byte
+ spaces []byte
isTerm bool
ok bool
)
for !isTerm {
if len(line) == 0 {
- _, line, ok = docp.line(logp)
+ spaces, line, ok = docp.line(logp)
if !ok {
return
}
@@ -514,17 +564,14 @@ func (docp *documentParser) parseBlock(parent *element, term int) {
el.addRole(classNameLiteralBlock)
el.Write(line)
el.WriteByte('\n')
- line = docp.consumeLinesUntil(
- el,
- lineKindEmpty,
- []int{
- term,
- elKindBlockListing,
- elKindBlockListingNamed,
- elKindBlockLiteral,
- elKindBlockLiteralNamed,
- })
- el.raw = applySubstitutions(docp.doc, el.raw)
+ line = docp.consumeLiteralParagraph(el,
+ spaces, lineKindEmpty,
+ term,
+ elKindBlockListing,
+ elKindBlockListingNamed,
+ elKindBlockLiteral,
+ elKindBlockLiteralNamed,
+ )
}
parent.addChild(el)
el = &element{}
diff --git a/testdata/literal_paragraph_test.txt b/testdata/literal_paragraph_test.txt
new file mode 100644
index 0000000..684b859
--- /dev/null
+++ b/testdata/literal_paragraph_test.txt
@@ -0,0 +1,51 @@
+>>> WithTab
+
+A paragraph.
+
+ Literal paragraph with tab.
+ Literal paragraph with tab.
+ Literal paragraph with tab.
+
+End of section.
+
+<<< WithTab
+
+<div class="paragraph">
+<p>A paragraph.</p>
+</div>
+<div class="literalblock">
+<div class="content">
+<pre>Literal paragraph with tab.
+ Literal paragraph with tab.
+Literal paragraph with tab.</pre>
+</div>
+</div>
+<div class="paragraph">
+<p>End of section.</p>
+</div>
+
+>>> WithSpaces
+
+A paragraph.
+
+ Literal paragraph with two spaces.
+ Literal paragraph with two spaces.
+ Literal paragraph with two spaces.
+
+End of section.
+
+<<< WithSpaces
+
+<div class="paragraph">
+<p>A paragraph.</p>
+</div>
+<div class="literalblock">
+<div class="content">
+<pre>Literal paragraph with two spaces.
+Literal paragraph with two spaces.
+Literal paragraph with two spaces.</pre>
+</div>
+</div>
+<div class="paragraph">
+<p>End of section.</p>
+</div>
diff --git a/testdata/test.adoc b/testdata/test.adoc
index e7ef033..5241734 100644
--- a/testdata/test.adoc
+++ b/testdata/test.adoc
@@ -234,7 +234,7 @@ A bold with * space *, with single non alnum *=*.
* Write my document
-== Block listing
+== Listing Blocks
[listing]
This is single paragraph listing.
@@ -247,19 +247,37 @@ This is block listing.
----
-== Block literal
+== Literal Blocks
+
+=== Indent method
A literal paragraph followed by non-space line:
non-space line.
+ Literal with tab.
+ Literal with tab.
+ Literal with tab.
+
+ Literal with two spaces.
+ Literal with two spaces.
+ Literal with two tab.
+
+ Literal with mixed tab and spaces (two spaces).
+ Literal with mixed tab and spaces (a tab).
+ Literal with mixed tab and spaces (two spaces and a tab).
+
+=== Literal style sintax
+
[literal]
A literal named.
[literal] x
A literal named and trailing characters will become paragraph.
+=== Delimited literal block
+
....
-With 4 dots.
+With .... block.
....
// This one does not work:
diff --git a/testdata/test.exp.html b/testdata/test.exp.html
index 90be8e2..dd1e453 100644
--- a/testdata/test.exp.html
+++ b/testdata/test.exp.html
@@ -481,8 +481,14 @@ body.book #toc,body.book #preamble,body.book h1.sect0,body.book .sect1>h2{page-b
<li><a href="#title">11.1. Title</a></li>
</ul>
</li>
-<li><a href="#block_listing">12. Block listing</a></li>
-<li><a href="#block_literal">13. Block literal</a></li>
+<li><a href="#listing_blocks">12. Listing Blocks</a></li>
+<li><a href="#literal_blocks">13. Literal Blocks</a>
+<ul class="sectlevel2">
+<li><a href="#indent_method">13.1. Indent method</a></li>
+<li><a href="#literal_style_sintax">13.2. Literal style sintax</a></li>
+<li><a href="#delimited_literal_block">13.3. Delimited literal block</a></li>
+</ul>
+</li>
<li><a href="#ordered_lists">14. Ordered Lists</a></li>
<li><a href="#unordered_lists">15. Unordered Lists</a>
<ul class="sectlevel2">
@@ -897,7 +903,7 @@ paragraph*</em>.</p>
</div>
</div>
<div class="sect1">
-<h2 id="block_listing"><a class="anchor" href="#block_listing"></a><a class="link" href="#block_listing">12. Block listing</a></h2>
+<h2 id="listing_blocks"><a class="anchor" href="#listing_blocks"></a><a class="link" href="#listing_blocks">12. Listing Blocks</a></h2>
<div class="sectionbody">
<div class="listingblock">
<div class="content">
@@ -916,8 +922,10 @@ This is not listing.</p>
</div>
</div>
<div class="sect1">
-<h2 id="block_literal"><a class="anchor" href="#block_literal"></a><a class="link" href="#block_literal">13. Block literal</a></h2>
+<h2 id="literal_blocks"><a class="anchor" href="#literal_blocks"></a><a class="link" href="#literal_blocks">13. Literal Blocks</a></h2>
<div class="sectionbody">
+<div class="sect2">
+<h3 id="indent_method"><a class="anchor" href="#indent_method"></a><a class="link" href="#indent_method">13.1. Indent method</a></h3>
<div class="literalblock">
<div class="content">
<pre> A literal paragraph followed by non-space line:
@@ -926,6 +934,30 @@ non-space line.</pre>
</div>
<div class="literalblock">
<div class="content">
+<pre>Literal with tab.
+ Literal with tab.
+Literal with tab.</pre>
+</div>
+</div>
+<div class="literalblock">
+<div class="content">
+<pre>Literal with two spaces.
+ Literal with two spaces.
+Literal with two tab.</pre>
+</div>
+</div>
+<div class="literalblock">
+<div class="content">
+<pre> Literal with mixed tab and spaces (two spaces).
+Literal with mixed tab and spaces (a tab).
+ Literal with mixed tab and spaces (two spaces and a tab).</pre>
+</div>
+</div>
+</div>
+<div class="sect2">
+<h3 id="literal_style_sintax"><a class="anchor" href="#literal_style_sintax"></a><a class="link" href="#literal_style_sintax">13.2. Literal style sintax</a></h3>
+<div class="literalblock">
+<div class="content">
<pre>A literal named.</pre>
</div>
</div>
@@ -933,9 +965,13 @@ non-space line.</pre>
<p>[literal] x
A literal named and trailing characters will become paragraph.</p>
</div>
+</div>
+<div class="sect2">
+<h3 id="delimited_literal_block"><a class="anchor" href="#delimited_literal_block"></a><a class="link" href="#delimited_literal_block">13.3. Delimited literal block</a></h3>
<div class="literalblock">
<div class="content">
-<pre>With 4 dots.</pre>
+<pre>With .... block.</pre>
+</div>
</div>
</div>
</div>
diff --git a/testdata/test.got.html b/testdata/test.got.html
index 6e8c276..18d7609 100644
--- a/testdata/test.got.html
+++ b/testdata/test.got.html
@@ -2496,8 +2496,14 @@ p.tableblock {
<li><a href="#title">11.1. Title</a></li>
</ul>
</li>
-<li><a href="#block_listing">12. Block listing</a></li>
-<li><a href="#block_literal">13. Block literal</a></li>
+<li><a href="#listing_blocks">12. Listing Blocks</a></li>
+<li><a href="#literal_blocks">13. Literal Blocks</a>
+<ul class="sectlevel2">
+<li><a href="#indent_method">13.1. Indent method</a></li>
+<li><a href="#literal_style_sintax">13.2. Literal style sintax</a></li>
+<li><a href="#delimited_literal_block">13.3. Delimited literal block</a></li>
+</ul>
+</li>
<li><a href="#ordered_lists">14. Ordered Lists</a></li>
<li><a href="#unordered_lists">15. Unordered Lists</a>
<ul class="sectlevel2">
@@ -2910,7 +2916,7 @@ paragraph</strong></em>.</p>
</div>
</div>
<div class="sect1">
-<h2 id="block_listing"><a class="link" href="#block_listing">12. Block listing</a><a class="anchor" href="#block_listing" aria-label="Anchor for block_listing"></a></h2>
+<h2 id="listing_blocks"><a class="link" href="#listing_blocks">12. Listing Blocks</a><a class="anchor" href="#listing_blocks" aria-label="Anchor for listing_blocks"></a></h2>
<div class="sectionbody">
<div class="listingblock">
<div class="content">
@@ -2929,8 +2935,10 @@ This is not listing.</p>
</div>
</div>
<div class="sect1">
-<h2 id="block_literal"><a class="link" href="#block_literal">13. Block literal</a><a class="anchor" href="#block_literal" aria-label="Anchor for block_literal"></a></h2>
+<h2 id="literal_blocks"><a class="link" href="#literal_blocks">13. Literal Blocks</a><a class="anchor" href="#literal_blocks" aria-label="Anchor for literal_blocks"></a></h2>
<div class="sectionbody">
+<div class="sect2">
+<h3 id="indent_method"><a class="link" href="#indent_method">13.1. Indent method</a><a class="anchor" href="#indent_method" aria-label="Anchor for indent_method"></a></h3>
<div class="literalblock">
<div class="content">
<pre>A literal paragraph followed by non-space line:
@@ -2939,6 +2947,30 @@ non-space line.</pre>
</div>
<div class="literalblock">
<div class="content">
+<pre>Literal with tab.
+ Literal with tab.
+Literal with tab.</pre>
+</div>
+</div>
+<div class="literalblock">
+<div class="content">
+<pre>Literal with two spaces.
+ Literal with two spaces.
+Literal with two tab.</pre>
+</div>
+</div>
+<div class="literalblock">
+<div class="content">
+<pre>Literal with mixed tab and spaces (two spaces).
+Literal with mixed tab and spaces (a tab).
+ Literal with mixed tab and spaces (two spaces and a tab).</pre>
+</div>
+</div>
+</div>
+<div class="sect2">
+<h3 id="literal_style_sintax"><a class="link" href="#literal_style_sintax">13.2. Literal style sintax</a><a class="anchor" href="#literal_style_sintax" aria-label="Anchor for literal_style_sintax"></a></h3>
+<div class="literalblock">
+<div class="content">
<pre>A literal named.</pre>
</div>
</div>
@@ -2946,9 +2978,13 @@ non-space line.</pre>
<p>[literal] x
A literal named and trailing characters will become paragraph.</p>
</div>
+</div>
+<div class="sect2">
+<h3 id="delimited_literal_block"><a class="link" href="#delimited_literal_block">13.3. Delimited literal block</a><a class="anchor" href="#delimited_literal_block" aria-label="Anchor for delimited_literal_block"></a></h3>
<div class="literalblock">
<div class="content">
-<pre>With 4 dots.</pre>
+<pre>With .... block.</pre>
+</div>
</div>
</div>
</div>