From 45526ab31b93debffd51e1aaad43c9acf9d61ab6 Mon Sep 17 00:00:00 2001
From: Shulhan <ms@kilabit.info>
Date: Sat, 4 Apr 2026 16:38:06 +0700
Subject: all: fix parsing literal paragraph

A line that starts with one or more spaces is considered a literal
paragraph.
Once a literal line is found, the paragraph continues until an empty line.
---
 README.md                           |  5 ++-
 document_parser.go                  | 71 ++++++++++++++++++++++++++++++-------
 testdata/literal_paragraph_test.txt | 51 ++++++++++++++++++++++++++
 testdata/test.adoc                  | 24 +++++++++++--
 testdata/test.exp.html              | 46 +++++++++++++++++++++---
 testdata/test.got.html              | 46 +++++++++++++++++++++---
 6 files changed, 217 insertions(+), 26 deletions(-)
 create mode 100644 testdata/literal_paragraph_test.txt

diff --git a/README.md b/README.md
index 908244a..d05f186 100644
--- a/README.md
+++ b/README.md
@@ -118,7 +118,10 @@ Each supported feature is linked to official
   - Attributes (reference)
   - Replacements
   - Preventing Substitutions
-- Listing Blocks
+- Verbatim and Source Blocks
+  - ❌ [Source Code Blocks](https://docs.asciidoctor.org/asciidoc/latest/verbatim/source-blocks/)
+  - ✅ [Listing Blocks](https://docs.asciidoctor.org/asciidoc/latest/verbatim/listing-blocks/)
+  - ✅ [Literal Blocks](https://docs.asciidoctor.org/asciidoc/latest/verbatim/literal-blocks/)
 - Passthroughs
   - Passthrough Blocks
 - Open Blocks
diff --git a/document_parser.go b/document_parser.go
index 8de91cc..18c0817 100644
--- a/document_parser.go
+++ b/document_parser.go
@@ -11,6 +11,13 @@ import (
 	"git.sr.ht/~shulhan/pakakeh.go/lib/ascii"
 )
 
+// Starting from [Document.parse], all parsing begins in parseBlock.
+//
+//  -> parseBlock()
+//     -> line()
+//        -> whatKindOfLine()
+//     The parsing branches here based on docp.kind.
+
 const debugLevel = 0
 
 type documentParser struct {
@@ -133,6 +140,48 @@ func (docp *documentParser) consumeLinesUntil(el *element, term int, terms []int
 	return line
 }
 
+// consumeLiteralParagraph writes the following lines to el, minus the leading
+// origSpaces, until an unindented line of a kind in terms; it returns the last line read.
+func (docp *documentParser) consumeLiteralParagraph(el *element,
+	origSpaces []byte, terms ...int,
+) (line []byte) {
+	logp := `consumeLiteralParagraph`
+	lenOrigSpaces := len(origSpaces)
+	var spaces []byte
+	var ok bool
+	for {
+		spaces, line, ok = docp.line(logp)
+		if !ok {
+			break
+		}
+		if bytes.HasPrefix(spaces, origSpaces) { // Indented at least as deep as origSpaces.
+			el.Write(spaces[lenOrigSpaces:])
+			el.Write(line)
+			el.WriteByte('\n')
+			continue
+		}
+		if len(spaces) != 0 {
+			// The indentation does not start with origSpaces: drop
+			// only its first byte and keep the rest.
+			el.Write(spaces[1:])
+			el.Write(line)
+			el.WriteByte('\n')
+			continue
+		}
+		for _, kind := range terms {
+			if kind == docp.kind {
+				goto out
+			}
+		}
+		// Unindented line whose kind is not in terms: keep it and continue.
+		el.Write(line)
+		el.WriteByte('\n')
+	}
+out:
+	el.raw = applySubstitutions(docp.doc, el.raw)
+	return line
+}
+
 // hasPreamble will return true if the contents contains preamble, indicated
 // by the first section that found after current line.
 func (docp *documentParser) hasPreamble() bool {
@@ -285,12 +334,13 @@ func (docp *documentParser) parseBlock(parent *element, term int) {
 		el   = &element{}
 
 		line   []byte
+		spaces []byte
 		isTerm bool
 		ok     bool
 	)
 	for !isTerm {
 		if len(line) == 0 {
-			_, line, ok = docp.line(logp)
+			spaces, line, ok = docp.line(logp)
 			if !ok {
 				return
 			}
@@ -514,17 +564,14 @@ func (docp *documentParser) parseBlock(parent *element, term int) {
 				el.addRole(classNameLiteralBlock)
 				el.Write(line)
 				el.WriteByte('\n')
-				line = docp.consumeLinesUntil(
-					el,
-					lineKindEmpty,
-					[]int{
-						term,
-						elKindBlockListing,
-						elKindBlockListingNamed,
-						elKindBlockLiteral,
-						elKindBlockLiteralNamed,
-					})
-				el.raw = applySubstitutions(docp.doc, el.raw)
+				line = docp.consumeLiteralParagraph(el,
+					spaces, lineKindEmpty,
+					term,
+					elKindBlockListing,
+					elKindBlockListingNamed,
+					elKindBlockLiteral,
+					elKindBlockLiteralNamed,
+				)
 			}
 			parent.addChild(el)
 			el = &element{}
diff --git a/testdata/literal_paragraph_test.txt b/testdata/literal_paragraph_test.txt
new file mode 100644
index 0000000..684b859
--- /dev/null
+++ b/testdata/literal_paragraph_test.txt
@@ -0,0 +1,51 @@
+>>> WithTab
+
+A paragraph.
+
+	Literal paragraph with tab.
+		Literal paragraph with tab.
+	Literal paragraph with tab.
+
+End of section.
+
+<<< WithTab
+
+<div class="paragraph">
+<p>A paragraph.</p>
+</div>
+<div class="literalblock">
+<div class="content">
+<pre>Literal paragraph with tab.
+	Literal paragraph with tab.
+Literal paragraph with tab.</pre>
+</div>
+</div>
+<div class="paragraph">
+<p>End of section.</p>
+</div>
+
+>>> WithSpaces
+
+A paragraph.
+
+  Literal paragraph with two spaces.
+  Literal paragraph with two spaces.
+  Literal paragraph with two spaces.
+
+End of section.
+
+<<< WithSpaces
+
+<div class="paragraph">
+<p>A paragraph.</p>
+</div>
+<div class="literalblock">
+<div class="content">
+<pre>Literal paragraph with two spaces.
+Literal paragraph with two spaces.
+Literal paragraph with two spaces.</pre>
+</div>
+</div>
+<div class="paragraph">
+<p>End of section.</p>
+</div>
diff --git a/testdata/test.adoc b/testdata/test.adoc
index e7ef033..5241734 100644
--- a/testdata/test.adoc
+++ b/testdata/test.adoc
@@ -234,7 +234,7 @@ A bold with * space *, with single non alnum *=*.
 * Write my document
 
 
-== Block listing
+== Listing Blocks
 
 [listing]
 This is single paragraph listing.
@@ -247,19 +247,37 @@ This is block listing.
 ----
 
 
-== Block literal
+== Literal Blocks
+
+=== Indent method
 
  A literal paragraph followed by non-space line:
 non-space line.
 
+	Literal with tab.
+		Literal with tab.
+	Literal with tab.
+
+  Literal with two spaces.
+    Literal with two spaces.
+  Literal with two tab.
+
+  Literal with mixed tab and spaces (two spaces).
+	Literal with mixed tab and spaces (a tab).
+  	Literal with mixed tab and spaces (two spaces and a tab).
+
+=== Literal style sintax
+
 [literal]
 A literal named.
 
 [literal] x
 A literal named and trailing characters will become paragraph.
 
+=== Delimited literal block
+
 ....
-With 4 dots.
+With .... block.
 ....
 
 // This one does not work:
diff --git a/testdata/test.exp.html b/testdata/test.exp.html
index 90be8e2..dd1e453 100644
--- a/testdata/test.exp.html
+++ b/testdata/test.exp.html
@@ -481,8 +481,14 @@ body.book #toc,body.book #preamble,body.book h1.sect0,body.book .sect1>h2{page-b
 <li><a href="#title">11.1. Title</a></li>
 </ul>
 </li>
-<li><a href="#block_listing">12. Block listing</a></li>
-<li><a href="#block_literal">13. Block literal</a></li>
+<li><a href="#listing_blocks">12. Listing Blocks</a></li>
+<li><a href="#literal_blocks">13. Literal Blocks</a>
+<ul class="sectlevel2">
+<li><a href="#indent_method">13.1. Indent method</a></li>
+<li><a href="#literal_style_sintax">13.2. Literal style sintax</a></li>
+<li><a href="#delimited_literal_block">13.3. Delimited literal block</a></li>
+</ul>
+</li>
 <li><a href="#ordered_lists">14. Ordered Lists</a></li>
 <li><a href="#unordered_lists">15. Unordered Lists</a>
 <ul class="sectlevel2">
@@ -897,7 +903,7 @@ paragraph*</em>.</p>
 </div>
 </div>
 <div class="sect1">
-<h2 id="block_listing"><a class="anchor" href="#block_listing"></a><a class="link" href="#block_listing">12. Block listing</a></h2>
+<h2 id="listing_blocks"><a class="anchor" href="#listing_blocks"></a><a class="link" href="#listing_blocks">12. Listing Blocks</a></h2>
 <div class="sectionbody">
 <div class="listingblock">
 <div class="content">
@@ -916,8 +922,10 @@ This is not listing.</p>
 </div>
 </div>
 <div class="sect1">
-<h2 id="block_literal"><a class="anchor" href="#block_literal"></a><a class="link" href="#block_literal">13. Block literal</a></h2>
+<h2 id="literal_blocks"><a class="anchor" href="#literal_blocks"></a><a class="link" href="#literal_blocks">13. Literal Blocks</a></h2>
 <div class="sectionbody">
+<div class="sect2">
+<h3 id="indent_method"><a class="anchor" href="#indent_method"></a><a class="link" href="#indent_method">13.1. Indent method</a></h3>
 <div class="literalblock">
 <div class="content">
 <pre> A literal paragraph followed by non-space line:
@@ -926,6 +934,30 @@ non-space line.</pre>
 </div>
 <div class="literalblock">
 <div class="content">
+<pre>Literal with tab.
+	Literal with tab.
+Literal with tab.</pre>
+</div>
+</div>
+<div class="literalblock">
+<div class="content">
+<pre>Literal with two spaces.
+  Literal with two spaces.
+Literal with two tab.</pre>
+</div>
+</div>
+<div class="literalblock">
+<div class="content">
+<pre> Literal with mixed tab and spaces (two spaces).
+Literal with mixed tab and spaces (a tab).
+ 	Literal with mixed tab and spaces (two spaces and a tab).</pre>
+</div>
+</div>
+</div>
+<div class="sect2">
+<h3 id="literal_style_sintax"><a class="anchor" href="#literal_style_sintax"></a><a class="link" href="#literal_style_sintax">13.2. Literal style sintax</a></h3>
+<div class="literalblock">
+<div class="content">
 <pre>A literal named.</pre>
 </div>
 </div>
@@ -933,9 +965,13 @@ non-space line.</pre>
 <p>[literal] x
 A literal named and trailing characters will become paragraph.</p>
 </div>
+</div>
+<div class="sect2">
+<h3 id="delimited_literal_block"><a class="anchor" href="#delimited_literal_block"></a><a class="link" href="#delimited_literal_block">13.3. Delimited literal block</a></h3>
 <div class="literalblock">
 <div class="content">
-<pre>With 4 dots.</pre>
+<pre>With .... block.</pre>
+</div>
 </div>
 </div>
 </div>
diff --git a/testdata/test.got.html b/testdata/test.got.html
index 6e8c276..18d7609 100644
--- a/testdata/test.got.html
+++ b/testdata/test.got.html
@@ -2496,8 +2496,14 @@ p.tableblock {
 <li><a href="#title">11.1. Title</a></li>
 </ul>
 </li>
-<li><a href="#block_listing">12. Block listing</a></li>
-<li><a href="#block_literal">13. Block literal</a></li>
+<li><a href="#listing_blocks">12. Listing Blocks</a></li>
+<li><a href="#literal_blocks">13. Literal Blocks</a>
+<ul class="sectlevel2">
+<li><a href="#indent_method">13.1. Indent method</a></li>
+<li><a href="#literal_style_sintax">13.2. Literal style sintax</a></li>
+<li><a href="#delimited_literal_block">13.3. Delimited literal block</a></li>
+</ul>
+</li>
 <li><a href="#ordered_lists">14. Ordered Lists</a></li>
 <li><a href="#unordered_lists">15. Unordered Lists</a>
 <ul class="sectlevel2">
@@ -2910,7 +2916,7 @@ paragraph</strong></em>.</p>
 </div>
 </div>
 <div class="sect1">
-<h2 id="block_listing"><a class="link" href="#block_listing">12. Block listing</a><a class="anchor" href="#block_listing" aria-label="Anchor for block_listing"></a></h2>
+<h2 id="listing_blocks"><a class="link" href="#listing_blocks">12. Listing Blocks</a><a class="anchor" href="#listing_blocks" aria-label="Anchor for listing_blocks"></a></h2>
 <div class="sectionbody">
 <div class="listingblock">
 <div class="content">
@@ -2929,8 +2935,10 @@ This is not listing.</p>
 </div>
 </div>
 <div class="sect1">
-<h2 id="block_literal"><a class="link" href="#block_literal">13. Block literal</a><a class="anchor" href="#block_literal" aria-label="Anchor for block_literal"></a></h2>
+<h2 id="literal_blocks"><a class="link" href="#literal_blocks">13. Literal Blocks</a><a class="anchor" href="#literal_blocks" aria-label="Anchor for literal_blocks"></a></h2>
 <div class="sectionbody">
+<div class="sect2">
+<h3 id="indent_method"><a class="link" href="#indent_method">13.1. Indent method</a><a class="anchor" href="#indent_method" aria-label="Anchor for indent_method"></a></h3>
 <div class="literalblock">
 <div class="content">
 <pre>A literal paragraph followed by non-space line:
@@ -2939,6 +2947,30 @@ non-space line.</pre>
 </div>
 <div class="literalblock">
 <div class="content">
+<pre>Literal with tab.
+	Literal with tab.
+Literal with tab.</pre>
+</div>
+</div>
+<div class="literalblock">
+<div class="content">
+<pre>Literal with two spaces.
+  Literal with two spaces.
+Literal with two tab.</pre>
+</div>
+</div>
+<div class="literalblock">
+<div class="content">
+<pre>Literal with mixed tab and spaces (two spaces).
+Literal with mixed tab and spaces (a tab).
+	Literal with mixed tab and spaces (two spaces and a tab).</pre>
+</div>
+</div>
+</div>
+<div class="sect2">
+<h3 id="literal_style_sintax"><a class="link" href="#literal_style_sintax">13.2. Literal style sintax</a><a class="anchor" href="#literal_style_sintax" aria-label="Anchor for literal_style_sintax"></a></h3>
+<div class="literalblock">
+<div class="content">
 <pre>A literal named.</pre>
 </div>
 </div>
@@ -2946,9 +2978,13 @@ non-space line.</pre>
 <p>[literal] x
 A literal named and trailing characters will become paragraph.</p>
 </div>
+</div>
+<div class="sect2">
+<h3 id="delimited_literal_block"><a class="link" href="#delimited_literal_block">13.3. Delimited literal block</a><a class="anchor" href="#delimited_literal_block" aria-label="Anchor for delimited_literal_block"></a></h3>
 <div class="literalblock">
 <div class="content">
-<pre>With 4 dots.</pre>
+<pre>With .... block.</pre>
+</div>
 </div>
 </div>
 </div>
-- 
cgit v1.3