From 45526ab31b93debffd51e1aaad43c9acf9d61ab6 Mon Sep 17 00:00:00 2001
From: Shulhan
Date: Sat, 4 Apr 2026 16:38:06 +0700
Subject: all: fix parsing literal paragraph
A line that starts with one or more spaces is considered
a literal paragraph.
Once a literal line is found, the paragraph continues until an empty line.
---
README.md | 5 ++-
document_parser.go | 71 ++++++++++++++++++++++++++++++-------
testdata/literal_paragraph_test.txt | 51 ++++++++++++++++++++++++++
testdata/test.adoc | 24 +++++++++++--
testdata/test.exp.html | 46 +++++++++++++++++++++---
testdata/test.got.html | 46 +++++++++++++++++++++---
6 files changed, 217 insertions(+), 26 deletions(-)
create mode 100644 testdata/literal_paragraph_test.txt
diff --git a/README.md b/README.md
index 908244a..d05f186 100644
--- a/README.md
+++ b/README.md
@@ -118,7 +118,10 @@ Each supported feature is linked to official
- Attributes (reference)
- Replacements
- Preventing Substitutions
-- Listing Blocks
+- Verbatim and Source Blocks
+ - ❌ [Source Code Blocks](https://docs.asciidoctor.org/asciidoc/latest/verbatim/source-blocks/)
+ - ✅ [Listing Blocks](https://docs.asciidoctor.org/asciidoc/latest/verbatim/listing-blocks/)
+ - ✅ [Literal Blocks](https://docs.asciidoctor.org/asciidoc/latest/verbatim/literal-blocks/)
- Passthroughs
- Passthrough Blocks
- Open Blocks
diff --git a/document_parser.go b/document_parser.go
index 8de91cc..18c0817 100644
--- a/document_parser.go
+++ b/document_parser.go
@@ -11,6 +11,13 @@ import (
"git.sr.ht/~shulhan/pakakeh.go/lib/ascii"
)
+// Starting from [Document.parse], all parsing begins at parseBlock.
+//
+// -> parseBlock()
+// -> line()
+// -> whatKindOfLine()
+// The parsing branches here based on docp.kind.
+
const debugLevel = 0
type documentParser struct {
@@ -133,6 +140,48 @@ func (docp *documentParser) consumeLinesUntil(el *element, term int, terms []int
return line
}
+// consumeLiteralParagraph writes lines into el, minus the origSpaces prefix, until
+// a line with empty spaces has a docp.kind listed in terms; it returns that line.
+func (docp *documentParser) consumeLiteralParagraph(el *element,
+ origSpaces []byte, terms ...int,
+) (line []byte) {
+ logp := `consumeLiteralParagraph`
+ lenOrigSpaces := len(origSpaces)
+ var spaces []byte
+ var ok bool
+ for {
+ spaces, line, ok = docp.line(logp)
+ if !ok {
+ break
+ }
+ if bytes.HasPrefix(spaces, origSpaces) {
+ el.Write(spaces[lenOrigSpaces:])
+ el.Write(line)
+ el.WriteByte('\n')
+ continue
+ }
+ if len(spaces) != 0 {
+ // The spaces do not begin with origSpaces: drop only their
+ // first byte and write the remainder before the line.
+ el.Write(spaces[1:])
+ el.Write(line)
+ el.WriteByte('\n')
+ continue
+ }
+ for _, kind := range terms {
+ if kind == docp.kind {
+ goto out
+ }
+ }
+ // No kind in terms matched docp.kind: write the line as-is and keep consuming.
+ el.Write(line)
+ el.WriteByte('\n')
+ }
+out:
+ el.raw = applySubstitutions(docp.doc, el.raw)
+ return line
+}
+
// hasPreamble will return true if the contents contains preamble, indicated
// by the first section that found after current line.
func (docp *documentParser) hasPreamble() bool {
@@ -285,12 +334,13 @@ func (docp *documentParser) parseBlock(parent *element, term int) {
el = &element{}
line []byte
+ spaces []byte
isTerm bool
ok bool
)
for !isTerm {
if len(line) == 0 {
- _, line, ok = docp.line(logp)
+ spaces, line, ok = docp.line(logp)
if !ok {
return
}
@@ -514,17 +564,14 @@ func (docp *documentParser) parseBlock(parent *element, term int) {
el.addRole(classNameLiteralBlock)
el.Write(line)
el.WriteByte('\n')
- line = docp.consumeLinesUntil(
- el,
- lineKindEmpty,
- []int{
- term,
- elKindBlockListing,
- elKindBlockListingNamed,
- elKindBlockLiteral,
- elKindBlockLiteralNamed,
- })
- el.raw = applySubstitutions(docp.doc, el.raw)
+ line = docp.consumeLiteralParagraph(el,
+ spaces, lineKindEmpty,
+ term,
+ elKindBlockListing,
+ elKindBlockListingNamed,
+ elKindBlockLiteral,
+ elKindBlockLiteralNamed,
+ )
}
parent.addChild(el)
el = &element{}
diff --git a/testdata/literal_paragraph_test.txt b/testdata/literal_paragraph_test.txt
new file mode 100644
index 0000000..684b859
--- /dev/null
+++ b/testdata/literal_paragraph_test.txt
@@ -0,0 +1,51 @@
+>>> WithTab
+
+A paragraph.
+
+ Literal paragraph with tab.
+ Literal paragraph with tab.
+ Literal paragraph with tab.
+
+End of section.
+
+<<< WithTab
+
+
+
+
+
Literal paragraph with tab.
+ Literal paragraph with tab.
+Literal paragraph with tab.
+
+
+
+
+>>> WithSpaces
+
+A paragraph.
+
+ Literal paragraph with two spaces.
+ Literal paragraph with two spaces.
+ Literal paragraph with two spaces.
+
+End of section.
+
+<<< WithSpaces
+
+
+
+
+
Literal paragraph with two spaces.
+Literal paragraph with two spaces.
+Literal paragraph with two spaces.
+
+
+
diff --git a/testdata/test.adoc b/testdata/test.adoc
index e7ef033..5241734 100644
--- a/testdata/test.adoc
+++ b/testdata/test.adoc
@@ -234,7 +234,7 @@ A bold with * space *, with single non alnum *=*.
* Write my document
-== Block listing
+== Listing Blocks
[listing]
This is single paragraph listing.
@@ -247,19 +247,37 @@ This is block listing.
----
-== Block literal
+== Literal Blocks
+
+=== Indent method
A literal paragraph followed by non-space line:
non-space line.
+ Literal with tab.
+ Literal with tab.
+ Literal with tab.
+
+ Literal with two spaces.
+ Literal with two spaces.
+ Literal with two tab.
+
+ Literal with mixed tab and spaces (two spaces).
+ Literal with mixed tab and spaces (a tab).
+ Literal with mixed tab and spaces (two spaces and a tab).
+
+=== Literal style sintax
+
[literal]
A literal named.
[literal] x
A literal named and trailing characters will become paragraph.
+=== Delimited literal block
+
....
-With 4 dots.
+With .... block.
....
// This one does not work:
diff --git a/testdata/test.exp.html b/testdata/test.exp.html
index 90be8e2..dd1e453 100644
--- a/testdata/test.exp.html
+++ b/testdata/test.exp.html
@@ -481,8 +481,14 @@ body.book #toc,body.book #preamble,body.book h1.sect0,body.book .sect1>h2{page-b
11.1. Title
-12. Block listing
-13. Block literal
+12. Listing Blocks
+13. Literal Blocks
+
+
14. Ordered Lists
15. Unordered Lists
@@ -897,7 +903,7 @@ paragraph*.
-
+
@@ -916,8 +922,10 @@ This is not listing.
-
+
+
+
A literal paragraph followed by non-space line:
@@ -926,6 +934,30 @@ non-space line.
+
Literal with tab.
+ Literal with tab.
+Literal with tab.
+
+
+
+
+
Literal with two spaces.
+ Literal with two spaces.
+Literal with two tab.
+
+
+
+
+
Literal with mixed tab and spaces (two spaces).
+Literal with mixed tab and spaces (a tab).
+ Literal with mixed tab and spaces (two spaces and a tab).
+
+
+
+
+
+
@@ -933,9 +965,13 @@ non-space line.
[literal] x
A literal named and trailing characters will become paragraph.
+
+
+
-
With 4 dots.
+
With .... block.
+
diff --git a/testdata/test.got.html b/testdata/test.got.html
index 6e8c276..18d7609 100644
--- a/testdata/test.got.html
+++ b/testdata/test.got.html
@@ -2496,8 +2496,14 @@ p.tableblock {
11.1. Title
-
12. Block listing
-
13. Block literal
+
12. Listing Blocks
+
13. Literal Blocks
+
+
14. Ordered Lists
15. Unordered Lists
@@ -2910,7 +2916,7 @@ paragraph.
-
+
@@ -2929,8 +2935,10 @@ This is not listing.
-
+
+
+
A literal paragraph followed by non-space line:
@@ -2939,6 +2947,30 @@ non-space line.
+
Literal with tab.
+ Literal with tab.
+Literal with tab.
+
+
+
+
+
Literal with two spaces.
+ Literal with two spaces.
+Literal with two tab.
+
+
+
+
+
Literal with mixed tab and spaces (two spaces).
+Literal with mixed tab and spaces (a tab).
+ Literal with mixed tab and spaces (two spaces and a tab).
+
+
+
+
+
+
@@ -2946,9 +2978,13 @@ non-space line.
[literal] x
A literal named and trailing characters will become paragraph.
+
+
+
-
With 4 dots.
+
With .... block.
+
--
cgit v1.3