From 45526ab31b93debffd51e1aaad43c9acf9d61ab6 Mon Sep 17 00:00:00 2001 From: Shulhan Date: Sat, 4 Apr 2026 16:38:06 +0700 Subject: all: fix parsing literal paragraph A line that starts with one or more spaces is considered a literal paragraph. Once a literal line is found, the paragraph continues until an empty line. --- README.md | 5 ++- document_parser.go | 71 ++++++++++++++++++++++++++++++------- testdata/literal_paragraph_test.txt | 51 ++++++++++++++++++++++++++ testdata/test.adoc | 24 +++++++++++-- testdata/test.exp.html | 46 +++++++++++++++++++++--- testdata/test.got.html | 46 +++++++++++++++++++++--- 6 files changed, 217 insertions(+), 26 deletions(-) create mode 100644 testdata/literal_paragraph_test.txt diff --git a/README.md b/README.md index 908244a..d05f186 100644 --- a/README.md +++ b/README.md @@ -118,7 +118,10 @@ Each supported feature is linked to official - Attributes (reference) - Replacements - Preventing Substitutions -- Listing Blocks +- Verbatim and Source Blocks + - ❌ [Source Code Blocks](https://docs.asciidoctor.org/asciidoc/latest/verbatim/source-blocks/) + - ✅ [Listing Blocks](https://docs.asciidoctor.org/asciidoc/latest/verbatim/listing-blocks/) + - ✅ [Literal Blocks](https://docs.asciidoctor.org/asciidoc/latest/verbatim/literal-blocks/) - Passthroughs - Passthrough Blocks - Open Blocks diff --git a/document_parser.go b/document_parser.go index 8de91cc..18c0817 100644 --- a/document_parser.go +++ b/document_parser.go @@ -11,6 +11,13 @@ import ( "git.sr.ht/~shulhan/pakakeh.go/lib/ascii" ) +// Starting from [Document.parse], it all starts from parseBlock. +// +// -> parseBlock() +// -> line() +// -> whatKindOfLine() +// The parsing branches here based on docp.kind. + const debugLevel = 0 type documentParser struct { @@ -133,6 +140,48 @@ func (docp *documentParser) consumeLinesUntil(el *element, term int, terms []int return line } +// consumeLiteralParagraph consumes the lines of a literal paragraph into el +// and returns the first line that terminates it. 
+func (docp *documentParser) consumeLiteralParagraph(el *element, + origSpaces []byte, terms ...int, +) (line []byte) { + logp := `consumeLiteralParagraph` + lenOrigSpaces := len(origSpaces) + var spaces []byte + var ok bool + for { + spaces, line, ok = docp.line(logp) + if !ok { + break + } + if bytes.HasPrefix(spaces, origSpaces) { + el.Write(spaces[lenOrigSpaces:]) + el.Write(line) + el.WriteByte('\n') + continue + } + if len(spaces) != 0 { + // If the line starts with spaces, remove a single space only and + // append the rest. + el.Write(spaces[1:]) + el.Write(line) + el.WriteByte('\n') + continue + } + for _, kind := range terms { + if kind == docp.kind { + goto out + } + } + // Keep consuming lines until we find a terminating condition. + el.Write(line) + el.WriteByte('\n') + } +out: + el.raw = applySubstitutions(docp.doc, el.raw) + return line +} + // hasPreamble will return true if the contents contains preamble, indicated // by the first section that found after current line. func (docp *documentParser) hasPreamble() bool { @@ -285,12 +334,13 @@ func (docp *documentParser) parseBlock(parent *element, term int) { el = &element{} line []byte + spaces []byte isTerm bool ok bool ) for !isTerm { if len(line) == 0 { - _, line, ok = docp.line(logp) + spaces, line, ok = docp.line(logp) if !ok { return } @@ -514,17 +564,14 @@ func (docp *documentParser) parseBlock(parent *element, term int) { el.addRole(classNameLiteralBlock) el.Write(line) el.WriteByte('\n') - line = docp.consumeLinesUntil( - el, - lineKindEmpty, - []int{ - term, - elKindBlockListing, - elKindBlockListingNamed, - elKindBlockLiteral, - elKindBlockLiteralNamed, - }) - el.raw = applySubstitutions(docp.doc, el.raw) + line = docp.consumeLiteralParagraph(el, + spaces, lineKindEmpty, + term, + elKindBlockListing, + elKindBlockListingNamed, + elKindBlockLiteral, + elKindBlockLiteralNamed, + ) } parent.addChild(el) el = &element{} diff --git a/testdata/literal_paragraph_test.txt b/testdata/literal_paragraph_test.txt 
new file mode 100644 index 0000000..684b859 --- /dev/null +++ b/testdata/literal_paragraph_test.txt @@ -0,0 +1,51 @@ +>>> WithTab + +A paragraph. + + Literal paragraph with tab. + Literal paragraph with tab. + Literal paragraph with tab. + +End of section. + +<<< WithTab + +
+

A paragraph.

+
+
+
+
Literal paragraph with tab.
+	Literal paragraph with tab.
+Literal paragraph with tab.
+
+
+
+

End of section.

+
+ +>>> WithSpaces + +A paragraph. + + Literal paragraph with two spaces. + Literal paragraph with two spaces. + Literal paragraph with two spaces. + +End of section. + +<<< WithSpaces + +
+

A paragraph.

+
+
+
+
Literal paragraph with two spaces.
+Literal paragraph with two spaces.
+Literal paragraph with two spaces.
+
+
+
+

End of section.

+
diff --git a/testdata/test.adoc b/testdata/test.adoc index e7ef033..5241734 100644 --- a/testdata/test.adoc +++ b/testdata/test.adoc @@ -234,7 +234,7 @@ A bold with * space *, with single non alnum *=*. * Write my document -== Block listing +== Listing Blocks [listing] This is single paragraph listing. @@ -247,19 +247,37 @@ This is block listing. ---- -== Block literal +== Literal Blocks + +=== Indent method A literal paragraph followed by non-space line: non-space line. + Literal with tab. + Literal with tab. + Literal with tab. + + Literal with two spaces. + Literal with two spaces. + Literal with two tab. + + Literal with mixed tab and spaces (two spaces). + Literal with mixed tab and spaces (a tab). + Literal with mixed tab and spaces (two spaces and a tab). + +=== Literal style sintax + [literal] A literal named. [literal] x A literal named and trailing characters will become paragraph. +=== Delimited literal block + .... -With 4 dots. +With .... block. .... // This one does not work: diff --git a/testdata/test.exp.html b/testdata/test.exp.html index 90be8e2..dd1e453 100644 --- a/testdata/test.exp.html +++ b/testdata/test.exp.html @@ -481,8 +481,14 @@ body.book #toc,body.book #preamble,body.book h1.sect0,body.book .sect1>h2{page-b
  • 11.1. Title
  • -
  • 12. Block listing
  • -
  • 13. Block literal
  • +
  • 12. Listing Blocks
  • +
  • 13. Literal Blocks + +
  • 14. Ordered Lists
  • 15. Unordered Lists
  • -
  • 12. Block listing
  • -
  • 13. Block literal
  • +
  • 12. Listing Blocks
  • +
  • 13. Literal Blocks + +
  • 14. Ordered Lists
  • 15. Unordered Lists