diff options
| author | Shulhan <ms@kilabit.info> | 2026-04-04 16:38:06 +0700 |
|---|---|---|
| committer | Shulhan <ms@kilabit.info> | 2026-04-04 16:38:06 +0700 |
| commit | 45526ab31b93debffd51e1aaad43c9acf9d61ab6 (patch) | |
| tree | a23ccc730093cf8d251c1d6e43a3c9679f42112c | |
| parent | a9e626c6306697a64af87b1b7fa4f61b0580d5a8 (diff) | |
| download | asciidoctor-go-45526ab31b93debffd51e1aaad43c9acf9d61ab6.tar.xz | |
all: fix parsing literal paragraph
A line that starts with one or more spaces is considered
a literal paragraph.
Once a literal line is found, the literal paragraph continues until an empty line.
| -rw-r--r-- | README.md | 5 | ||||
| -rw-r--r-- | document_parser.go | 71 | ||||
| -rw-r--r-- | testdata/literal_paragraph_test.txt | 51 | ||||
| -rw-r--r-- | testdata/test.adoc | 24 | ||||
| -rw-r--r-- | testdata/test.exp.html | 46 | ||||
| -rw-r--r-- | testdata/test.got.html | 46 |
6 files changed, 217 insertions, 26 deletions
@@ -118,7 +118,10 @@ Each supported feature is linked to official - Attributes (reference) - Replacements - Preventing Substitutions -- Listing Blocks +- Verbatim and Sources Block + - ❌ [Source Code Blocks](https://docs.asciidoctor.org/asciidoc/latest/verbatim/source-blocks/) + - ✅ [Listing Blocks](https://docs.asciidoctor.org/asciidoc/latest/verbatim/listing-blocks/) + - ✅ [Literal Blocks](https://docs.asciidoctor.org/asciidoc/latest/verbatim/literal-blocks/) - Passthroughs - Passthrough Blocks - Open Blocks diff --git a/document_parser.go b/document_parser.go index 8de91cc..18c0817 100644 --- a/document_parser.go +++ b/document_parser.go @@ -11,6 +11,13 @@ import ( "git.sr.ht/~shulhan/pakakeh.go/lib/ascii" ) +// From the [Document.parse] its all start from parseBlock. +// +// -> parseBlock() +// -> line() +// -> whatKindOfLine() +// The parsing branched here based on docp.kind. + const debugLevel = 0 type documentParser struct { @@ -133,6 +140,48 @@ func (docp *documentParser) consumeLinesUntil(el *element, term int, terms []int return line } +// consumeLiteralParagraph consumes lines that start with spaces and returns +// the next line that does not have it. +func (docp *documentParser) consumeLiteralParagraph(el *element, + origSpaces []byte, terms ...int, +) (line []byte) { + logp := `consumeLiteralParagraph` + lenOrigSpaces := len(origSpaces) + var spaces []byte + var ok bool + for { + spaces, line, ok = docp.line(logp) + if !ok { + break + } + if bytes.HasPrefix(spaces, origSpaces) { + el.Write(spaces[lenOrigSpaces:]) + el.Write(line) + el.WriteByte('\n') + continue + } + if len(spaces) != 0 { + // If start with spaces, remove single space only and + // append the rest. + el.Write(spaces[1:]) + el.Write(line) + el.WriteByte('\n') + continue + } + for _, kind := range terms { + if kind == docp.kind { + goto out + } + } + // Keep consume lines until we found terminating conditions. 
+ el.Write(line) + el.WriteByte('\n') + } +out: + el.raw = applySubstitutions(docp.doc, el.raw) + return line +} + // hasPreamble will return true if the contents contains preamble, indicated // by the first section that found after current line. func (docp *documentParser) hasPreamble() bool { @@ -285,12 +334,13 @@ func (docp *documentParser) parseBlock(parent *element, term int) { el = &element{} line []byte + spaces []byte isTerm bool ok bool ) for !isTerm { if len(line) == 0 { - _, line, ok = docp.line(logp) + spaces, line, ok = docp.line(logp) if !ok { return } @@ -514,17 +564,14 @@ func (docp *documentParser) parseBlock(parent *element, term int) { el.addRole(classNameLiteralBlock) el.Write(line) el.WriteByte('\n') - line = docp.consumeLinesUntil( - el, - lineKindEmpty, - []int{ - term, - elKindBlockListing, - elKindBlockListingNamed, - elKindBlockLiteral, - elKindBlockLiteralNamed, - }) - el.raw = applySubstitutions(docp.doc, el.raw) + line = docp.consumeLiteralParagraph(el, + spaces, lineKindEmpty, + term, + elKindBlockListing, + elKindBlockListingNamed, + elKindBlockLiteral, + elKindBlockLiteralNamed, + ) } parent.addChild(el) el = &element{} diff --git a/testdata/literal_paragraph_test.txt b/testdata/literal_paragraph_test.txt new file mode 100644 index 0000000..684b859 --- /dev/null +++ b/testdata/literal_paragraph_test.txt @@ -0,0 +1,51 @@ +>>> WithTab + +A paragraph. + + Literal paragraph with tab. + Literal paragraph with tab. + Literal paragraph with tab. + +End of section. + +<<< WithTab + +<div class="paragraph"> +<p>A paragraph.</p> +</div> +<div class="literalblock"> +<div class="content"> +<pre>Literal paragraph with tab. + Literal paragraph with tab. +Literal paragraph with tab.</pre> +</div> +</div> +<div class="paragraph"> +<p>End of section.</p> +</div> + +>>> WithSpaces + +A paragraph. + + Literal paragraph with two spaces. + Literal paragraph with two spaces. + Literal paragraph with two spaces. + +End of section. 
+ +<<< WithSpaces + +<div class="paragraph"> +<p>A paragraph.</p> +</div> +<div class="literalblock"> +<div class="content"> +<pre>Literal paragraph with two spaces. +Literal paragraph with two spaces. +Literal paragraph with two spaces.</pre> +</div> +</div> +<div class="paragraph"> +<p>End of section.</p> +</div> diff --git a/testdata/test.adoc b/testdata/test.adoc index e7ef033..5241734 100644 --- a/testdata/test.adoc +++ b/testdata/test.adoc @@ -234,7 +234,7 @@ A bold with * space *, with single non alnum *=*. * Write my document -== Block listing +== Listing Blocks [listing] This is single paragraph listing. @@ -247,19 +247,37 @@ This is block listing. ---- -== Block literal +== Literal Blocks + +=== Indent method A literal paragraph followed by non-space line: non-space line. + Literal with tab. + Literal with tab. + Literal with tab. + + Literal with two spaces. + Literal with two spaces. + Literal with two tab. + + Literal with mixed tab and spaces (two spaces). + Literal with mixed tab and spaces (a tab). + Literal with mixed tab and spaces (two spaces and a tab). + +=== Literal style sintax + [literal] A literal named. [literal] x A literal named and trailing characters will become paragraph. +=== Delimited literal block + .... -With 4 dots. +With .... block. .... // This one does not work: diff --git a/testdata/test.exp.html b/testdata/test.exp.html index 90be8e2..dd1e453 100644 --- a/testdata/test.exp.html +++ b/testdata/test.exp.html @@ -481,8 +481,14 @@ body.book #toc,body.book #preamble,body.book h1.sect0,body.book .sect1>h2{page-b <li><a href="#title">11.1. Title</a></li> </ul> </li> -<li><a href="#block_listing">12. Block listing</a></li> -<li><a href="#block_literal">13. Block literal</a></li> +<li><a href="#listing_blocks">12. Listing Blocks</a></li> +<li><a href="#literal_blocks">13. Literal Blocks</a> +<ul class="sectlevel2"> +<li><a href="#indent_method">13.1. Indent method</a></li> +<li><a href="#literal_style_sintax">13.2. 
Literal style sintax</a></li> +<li><a href="#delimited_literal_block">13.3. Delimited literal block</a></li> +</ul> +</li> <li><a href="#ordered_lists">14. Ordered Lists</a></li> <li><a href="#unordered_lists">15. Unordered Lists</a> <ul class="sectlevel2"> @@ -897,7 +903,7 @@ paragraph*</em>.</p> </div> </div> <div class="sect1"> -<h2 id="block_listing"><a class="anchor" href="#block_listing"></a><a class="link" href="#block_listing">12. Block listing</a></h2> +<h2 id="listing_blocks"><a class="anchor" href="#listing_blocks"></a><a class="link" href="#listing_blocks">12. Listing Blocks</a></h2> <div class="sectionbody"> <div class="listingblock"> <div class="content"> @@ -916,8 +922,10 @@ This is not listing.</p> </div> </div> <div class="sect1"> -<h2 id="block_literal"><a class="anchor" href="#block_literal"></a><a class="link" href="#block_literal">13. Block literal</a></h2> +<h2 id="literal_blocks"><a class="anchor" href="#literal_blocks"></a><a class="link" href="#literal_blocks">13. Literal Blocks</a></h2> <div class="sectionbody"> +<div class="sect2"> +<h3 id="indent_method"><a class="anchor" href="#indent_method"></a><a class="link" href="#indent_method">13.1. Indent method</a></h3> <div class="literalblock"> <div class="content"> <pre> A literal paragraph followed by non-space line: @@ -926,6 +934,30 @@ non-space line.</pre> </div> <div class="literalblock"> <div class="content"> +<pre>Literal with tab. + Literal with tab. +Literal with tab.</pre> +</div> +</div> +<div class="literalblock"> +<div class="content"> +<pre>Literal with two spaces. + Literal with two spaces. +Literal with two tab.</pre> +</div> +</div> +<div class="literalblock"> +<div class="content"> +<pre> Literal with mixed tab and spaces (two spaces). +Literal with mixed tab and spaces (a tab). 
+ Literal with mixed tab and spaces (two spaces and a tab).</pre> +</div> +</div> +</div> +<div class="sect2"> +<h3 id="literal_style_sintax"><a class="anchor" href="#literal_style_sintax"></a><a class="link" href="#literal_style_sintax">13.2. Literal style sintax</a></h3> +<div class="literalblock"> +<div class="content"> <pre>A literal named.</pre> </div> </div> @@ -933,9 +965,13 @@ non-space line.</pre> <p>[literal] x A literal named and trailing characters will become paragraph.</p> </div> +</div> +<div class="sect2"> +<h3 id="delimited_literal_block"><a class="anchor" href="#delimited_literal_block"></a><a class="link" href="#delimited_literal_block">13.3. Delimited literal block</a></h3> <div class="literalblock"> <div class="content"> -<pre>With 4 dots.</pre> +<pre>With .... block.</pre> +</div> </div> </div> </div> diff --git a/testdata/test.got.html b/testdata/test.got.html index 6e8c276..18d7609 100644 --- a/testdata/test.got.html +++ b/testdata/test.got.html @@ -2496,8 +2496,14 @@ p.tableblock { <li><a href="#title">11.1. Title</a></li> </ul> </li> -<li><a href="#block_listing">12. Block listing</a></li> -<li><a href="#block_literal">13. Block literal</a></li> +<li><a href="#listing_blocks">12. Listing Blocks</a></li> +<li><a href="#literal_blocks">13. Literal Blocks</a> +<ul class="sectlevel2"> +<li><a href="#indent_method">13.1. Indent method</a></li> +<li><a href="#literal_style_sintax">13.2. Literal style sintax</a></li> +<li><a href="#delimited_literal_block">13.3. Delimited literal block</a></li> +</ul> +</li> <li><a href="#ordered_lists">14. Ordered Lists</a></li> <li><a href="#unordered_lists">15. Unordered Lists</a> <ul class="sectlevel2"> @@ -2910,7 +2916,7 @@ paragraph</strong></em>.</p> </div> </div> <div class="sect1"> -<h2 id="block_listing"><a class="link" href="#block_listing">12. 
Block listing</a><a class="anchor" href="#block_listing" aria-label="Anchor for block_listing"></a></h2> +<h2 id="listing_blocks"><a class="link" href="#listing_blocks">12. Listing Blocks</a><a class="anchor" href="#listing_blocks" aria-label="Anchor for listing_blocks"></a></h2> <div class="sectionbody"> <div class="listingblock"> <div class="content"> @@ -2929,8 +2935,10 @@ This is not listing.</p> </div> </div> <div class="sect1"> -<h2 id="block_literal"><a class="link" href="#block_literal">13. Block literal</a><a class="anchor" href="#block_literal" aria-label="Anchor for block_literal"></a></h2> +<h2 id="literal_blocks"><a class="link" href="#literal_blocks">13. Literal Blocks</a><a class="anchor" href="#literal_blocks" aria-label="Anchor for literal_blocks"></a></h2> <div class="sectionbody"> +<div class="sect2"> +<h3 id="indent_method"><a class="link" href="#indent_method">13.1. Indent method</a><a class="anchor" href="#indent_method" aria-label="Anchor for indent_method"></a></h3> <div class="literalblock"> <div class="content"> <pre>A literal paragraph followed by non-space line: @@ -2939,6 +2947,30 @@ non-space line.</pre> </div> <div class="literalblock"> <div class="content"> +<pre>Literal with tab. + Literal with tab. +Literal with tab.</pre> +</div> +</div> +<div class="literalblock"> +<div class="content"> +<pre>Literal with two spaces. + Literal with two spaces. +Literal with two tab.</pre> +</div> +</div> +<div class="literalblock"> +<div class="content"> +<pre>Literal with mixed tab and spaces (two spaces). +Literal with mixed tab and spaces (a tab). + Literal with mixed tab and spaces (two spaces and a tab).</pre> +</div> +</div> +</div> +<div class="sect2"> +<h3 id="literal_style_sintax"><a class="link" href="#literal_style_sintax">13.2. 
Literal style sintax</a><a class="anchor" href="#literal_style_sintax" aria-label="Anchor for literal_style_sintax"></a></h3> +<div class="literalblock"> +<div class="content"> <pre>A literal named.</pre> </div> </div> @@ -2946,9 +2978,13 @@ non-space line.</pre> <p>[literal] x A literal named and trailing characters will become paragraph.</p> </div> +</div> +<div class="sect2"> +<h3 id="delimited_literal_block"><a class="link" href="#delimited_literal_block">13.3. Delimited literal block</a><a class="anchor" href="#delimited_literal_block" aria-label="Anchor for delimited_literal_block"></a></h3> <div class="literalblock"> <div class="content"> -<pre>With 4 dots.</pre> +<pre>With .... block.</pre> +</div> </div> </div> </div> |
