From 6f7bb3e07398c80e782dd46045ce020a45067095 Mon Sep 17 00:00:00 2001
From: Shulhan
Date: Sun, 28 May 2023 20:55:38 +0700
Subject: all: add support for unordered list with '-'
The unordered list item with hyphen ('-') causes too much confusion and
inconsistency; nevertheless, most of us still use it.
Case one, given the following markup,
```
- Item 1
+
"A line
of quote"
-- Author
```
Is "Author" a sub item in the list, or are we parsing the author of the
quote paragraph?
Case two, the writer wants to write an em dash (`—` in HTML Unicode) but
somehow the editor wraps it so that it starts on a new line.
As a reminder, the official documentation only recommends using the hyphen
for simple list items [1].
[1] https://docs.asciidoctor.org/asciidoc/latest/lists/unordered/#basic-unordered-list
---
README.md | 35 ++++++++-
document_parser.go | 2 -
element.go | 4 +-
parser.go | 47 ++++++++----
testdata/list_unordered_test.txt | 157 +++++++++++++++++++++++++++++++++++++++
testdata/test.adoc | 10 +--
testdata/test.exp.html | 14 ++--
testdata/test.got.html | 12 +--
8 files changed, 243 insertions(+), 38 deletions(-)
create mode 100644 testdata/list_unordered_test.txt
diff --git a/README.md b/README.md
index e057025..6a8b257 100644
--- a/README.md
+++ b/README.md
@@ -52,7 +52,7 @@ The numbered one is based on the old documentation.
* Quotation Marks and Apostrophes
* Subscript and Superscript
* Monospace
-* Unordered Lists
+* Unordered Lists (See Notes below)
* Nested
* Complex List Content
* Custom Markers
@@ -144,9 +144,12 @@ Additional metadata provides by this library,
* `author_names` - list of author full names separated by comma.
+### Notes
+
+#### Unsupported markup
The following markup will not supported because its functionality is duplicate
-with others markup or not secure,
+or inconsistent with others markup, or not secure,
* Header
* Subtitle partitioning.
@@ -167,6 +170,34 @@ with others markup or not secure,
* Caching URI Content
+#### Unordered list item with hyphen
+
+The unordered list item with hyphen ('-') causes too much confusion and
+inconsistency.
+
+Case one, given the following markup,
+
+```
+- Item 1
++
+"A line
+of quote"
+-- Author
+```
+
+Is "Author" a sub item in the list, or are we parsing the author of the
+quote paragraph?
+
+Case two, the writer wants to write an em dash (`—` in HTML Unicode) but
+somehow the editor wraps it so that it starts on a new line.
+
+As a reminder, the official documentation only recommends using the hyphen
+for simple list items:
+
+> You should reserve the hyphen for lists that only have a single level
+> because the hyphen marker (-) doesn’t work for nested lists.
+> -- https://docs.asciidoctor.org/asciidoc/latest/lists/unordered/#basic-unordered-list
+
### TODO
List of features which may be implemented,
diff --git a/document_parser.go b/document_parser.go
index 3ccfa0f..db2dec1 100644
--- a/document_parser.go
+++ b/document_parser.go
@@ -841,8 +841,6 @@ func (docp *documentParser) parseListBlock() (el *element, line []byte) {
lineKindEmpty,
[]int{
lineKindListContinue,
- elKindListOrderedItem,
- elKindListUnorderedItem,
elKindListDescriptionItem,
})
el.postParseParagraph(nil)
diff --git a/element.go b/element.go
index 9609570..db8c6a6 100644
--- a/element.go
+++ b/element.go
@@ -43,7 +43,7 @@ type element struct {
elementAttribute
rawLabel bytes.Buffer
- level int // The number of dot for ordered list, or '*' for unordered list.
+ level int // The number of dot for ordered list, or '*'/'-' for unordered list.
listItemNumber int // The counter for list item, start from 1.
kind int
@@ -494,7 +494,7 @@ func (el *element) parseListUnorderedItem(line []byte) {
)
for ; x < len(line); x++ {
- if line[x] == '*' {
+ if line[x] == '*' || line[x] == '-' {
el.level++
continue
}
diff --git a/parser.go b/parser.go
index 1eb53c4..7b95773 100644
--- a/parser.go
+++ b/parser.go
@@ -45,7 +45,7 @@ const (
elKindListOrdered // Wrapper.
elKindListOrderedItem // 30: Line start with ". "
elKindListUnordered // Wrapper.
- elKindListUnorderedItem // Line start with "* "
+ elKindListUnorderedItem // Line start with "* " or "- "
elKindListDescription // Wrapper.
elKindListDescriptionItem // Line that has "::" + WSP
elKindMacroTOC // "toc::[]"
@@ -799,7 +799,7 @@ func whatKindOfLine(line []byte) (kind int, spaces, got []byte) {
return elKindListDescriptionItem, spaces, line
}
- if line[0] != '*' && line[0] != '.' {
+ if line[0] != '*' && line[0] != '-' && line[0] != '.' {
return elKindLiteralParagraph, spaces, line
}
}
@@ -865,23 +865,42 @@ func whatKindOfLine(line []byte) (kind int, spaces, got []byte) {
}
}
}
- } else if line[0] == '*' {
+ } else if line[0] == '*' || line[0] == '-' {
if len(line) <= 1 {
kind = lineKindText
- } else {
- x = 0
- for ; x < len(line); x++ {
- if line[x] == '*' {
- continue
- }
- if line[x] == ' ' || line[x] == '\t' {
- kind = elKindListUnorderedItem
- return kind, spaces, line
- }
- kind = lineKindText
+ return kind, spaces, line
+ }
+
+ var (
+ listItemChar = line[0]
+ count = 0
+ )
+ x = 0
+ for ; x < len(line); x++ {
+ if line[x] == listItemChar {
+ count++
+ continue
+ }
+ if line[x] == ' ' || line[x] == '\t' {
+ kind = elKindListUnorderedItem
return kind, spaces, line
}
+ // Break on the first non-space, so from above
+ // condition we have,
+ // - item
+ // -- item
+ // --- item
+ // ---- // block listing
+ // --unknown // break here
+ break
}
+ if listItemChar == '-' && count == 4 && x == len(line) {
+ kind = elKindBlockListing
+ } else {
+ kind = lineKindText
+ }
+ return kind, spaces, line
+
} else if bytes.Equal(line, []byte(`+`)) {
kind = lineKindListContinue
} else if bytes.Equal(line, []byte(`----`)) {
diff --git a/testdata/list_unordered_test.txt b/testdata/list_unordered_test.txt
new file mode 100644
index 0000000..01c62a8
--- /dev/null
+++ b/testdata/list_unordered_test.txt
@@ -0,0 +1,157 @@
+Parsing and rendering unordered list item with '*' and '-'.
+
+>>> with_star
+= Unordered list with star
+
+With star,
+
+* Star 1
+* Star 2
+** Star 2.1
+*** Star 2.1.1
+** Star 2.2
+* Star 3
+* Star 4
+
+<<< with_star
+
+
+
+
+-
+
Star 1
+
+-
+
Star 2
+
+
+-
+
Star 2.1
+
+
+-
+
Star 2.2
+
+
+
+
+-
+
Star 3
+
+-
+
Star 4
+
+
+
+
+>>> with_dash
+= Unordered list with dash
+
+With dash,
+
+- Dash 1
+- Dash 2
+-- Dash 2.1
+--- Dash 2.1.1
+-- Dash 2.2
+- Dash 3
+- Dash 4
+
+<<< with_dash
+
+
+
+
+-
+
Dash 1
+
+-
+
Dash 2
+
+
+-
+
Dash 2.1
+
+
+-
+
Dash 2.2
+
+
+
+
+-
+
Dash 3
+
+-
+
Dash 4
+
+
+
+
+>>> quoted_paragraph
+= Quoted paragraph
+
+"A line
+of Quote"
+-- Author Name, Citation Name
+
+<<< quoted_paragraph
+
+
+
+A line
+of Quote
+
+
+— Author Name
+Citation Name
+
+
+
+>>> list_item_with_quoted_paragraph
+= List item with quote paragraph
+
+* List item 1
++
+"a quote"
+-- Thomas Jefferson, Papers of Thomas Jefferson: Volume 11
+
+* List item 2
+
+<<< list_item_with_quoted_paragraph
+
+
+
+-
+
List item 1
+
+
+a quote
+
+
+— Thomas Jefferson
+Papers of Thomas Jefferson: Volume 11
+
+
+
+-
+
List item 2
+
+
+
diff --git a/testdata/test.adoc b/testdata/test.adoc
index cece8d6..869d720 100644
--- a/testdata/test.adoc
+++ b/testdata/test.adoc
@@ -480,7 +480,7 @@ This line separated by comment.
== Description list
toc::[]
-`CPU`:: The brain
+`AAA`:: The brain
of
the
computer.
@@ -489,23 +489,23 @@ Hard drive:: Permanent storage for operating system and/or user files.
With `[horizontal]` style,
[horizontal]
-CPU:: The brain of the computer.
+BBB:: The brain of the computer.
Hard drive:: Permanent storage for operating system and/or user files.
With title,
.A title
-CPU:: The brain of the computer.
+CCC:: The brain of the computer.
Hard drive:: Permanent storage for operating system and/or user files.
Indented with space
- CPU:: The brain of the computer.
+ DDD:: The brain of the computer.
Hard drive:: Permanent storage for operating system and/or user files.
With continuation "+",
-CPU::
+EEE::
+
The brain of the computer.
Hard drive::
diff --git a/testdata/test.exp.html b/testdata/test.exp.html
index f0a36d0..ba0aa6f 100644
--- a/testdata/test.exp.html
+++ b/testdata/test.exp.html
@@ -4,7 +4,7 @@
-
+
@@ -1133,7 +1133,7 @@ This line separated by comment.
-CPU
+AAA
-
The brain
of
@@ -1153,7 +1153,7 @@ computer.
|
-CPU
+BBB
|
The brain of the computer.
@@ -1175,7 +1175,7 @@ Hard drive
A title
-- CPU
+- CCC
-
The brain of the computer.
@@ -1190,7 +1190,7 @@ Hard drive
-- CPU
+- DDD
-
The brain of the computer.
@@ -1205,7 +1205,7 @@ Hard drive
-- CPU
+- EEE
-
The brain of the computer.
@@ -2995,7 +2995,7 @@ this sidebar.
|