From 6f7bb3e07398c80e782dd46045ce020a45067095 Mon Sep 17 00:00:00 2001 From: Shulhan Date: Sun, 28 May 2023 20:55:38 +0700 Subject: all: add support for unordered list with '-' The unordered list item with hyphen ('-') causes too much confusion and inconsistency; nevertheless, most of us still use it. Case one, given the following markup, ``` - Item 1 + "A line of quote" -- Author ``` Is "Author" a sub-item in the list, or are we parsing the author of a quote paragraph? Case two, the writer wants to write an em dash (`&#8212;` in HTML Unicode) but the editor wraps it so that it starts on a new line. As a reminder, the official documentation recommends using the hyphen only for simple list items [1]. [1] https://docs.asciidoctor.org/asciidoc/latest/lists/unordered/#basic-unordered-list --- README.md | 35 ++++++++- document_parser.go | 2 - element.go | 4 +- parser.go | 47 ++++++++---- testdata/list_unordered_test.txt | 157 +++++++++++++++++++++++++++++++++++++++ testdata/test.adoc | 10 +-- testdata/test.exp.html | 14 ++-- testdata/test.got.html | 12 +-- 8 files changed, 243 insertions(+), 38 deletions(-) create mode 100644 testdata/list_unordered_test.txt diff --git a/README.md b/README.md index e057025..6a8b257 100644 --- a/README.md +++ b/README.md @@ -52,7 +52,7 @@ The numbered one is based on the old documentation. * Quotation Marks and Apostrophes * Subscript and Superscript * Monospace -* Unordered Lists +* Unordered Lists (See Notes below) * Nested * Complex List Content * Custom Markers @@ -144,9 +144,12 @@ Additional metadata provides by this library, * `author_names` - list of author full names separated by comma. +### Notes + +#### Unsupported markup The following markup will not supported because its functionality is duplicate -with others markup or not secure, +or inconsistent with others markup, or not secure, * Header * Subtitle partitioning. 
@@ -167,6 +170,34 @@ with others markup or not secure, * Caching URI Content +#### Unordered list item with hyphen + +The unordered list item with hyphen ('-') causes too much confusion and +inconsistency. + +Case one, given the following markup, + +``` +- Item 1 ++ +"A line +of quote" +-- Author +``` + +Is "Author" a sub-item in the list, or are we parsing the author of a quote +paragraph? + +Case two, the writer wants to write an em dash (`&#8212;` in HTML Unicode) but +the editor wraps it so that it starts on a new line. + +As a reminder, the official documentation recommends using the hyphen only for +simple list items: + +> You should reserve the hyphen for lists that only have a single level +> because the hyphen marker (-) doesn’t work for nested lists. +> -- <https://docs.asciidoctor.org/asciidoc/latest/lists/unordered/#basic-unordered-list> + ### TODO List of features which may be implemented, diff --git a/document_parser.go b/document_parser.go index 3ccfa0f..db2dec1 100644 --- a/document_parser.go +++ b/document_parser.go @@ -841,8 +841,6 @@ func (docp *documentParser) parseListBlock() (el *element, line []byte) { lineKindEmpty, []int{ lineKindListContinue, - elKindListOrderedItem, - elKindListUnorderedItem, elKindListDescriptionItem, }) el.postParseParagraph(nil) diff --git a/element.go b/element.go index 9609570..db8c6a6 100644 --- a/element.go +++ b/element.go @@ -43,7 +43,7 @@ type element struct { elementAttribute rawLabel bytes.Buffer - level int // The number of dot for ordered list, or '*' for unordered list. + level int // The number of dot for ordered list, or '*'/'-' for unordered list. listItemNumber int // The counter for list item, start from 1. kind int @@ -494,7 +494,7 @@ func (el *element) parseListUnorderedItem(line []byte) { ) for ; x < len(line); x++ { - if line[x] == '*' { + if line[x] == '*' || line[x] == '-' { el.level++ continue } diff --git a/parser.go b/parser.go index 1eb53c4..7b95773 100644 --- a/parser.go +++ b/parser.go @@ -45,7 +45,7 @@ const ( elKindListOrdered // Wrapper. elKindListOrderedItem // 30: Line start with ". 
" elKindListUnordered // Wrapper. - elKindListUnorderedItem // Line start with "* " + elKindListUnorderedItem // Line start with "* " or "- " elKindListDescription // Wrapper. elKindListDescriptionItem // Line that has "::" + WSP elKindMacroTOC // "toc::[]" @@ -799,7 +799,7 @@ func whatKindOfLine(line []byte) (kind int, spaces, got []byte) { return elKindListDescriptionItem, spaces, line } - if line[0] != '*' && line[0] != '.' { + if line[0] != '*' && line[0] != '-' && line[0] != '.' { return elKindLiteralParagraph, spaces, line } } @@ -865,23 +865,42 @@ func whatKindOfLine(line []byte) (kind int, spaces, got []byte) { } } } - } else if line[0] == '*' { + } else if line[0] == '*' || line[0] == '-' { if len(line) <= 1 { kind = lineKindText - } else { - x = 0 - for ; x < len(line); x++ { - if line[x] == '*' { - continue - } - if line[x] == ' ' || line[x] == '\t' { - kind = elKindListUnorderedItem - return kind, spaces, line - } - kind = lineKindText + return kind, spaces, line + } + + var ( + listItemChar = line[0] + count = 0 + ) + x = 0 + for ; x < len(line); x++ { + if line[x] == listItemChar { + count++ + continue + } + if line[x] == ' ' || line[x] == '\t' { + kind = elKindListUnorderedItem return kind, spaces, line } + // Break on the first non-space, so from above + // condition we have, + // - item + // -- item + // --- item + // ---- // block listing + // --unknown // break here + break } + if listItemChar == '-' && count == 4 && x == len(line) { + kind = elKindBlockListing + } else { + kind = lineKindText + } + return kind, spaces, line + } else if bytes.Equal(line, []byte(`+`)) { kind = lineKindListContinue } else if bytes.Equal(line, []byte(`----`)) { diff --git a/testdata/list_unordered_test.txt b/testdata/list_unordered_test.txt new file mode 100644 index 0000000..01c62a8 --- /dev/null +++ b/testdata/list_unordered_test.txt @@ -0,0 +1,157 @@ +Parsing and rendering unordered list item with '*' and '-'. 
+ +>>> with_star += Unordered list with star + +With star, + +* Star 1 +* Star 2 +** Star 2.1 +*** Star 2.1.1 +** Star 2.2 +* Star 3 +* Star 4 + +<<< with_star + +
+

With star,

+
+
+
    +
  • +

    Star 1

    +
  • +
  • +

    Star 2

    +
    +
      +
    • +

      Star 2.1

      +
      +
        +
      • +

        Star 2.1.1

        +
      • +
      +
      +
    • +
    • +

      Star 2.2

      +
    • +
    +
    +
  • +
  • +

    Star 3

    +
  • +
  • +

    Star 4

    +
  • +
+
+ +>>> with_dash += Unordered list with dash + +With dash, + +- Dash 1 +- Dash 2 +-- Dash 2.1 +--- Dash 2.1.1 +-- Dash 2.2 +- Dash 3 +- Dash 4 + +<<< with_dash + +
+

With dash,

+
+
+
    +
  • +

    Dash 1

    +
  • +
  • +

    Dash 2

    +
    +
      +
    • +

      Dash 2.1

      +
      +
        +
      • +

        Dash 2.1.1

        +
      • +
      +
      +
    • +
    • +

      Dash 2.2

      +
    • +
    +
    +
  • +
  • +

    Dash 3

    +
  • +
  • +

    Dash 4

    +
  • +
+
+ +>>> quoted_paragraph += Quoted paragraph + +"A line +of Quote" +-- Author Name, Citation Name + +<<< quoted_paragraph + +
+
+A line +of Quote +
+
+— Author Name
+Citation Name +
+
+ +>>> list_item_with_quoted_paragraph += List item with quote paragraph + +* List item 1 ++ +"a quote" +-- Thomas Jefferson, Papers of Thomas Jefferson: Volume 11 + +* List item 2 + +<<< list_item_with_quoted_paragraph + +
+
    +
  • +

    List item 1

    +
    +
    +a quote +
    +
    +— Thomas Jefferson
    +Papers of Thomas Jefferson: Volume 11 +
    +
    +
  • +
  • +

    List item 2

    +
  • +
+
diff --git a/testdata/test.adoc b/testdata/test.adoc index cece8d6..869d720 100644 --- a/testdata/test.adoc +++ b/testdata/test.adoc @@ -480,7 +480,7 @@ This line separated by comment. == Description list toc::[] -`CPU`:: The brain +`AAA`:: The brain of the computer. @@ -489,23 +489,23 @@ Hard drive:: Permanent storage for operating system and/or user files. With `[horizontal]` style, [horizontal] -CPU:: The brain of the computer. +BBB:: The brain of the computer. Hard drive:: Permanent storage for operating system and/or user files. With title, .A title -CPU:: The brain of the computer. +CCC:: The brain of the computer. Hard drive:: Permanent storage for operating system and/or user files. Indented with space - CPU:: The brain of the computer. + DDD:: The brain of the computer. Hard drive:: Permanent storage for operating system and/or user files. With continuation "+", -CPU:: +EEE:: + The brain of the computer. Hard drive:: diff --git a/testdata/test.exp.html b/testdata/test.exp.html index f0a36d0..ba0aa6f 100644 --- a/testdata/test.exp.html +++ b/testdata/test.exp.html @@ -4,7 +4,7 @@ - + @@ -1133,7 +1133,7 @@ This line separated by comment.

-
CPU
+
AAA

The brain of @@ -1153,7 +1153,7 @@ computer.

-CPU +BBB

The brain of the computer.

@@ -1175,7 +1175,7 @@ Hard drive
A title
-
CPU
+
CCC

The brain of the computer.

@@ -1190,7 +1190,7 @@ Hard drive
-
CPU
+
DDD

The brain of the computer.

@@ -1205,7 +1205,7 @@ Hard drive
-
CPU
+
EEE

The brain of the computer.

@@ -2995,7 +2995,7 @@ this sidebar.

diff --git a/testdata/test.got.html b/testdata/test.got.html index 28cba38..aba6b1f 100644 --- a/testdata/test.got.html +++ b/testdata/test.got.html @@ -1130,7 +1130,7 @@ This line separated by comment.

-
CPU
+
AAA

The brain of @@ -1150,7 +1150,7 @@ computer.

-CPU +BBB

The brain of the computer.

@@ -1172,7 +1172,7 @@ Hard drive
A title
-
CPU
+
CCC

The brain of the computer.

@@ -1187,7 +1187,7 @@ Hard drive
-
CPU
+
DDD

The brain of the computer.

@@ -1202,7 +1202,7 @@ Hard drive
-
CPU
+
EEE

The brain of the computer.

@@ -2997,7 +2997,7 @@ this sidebar.

-- cgit v1.3