aboutsummaryrefslogtreecommitdiff
path: root/lib/parser
diff options
context:
space:
mode:
Diffstat (limited to 'lib/parser')
-rw-r--r--lib/parser/parser.go349
-rw-r--r--lib/parser/parser_benchmark_test.go23
-rw-r--r--lib/parser/parser_example_test.go29
-rw-r--r--lib/parser/parser_test.go287
-rw-r--r--lib/parser/testdata/test.txt1
5 files changed, 689 insertions, 0 deletions
diff --git a/lib/parser/parser.go b/lib/parser/parser.go
new file mode 100644
index 00000000..8c5d07e7
--- /dev/null
+++ b/lib/parser/parser.go
@@ -0,0 +1,349 @@
+// Copyright 2019, Shulhan <m.shulhan@gmail.com>. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//
+// Package parser provide a common text parser, using delimiters.
+//
+package parser
+
import (
	"fmt"
	"io/ioutil"
	"unicode/utf8"

	libascii "github.com/shuLhan/share/lib/ascii"
)
+
//
// Parser implement text parsing.
//
// The whole content to be parsed is held in memory (v) and a read
// position (x) advances as tokens are consumed.  The token and d fields
// cache the result of the last read, so repeated calls reuse the same
// backing storage.
//
type Parser struct {
	file   string // file is the path of loaded file, empty if loaded from string.
	delims string // delims is the set of characters that terminate a token.
	x      int    // x is the position of read in v.
	v      string // v contains the text to be parsed.
	token  []rune // token that has been parsed.
	d      rune   // d is one of delims character that terminated parsing.
}
+
+//
+// Lines parse the content of path and return non-empty lines.
+//
+func Lines(file string) ([]string, error) {
+ p, err := Open(file, "")
+ if err != nil {
+ return nil, fmt.Errorf("Lines: %w", err)
+ }
+ return p.Lines(), nil
+}
+
+//
+// New create and initialize parser from content and delimiters.
+//
+func New(content, delims string) (p *Parser) {
+ p = &Parser{
+ token: make([]rune, 0, 16),
+ }
+
+ p.Load(content, delims)
+
+ return p
+}
+
+//
+// Open create and initialize the parser using predefined delimiters.
+// All the content of file will be loaded first.
+// If delimiters is empty, it would default to all whitespaces characters.
+//
+func Open(file, delims string) (p *Parser, err error) {
+ v, err := ioutil.ReadFile(file)
+ if err != nil {
+ return nil, err
+ }
+
+ p = New(string(v), delims)
+ p.file = file
+
+ return p, nil
+}
+
+//
+// AddDelimiters append new delimiter to existing parser.
+//
+func (p *Parser) AddDelimiters(delims string) {
+ var found bool
+ for _, newd := range delims {
+ found = false
+ for _, oldd := range p.delims {
+ if oldd == newd {
+ found = true
+ break
+ }
+ }
+ if !found {
+ p.delims += string(newd)
+ }
+ }
+}
+
+//
+// Close the parser by resetting all its internal state to zero value.
+//
+func (p *Parser) Close() {
+ p.file = ""
+ p.delims = ""
+ p.x = 0
+ p.v = ""
+ p.token = p.token[:0]
+ p.d = 0
+}
+
//
// Lines return all non-empty lines from the content.
//
// Parsing starts from the current read position and consumes the rest of
// the content: after this call the read position is at end of content.
// Leading and trailing horizontal white spaces (" ", "\t", "\r", "\f") are
// trimmed from each line, and lines that become empty are skipped.
//
func (p *Parser) Lines() []string {
	var start, end int

	lines := make([]string, 0)

	for x := p.x; x < len(p.v); x++ {
		// Skip white spaces on beginning ...
		for ; x < len(p.v); x++ {
			if p.v[x] == ' ' || p.v[x] == '\t' || p.v[x] == '\r' || p.v[x] == '\f' {
				continue
			}
			break
		}
		start = x
		// Advance until the next new line or end of content.
		for ; x < len(p.v); x++ {
			if p.v[x] != '\n' {
				continue
			}
			break
		}

		// Skip white spaces at the end ...
		for end = x - 1; end > start; end-- {
			if p.v[end] == ' ' || p.v[end] == '\t' ||
				p.v[end] == '\r' || p.v[end] == '\f' {
				continue
			}
			break
		}
		// end stopped on the last non-space byte; make it exclusive.
		end++
		if start == end {
			// Skip empty lines
			continue
		}

		line := p.v[start:end]
		lines = append(lines, line)
		// The outer loop x++ steps over the '\n' found above.
	}

	p.x = len(p.v)

	return lines
}
+
+//
+// Load the new content and delimiters.
+//
+func (p *Parser) Load(content, delims string) {
+ p.Close()
+ p.v = content
+ if len(delims) == 0 {
+ p.delims = string(libascii.Spaces)
+ } else {
+ p.delims = delims
+ }
+}
+
+//
+// Token read the next token from content until one of the delimiter found.
+// if no delimiter found, its mean all of content has been read, the returned
+// delimiter will be 0.
+//
+func (p *Parser) Token() (string, rune) {
+ p.token = p.token[:0]
+
+ if p.x >= len(p.v) {
+ p.d = 0
+ return "", 0
+ }
+
+ for x, r := range p.v[p.x:] {
+ for _, d := range p.delims {
+ if r == d {
+ p.d = d
+ p.x += x + 1
+ return string(p.token), p.d
+ }
+ }
+
+ p.token = append(p.token, r)
+ }
+
+ p.d = 0
+ p.x = len(p.v)
+ return string(p.token), p.d
+}
+
+//
+// TokenEscaped read the next token from content until one of the delimiter
+// found, unless its escaped with value of esc character.
+//
+// For example, if the content is "a b" and one of the delimiter is " ",
+// escaping it with "\" will return as "a b" not "a".
+//
+func (p *Parser) TokenEscaped(esc rune) (string, rune) {
+ var isEscaped bool
+
+ p.token = p.token[:0]
+
+ if p.x >= len(p.v) {
+ p.d = 0
+ return "", 0
+ }
+
+ for x, r := range p.v[p.x:] {
+ if r == esc {
+ if isEscaped {
+ p.token = append(p.token, r)
+ isEscaped = false
+ continue
+ }
+ isEscaped = true
+ continue
+ }
+ for _, d := range p.delims {
+ if r == d {
+ if isEscaped {
+ isEscaped = false
+ break
+ }
+
+ p.d = d
+ p.x += x + 1
+ return string(p.token), p.d
+ }
+ }
+
+ p.token = append(p.token, r)
+ }
+
+ p.d = 0
+ p.x = len(p.v)
+ return string(p.token), p.d
+
+}
+
+//
+// ReadEnclosed read the token inside opening and closing characters, ignoring
+// all delimiters that previously set.
+//
+// It will return the parsed token and closed character if closed character
+// found, otherwise it will token with 0.
+//
+func (p *Parser) ReadEnclosed(open, closed rune) (string, rune) {
+ for x, r := range p.v[p.x:] {
+ if x == 0 {
+ if r == open {
+ continue
+ }
+ }
+ if r == closed {
+ p.d = closed
+ p.x += x + 1
+ return string(p.token), p.d
+ }
+
+ p.token = append(p.token, r)
+ }
+
+ p.d = 0
+ p.x = len(p.v)
+ return string(p.v), 0
+}
+
+//
+// RemoveDelimiters from current parser.
+//
+func (p *Parser) RemoveDelimiters(dels string) {
+ var (
+ newdelims string
+ found bool
+ )
+
+ for _, oldd := range p.delims {
+ found = false
+ for _, r := range dels {
+ if r == oldd {
+ found = true
+ break
+ }
+ }
+ if !found {
+ newdelims += string(oldd)
+ }
+ }
+
+ p.delims = newdelims
+}
+
+//
+// Skip parsing n characters or EOF if n is greater then length of content.
+//
+func (p *Parser) Skip(n int) {
+ if p.x+n >= len(p.v) {
+ p.x = len(p.v)
+ p.d = 0
+ } else {
+ p.x += n
+ }
+}
+
+//
+// SkipHorizontalSpaces skip all space (" "), tab ("\t"), carriage return
+// ("\r"), and form feed ("\f") characters; and return the first character
+// found, probably new line.
+//
+func (p *Parser) SkipHorizontalSpaces() rune {
+ for x, r := range p.v[p.x:] {
+ switch r {
+ case ' ', '\t', '\r', '\f':
+ default:
+ p.x += x
+ p.d = r
+ return r
+ }
+ }
+
+ p.d = 0
+ p.x = len(p.v)
+
+ return 0
+}
+
+//
+// SkipLine skip all characters until new line.
+// It will return the first character after new line or 0 if EOF.
+//
+func (p *Parser) SkipLine() rune {
+ for x, r := range p.v[p.x:] {
+ if r == '\n' {
+ p.x += x + 1
+ if p.x >= len(p.v) {
+ p.d = 0
+ } else {
+ p.d = r
+ }
+ return p.d
+ }
+ }
+
+ // All contents has been read, no new line found.
+ p.x = len(p.v)
+ p.d = 0
+
+ return 0
+}
diff --git a/lib/parser/parser_benchmark_test.go b/lib/parser/parser_benchmark_test.go
new file mode 100644
index 00000000..19e17872
--- /dev/null
+++ b/lib/parser/parser_benchmark_test.go
@@ -0,0 +1,23 @@
+// Copyright 2019, Shulhan <m.shulhan@gmail.com>. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package parser
+
+import "testing"
+
+// Output:
+//
+// BenchmarkParser_Token-4 59117898 20.2 ns/op 0 B/op 0 allocs/op
+//
+func BenchmarkParser_Token(b *testing.B) {
+ content := "abc;def"
+ delims := " /;"
+
+ p := New(content, delims)
+
+ for x := 0; x < b.N; x++ {
+ p.Token()
+ p.Load(content, delims)
+ }
+}
diff --git a/lib/parser/parser_example_test.go b/lib/parser/parser_example_test.go
new file mode 100644
index 00000000..41e75455
--- /dev/null
+++ b/lib/parser/parser_example_test.go
@@ -0,0 +1,29 @@
+// Copyright 2019, Shulhan <m.shulhan@gmail.com>. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package parser
+
+import (
+ "fmt"
+ "strings"
+)
+
+func ExampleNew() {
+ content := "[test]\nkey = value"
+ p := New(content, "=[]")
+
+ for {
+ token, del := p.Token()
+ token = strings.TrimSpace(token)
+ fmt.Printf("%q %q\n", token, del)
+ if del == 0 {
+ break
+ }
+ }
+ // Output:
+ // "" '['
+ // "test" ']'
+ // "key" '='
+ // "value" '\x00'
+}
diff --git a/lib/parser/parser_test.go b/lib/parser/parser_test.go
new file mode 100644
index 00000000..71d0a9e5
--- /dev/null
+++ b/lib/parser/parser_test.go
@@ -0,0 +1,287 @@
+// Copyright 2019, Shulhan <m.shulhan@gmail.com>. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package parser
+
+import (
+ "testing"
+
+ "github.com/shuLhan/share/lib/test"
+)
+
+func TestParser_AddDelimiters(t *testing.T) {
+ p := &Parser{
+ delims: "/:",
+ }
+
+ cases := []struct {
+ delims string
+ exp string
+ }{{
+ exp: "/:",
+ }, {
+ delims: " \t",
+ exp: "/: \t",
+ }, {
+ delims: " \t",
+ exp: "/: \t",
+ }}
+
+ for _, c := range cases {
+ p.AddDelimiters(c.delims)
+ test.Assert(t, "p.delims", c.exp, p.delims, true)
+ }
+}
+
// TestParser_Lines verifies that Lines trims horizontal white spaces and
// skips empty lines for various shapes of content.
func TestParser_Lines(t *testing.T) {
	cases := []struct {
		desc    string
		content string
		exp     []string
	}{{
		desc: "With empty content",
		exp:  []string{},
	}, {
		desc:    "With single empty line",
		content: "\n",
		exp:     []string{},
	}, {
		desc:    "With single empty line",
		content: " \t\r\f\n",
		exp:     []string{},
	}, {
		desc:    "With one line, at the end",
		content: " \t\r\f\ntest",
		exp: []string{
			"test",
		},
	}, {
		desc:    "With one line, in the middle",
		content: " \t\r\f\ntest \t\r\f\n",
		exp: []string{
			"test",
		},
	}, {
		desc:    "With two lines",
		content: "A \t\f\r\n \nB \t\f\r\n",
		exp: []string{
			"A",
			"B",
		},
	}, {
		desc:    "With three lines",
		content: "A \t\f\r\n \n\n\nB\n \t\f\rC",
		exp: []string{
			"A",
			"B",
			"C",
		},
	}}

	// One parser is reused across cases; Load resets its state.
	p := New("", "")

	for _, c := range cases {
		t.Log(c.desc)

		p.Load(c.content, "")

		got := p.Lines()

		test.Assert(t, "Lines()", c.exp, got, true)
	}
}
+
// TestParser_Token verifies that Token, using the default white space
// delimiters, returns each token with the delimiter that terminated it,
// and an empty token for consecutive delimiters.
func TestParser_Token(t *testing.T) {
	p := New("\t test \ntest", "")

	// Cases are consumed in order from the single content above.
	cases := []struct {
		expToken string
		expDelim rune
	}{{
		expDelim: '\t',
	}, {
		expDelim: ' ',
	}, {
		expToken: "test",
		expDelim: ' ',
	}, {
		expDelim: '\n',
	}, {
		// Last token, terminated by end of content (delimiter 0).
		expToken: "test",
	}}

	for _, c := range cases {
		gotToken, gotDelim := p.Token()

		test.Assert(t, "token", c.expToken, gotToken, true)
		test.Assert(t, "delim", c.expDelim, gotDelim, true)
	}
}
+
// TestParser_TokenEscaped verifies that a delimiter preceded by the escape
// character is kept as part of the token, and that the escape character
// itself is removed from the result.
func TestParser_TokenEscaped(t *testing.T) {
	p := New("\t te\\ st \ntest", "")

	// Cases are consumed in order from the single content above.
	cases := []struct {
		expToken string
		expDelim rune
	}{{
		expDelim: '\t',
	}, {
		expDelim: ' ',
	}, {
		// The escaped space is part of the token.
		expToken: "te st",
		expDelim: ' ',
	}, {
		expDelim: '\n',
	}, {
		expToken: "test",
	}}

	for _, c := range cases {
		gotToken, gotDelim := p.TokenEscaped('\\')

		test.Assert(t, "token", c.expToken, gotToken, true)
		test.Assert(t, "delim", c.expDelim, gotDelim, true)
	}
}
+
// TestParser_SkipLine verifies that SkipLine moves the read position past
// the next new line, by checking the token parsed afterwards.
func TestParser_SkipLine(t *testing.T) {
	cases := []struct {
		desc     string
		content  string
		expToken string
		expDelim rune
	}{{
		desc: "With empty content",
	}, {
		desc:     "With empty line",
		content:  "\ntest\n",
		expToken: "test",
		expDelim: '\n',
	}, {
		// Skipping the only line leaves nothing to parse.
		desc:    "With single line",
		content: "test\n",
	}, {
		desc:     "With two lines",
		content:  "test 1\ntest 2",
		expToken: "test",
		expDelim: ' ',
	}}

	// One parser is reused across cases; Load resets its state.
	p := New("", "")

	for _, c := range cases {
		t.Log(c.desc)

		p.Load(c.content, "")

		p.SkipLine()

		gotToken, gotDelim := p.Token()

		test.Assert(t, "token", c.expToken, gotToken, true)
		test.Assert(t, "delim", c.expDelim, gotDelim, true)
	}
}
+
// TestParser_Open verifies that Open loads the file content into the
// parser, and returns the underlying error when the file does not exist.
func TestParser_Open(t *testing.T) {
	cases := []struct {
		desc       string
		file       string
		expError   string
		expContent string
	}{{
		desc: "With not existing file",
		file: "testdata/xxx",
		// NOTE(review): this error string is OS-specific (Unix);
		// the test would fail on other platforms — confirm if that
		// matters here.
		expError: "open testdata/xxx: no such file or directory",
	}, {
		desc:       "With file exist",
		file:       "testdata/test.txt",
		expContent: "test\n",
	}}

	for _, c := range cases {
		t.Log(c.desc)

		p, err := Open(c.file, "")
		if err != nil {
			test.Assert(t, "error", c.expError, err.Error(), true)
			continue
		}

		test.Assert(t, "content", c.expContent, p.v, true)
	}
}
+
// TestParser_RemoveDelimiters verifies that listed delimiters are removed
// from the parser set, and that unknown or duplicate removals are no-ops.
func TestParser_RemoveDelimiters(t *testing.T) {
	p := &Parser{
		delims: "/: \t",
	}
	// Cases apply in order, each removing from the previous result.
	cases := []struct {
		delims string
		exp    string
	}{{
		exp: "/: \t",
	}, {
		delims: "/",
		exp:    ": \t",
	}, {
		// Removing an already-removed delimiter changes nothing.
		delims: "///",
		exp:    ": \t",
	}, {
		delims: "\t :",
		exp:    "",
	}}

	for _, c := range cases {
		p.RemoveDelimiters(c.delims)
		test.Assert(t, "p.delims", c.exp, p.delims, true)
	}
}
+
// TestParser_SkipHorizontalSpaces verifies the returned first non-space
// character and that the read position is left on that character, by
// checking the token parsed afterwards.
func TestParser_SkipHorizontalSpaces(t *testing.T) {
	cases := []struct {
		desc     string
		content  string
		expRune  rune
		expToken string
		expDelim rune
	}{{
		desc: "With empty content",
	}, {
		// New line is not a horizontal space, so it stops the skip.
		desc:     "With empty line",
		content:  " \t\r\f\n",
		expRune:  '\n',
		expDelim: '\n',
	}, {
		desc:     "With single line",
		content:  "test\n",
		expRune:  't',
		expToken: "test",
		expDelim: '\n',
	}, {
		desc:     "With space in the beginning",
		content:  " \t\f\rtest 1\ntest 2",
		expRune:  't',
		expToken: "test",
		expDelim: ' ',
	}}

	// One parser is reused across cases; Load resets its state.
	p := New("", "")

	for _, c := range cases {
		t.Log(c.desc)

		p.Load(c.content, "")

		got := p.SkipHorizontalSpaces()

		test.Assert(t, "rune", c.expRune, got, true)

		gotToken, gotDelim := p.Token()

		test.Assert(t, "token", c.expToken, gotToken, true)
		test.Assert(t, "delim", c.expDelim, gotDelim, true)
	}
}
diff --git a/lib/parser/testdata/test.txt b/lib/parser/testdata/test.txt
new file mode 100644
index 00000000..9daeafb9
--- /dev/null
+++ b/lib/parser/testdata/test.txt
@@ -0,0 +1 @@
+test