diff options
| author | Shulhan <m.shulhan@gmail.com> | 2019-11-05 22:16:46 +0700 |
|---|---|---|
| committer | Shulhan <m.shulhan@gmail.com> | 2020-03-01 22:54:24 +0700 |
| commit | 2bca5ef4f69b8f33e76660dfd784fa2b3e2251c4 (patch) | |
| tree | 89366ccf19686fc545c3871469308ecab7287ac9 | |
| parent | 216584e664fc8898b13f931a582646803d8fa6fb (diff) | |
| download | pakakeh.go-2bca5ef4f69b8f33e76660dfd784fa2b3e2251c4.tar.xz | |
lib/parser: a general parser library
| -rw-r--r-- | lib/parser/parser.go | 349 | ||||
| -rw-r--r-- | lib/parser/parser_benchmark_test.go | 23 | ||||
| -rw-r--r-- | lib/parser/parser_example_test.go | 29 | ||||
| -rw-r--r-- | lib/parser/parser_test.go | 287 | ||||
| -rw-r--r-- | lib/parser/testdata/test.txt | 1 |
5 files changed, 689 insertions, 0 deletions
// Copyright 2019, Shulhan <m.shulhan@gmail.com>. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//
// Package parser provides a common text parser, using delimiters.
//

import (
	"fmt"
	"io/ioutil"
	"unicode/utf8"
)

// asciiSpaces is the default set of delimiters: all ASCII whitespace
// characters.  It replaces the previous external dependency on
// lib/ascii Spaces, which contained the same characters.
const asciiSpaces = "\t\n\v\f\r "

//
// Parser implement text parsing over a string content using a set of
// single-rune delimiters.
//
type Parser struct {
	file   string
	delims string
	x      int    // x is the position of read in v.
	v      string // v contains the text to be parsed.
	token  []rune // token that has been parsed.
	d      rune   // d is one of delims character that terminated parsing.
}

//
// Lines parse the content of file and return all its non-empty lines.
//
func Lines(file string) ([]string, error) {
	p, err := Open(file, "")
	if err != nil {
		return nil, fmt.Errorf("Lines: %w", err)
	}
	return p.Lines(), nil
}

//
// New create and initialize parser from content and delimiters.
//
func New(content, delims string) (p *Parser) {
	p = &Parser{
		token: make([]rune, 0, 16),
	}

	p.Load(content, delims)

	return p
}

//
// Open create and initialize the parser using predefined delimiters.
// All the content of file will be loaded first.
// If delimiters is empty, it would default to all whitespaces characters.
//
func Open(file, delims string) (p *Parser, err error) {
	v, err := ioutil.ReadFile(file)
	if err != nil {
		return nil, err
	}

	p = New(string(v), delims)
	p.file = file

	return p, nil
}

//
// AddDelimiters append new delimiters to existing parser, skipping any
// delimiter that is already registered.
//
func (p *Parser) AddDelimiters(delims string) {
	var found bool
	for _, newd := range delims {
		found = false
		for _, oldd := range p.delims {
			if oldd == newd {
				found = true
				break
			}
		}
		if !found {
			p.delims += string(newd)
		}
	}
}

//
// Close the parser by resetting all its internal state to zero value.
//
func (p *Parser) Close() {
	p.file = ""
	p.delims = ""
	p.x = 0
	p.v = ""
	p.token = p.token[:0]
	p.d = 0
}

//
// Lines return all non-empty lines from the remaining content, with
// leading and trailing horizontal whitespace trimmed.
// After this call all content has been consumed.
//
func (p *Parser) Lines() []string {
	var start, end int

	lines := make([]string, 0)

	for x := p.x; x < len(p.v); x++ {
		// Skip white spaces at the beginning of the line.
		for ; x < len(p.v); x++ {
			if p.v[x] == ' ' || p.v[x] == '\t' || p.v[x] == '\r' || p.v[x] == '\f' {
				continue
			}
			break
		}
		start = x
		// Advance until the end of line.
		for ; x < len(p.v); x++ {
			if p.v[x] != '\n' {
				continue
			}
			break
		}

		// Skip white spaces at the end of the line.
		for end = x - 1; end > start; end-- {
			if p.v[end] == ' ' || p.v[end] == '\t' ||
				p.v[end] == '\r' || p.v[end] == '\f' {
				continue
			}
			break
		}
		end++
		if start == end {
			// Skip empty lines.
			continue
		}

		line := p.v[start:end]
		lines = append(lines, line)
	}

	p.x = len(p.v)

	return lines
}

//
// Load the new content and delimiters, resetting any previous state.
// If delims is empty it defaults to all ASCII whitespace characters.
//
func (p *Parser) Load(content, delims string) {
	p.Close()
	p.v = content
	if len(delims) == 0 {
		p.delims = asciiSpaces
	} else {
		p.delims = delims
	}
}

//
// Token read the next token from content until one of the delimiter found.
// If no delimiter found, its mean all of content has been read, and the
// returned delimiter will be 0.
//
func (p *Parser) Token() (string, rune) {
	p.token = p.token[:0]

	if p.x >= len(p.v) {
		p.d = 0
		return "", 0
	}

	for x, r := range p.v[p.x:] {
		for _, d := range p.delims {
			if r == d {
				p.d = d
				// Advance past the delimiter by its encoded
				// byte length, so a multi-byte delimiter does
				// not leave p.x in the middle of a rune.
				p.x += x + utf8.RuneLen(d)
				return string(p.token), p.d
			}
		}

		p.token = append(p.token, r)
	}

	p.d = 0
	p.x = len(p.v)
	return string(p.token), p.d
}

//
// TokenEscaped read the next token from content until one of the delimiter
// found, unless its escaped with value of esc character.
// The esc character applies to exactly the rune that follows it; the esc
// itself is not included in the token unless doubled.
//
// For example, if the content is "a b" and one of the delimiter is " ",
// escaping it with "\" will return as "a b" not "a".
//
func (p *Parser) TokenEscaped(esc rune) (string, rune) {
	var isEscaped bool

	p.token = p.token[:0]

	if p.x >= len(p.v) {
		p.d = 0
		return "", 0
	}

	for x, r := range p.v[p.x:] {
		if isEscaped {
			// The previous rune was esc: take this rune
			// literally, whether or not it is a delimiter.
			isEscaped = false
			p.token = append(p.token, r)
			continue
		}
		if r == esc {
			isEscaped = true
			continue
		}
		for _, d := range p.delims {
			if r == d {
				p.d = d
				p.x += x + utf8.RuneLen(d)
				return string(p.token), p.d
			}
		}

		p.token = append(p.token, r)
	}

	p.d = 0
	p.x = len(p.v)
	return string(p.token), p.d
}

//
// ReadEnclosed read the token inside opening and closing characters,
// ignoring all delimiters that previously set.
//
// It will return the parsed token and the closed character if closed
// character found, otherwise it will return the token with 0.
//
func (p *Parser) ReadEnclosed(open, closed rune) (string, rune) {
	// Reset the token buffer, otherwise residue from a previous
	// Token/TokenEscaped call would leak into the result.
	p.token = p.token[:0]

	for x, r := range p.v[p.x:] {
		if x == 0 && r == open {
			// Skip the opening character itself.
			continue
		}
		if r == closed {
			p.d = closed
			p.x += x + utf8.RuneLen(closed)
			return string(p.token), p.d
		}

		p.token = append(p.token, r)
	}

	p.d = 0
	p.x = len(p.v)
	// No closing character: return what has been parsed so far, not
	// the whole content.
	return string(p.token), 0
}

//
// RemoveDelimiters from current parser.
//
func (p *Parser) RemoveDelimiters(dels string) {
	var (
		newdelims string
		found     bool
	)

	for _, oldd := range p.delims {
		found = false
		for _, r := range dels {
			if r == oldd {
				found = true
				break
			}
		}
		if !found {
			newdelims += string(oldd)
		}
	}

	p.delims = newdelims
}

//
// Skip parsing n characters or until EOF if n is greater than the length
// of remaining content.
//
func (p *Parser) Skip(n int) {
	if p.x+n >= len(p.v) {
		p.x = len(p.v)
		p.d = 0
	} else {
		p.x += n
	}
}

//
// SkipHorizontalSpaces skip all space (" "), tab ("\t"), carriage return
// ("\r"), and form feed ("\f") characters; and return the first character
// found, probably new line.
//
func (p *Parser) SkipHorizontalSpaces() rune {
	for x, r := range p.v[p.x:] {
		switch r {
		case ' ', '\t', '\r', '\f':
		default:
			p.x += x
			p.d = r
			return r
		}
	}

	p.d = 0
	p.x = len(p.v)

	return 0
}

//
// SkipLine skip all characters until new line.
// It will return the new line character, or 0 if the new line ends the
// content or no new line was found.
//
func (p *Parser) SkipLine() rune {
	for x, r := range p.v[p.x:] {
		if r == '\n' {
			p.x += x + 1
			if p.x >= len(p.v) {
				p.d = 0
			} else {
				p.d = r
			}
			return p.d
		}
	}

	// All contents has been read, no new line found.
	p.x = len(p.v)
	p.d = 0

	return 0
}
+ +package parser + +import "testing" + +// Output: +// +// BenchmarkParser_Token-4 59117898 20.2 ns/op 0 B/op 0 allocs/op +// +func BenchmarkParser_Token(b *testing.B) { + content := "abc;def" + delims := " /;" + + p := New(content, delims) + + for x := 0; x < b.N; x++ { + p.Token() + p.Load(content, delims) + } +} diff --git a/lib/parser/parser_example_test.go b/lib/parser/parser_example_test.go new file mode 100644 index 00000000..41e75455 --- /dev/null +++ b/lib/parser/parser_example_test.go @@ -0,0 +1,29 @@ +// Copyright 2019, Shulhan <m.shulhan@gmail.com>. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package parser + +import ( + "fmt" + "strings" +) + +func ExampleNew() { + content := "[test]\nkey = value" + p := New(content, "=[]") + + for { + token, del := p.Token() + token = strings.TrimSpace(token) + fmt.Printf("%q %q\n", token, del) + if del == 0 { + break + } + } + // Output: + // "" '[' + // "test" ']' + // "key" '=' + // "value" '\x00' +} diff --git a/lib/parser/parser_test.go b/lib/parser/parser_test.go new file mode 100644 index 00000000..71d0a9e5 --- /dev/null +++ b/lib/parser/parser_test.go @@ -0,0 +1,287 @@ +// Copyright 2019, Shulhan <m.shulhan@gmail.com>. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package parser + +import ( + "testing" + + "github.com/shuLhan/share/lib/test" +) + +func TestParser_AddDelimiters(t *testing.T) { + p := &Parser{ + delims: "/:", + } + + cases := []struct { + delims string + exp string + }{{ + exp: "/:", + }, { + delims: " \t", + exp: "/: \t", + }, { + delims: " \t", + exp: "/: \t", + }} + + for _, c := range cases { + p.AddDelimiters(c.delims) + test.Assert(t, "p.delims", c.exp, p.delims, true) + } +} + +func TestParser_Lines(t *testing.T) { + cases := []struct { + desc string + content string + exp []string + }{{ + desc: "With empty content", + exp: []string{}, + }, { + desc: "With single empty line", + content: "\n", + exp: []string{}, + }, { + desc: "With single empty line", + content: " \t\r\f\n", + exp: []string{}, + }, { + desc: "With one line, at the end", + content: " \t\r\f\ntest", + exp: []string{ + "test", + }, + }, { + desc: "With one line, in the middle", + content: " \t\r\f\ntest \t\r\f\n", + exp: []string{ + "test", + }, + }, { + desc: "With two lines", + content: "A \t\f\r\n \nB \t\f\r\n", + exp: []string{ + "A", + "B", + }, + }, { + desc: "With three lines", + content: "A \t\f\r\n \n\n\nB\n \t\f\r\nC", + exp: []string{ + "A", + "B", + "C", + }, + }} + + p := New("", "") + + for _, c := range cases { + t.Log(c.desc) + + p.Load(c.content, "") + + got := p.Lines() + + test.Assert(t, "Lines()", c.exp, got, true) + } +} + +func TestParser_Token(t *testing.T) { + p := New("\t test \ntest", "") + + cases := []struct { + expToken string + expDelim rune + }{{ + expDelim: '\t', + }, { + expDelim: ' ', + }, { + expToken: "test", + expDelim: ' ', + }, { + expDelim: '\n', + }, { + expToken: "test", + }} + + for _, c := range cases { + gotToken, gotDelim := p.Token() + + test.Assert(t, "token", c.expToken, gotToken, true) + test.Assert(t, "delim", c.expDelim, gotDelim, true) + } +} + +func TestParser_TokenEscaped(t *testing.T) { + p := New("\t te\\ st \ntest", "") + + cases := []struct { + expToken string + expDelim rune + 
}{{ + expDelim: '\t', + }, { + expDelim: ' ', + }, { + expToken: "te st", + expDelim: ' ', + }, { + expDelim: '\n', + }, { + expToken: "test", + }} + + for _, c := range cases { + gotToken, gotDelim := p.TokenEscaped('\\') + + test.Assert(t, "token", c.expToken, gotToken, true) + test.Assert(t, "delim", c.expDelim, gotDelim, true) + } +} + +func TestParser_SkipLine(t *testing.T) { + cases := []struct { + desc string + content string + expToken string + expDelim rune + }{{ + desc: "With empty content", + }, { + desc: "With empty line", + content: "\ntest\n", + expToken: "test", + expDelim: '\n', + }, { + desc: "With single line", + content: "test\n", + }, { + desc: "With two lines", + content: "test 1\ntest 2", + expToken: "test", + expDelim: ' ', + }} + + p := New("", "") + + for _, c := range cases { + t.Log(c.desc) + + p.Load(c.content, "") + + p.SkipLine() + + gotToken, gotDelim := p.Token() + + test.Assert(t, "token", c.expToken, gotToken, true) + test.Assert(t, "delim", c.expDelim, gotDelim, true) + } +} + +func TestParser_Open(t *testing.T) { + cases := []struct { + desc string + file string + expError string + expContent string + }{{ + desc: "With not existing file", + file: "testdata/xxx", + expError: "open testdata/xxx: no such file or directory", + }, { + desc: "With file exist", + file: "testdata/test.txt", + expContent: "test\n", + }} + + for _, c := range cases { + t.Log(c.desc) + + p, err := Open(c.file, "") + if err != nil { + test.Assert(t, "error", c.expError, err.Error(), true) + continue + } + + test.Assert(t, "content", c.expContent, p.v, true) + } +} + +func TestParser_RemoveDelimiters(t *testing.T) { + p := &Parser{ + delims: "/: \t", + } + cases := []struct { + delims string + exp string + }{{ + exp: "/: \t", + }, { + delims: "/", + exp: ": \t", + }, { + delims: "///", + exp: ": \t", + }, { + delims: "\t :", + exp: "", + }} + + for _, c := range cases { + p.RemoveDelimiters(c.delims) + test.Assert(t, "p.delims", c.exp, p.delims, true) + } +} 
+ +func TestParser_SkipHorizontalSpaces(t *testing.T) { + cases := []struct { + desc string + content string + expRune rune + expToken string + expDelim rune + }{{ + desc: "With empty content", + }, { + desc: "With empty line", + content: " \t\r\f\n", + expRune: '\n', + expDelim: '\n', + }, { + desc: "With single line", + content: "test\n", + expRune: 't', + expToken: "test", + expDelim: '\n', + }, { + desc: "With space in the beginning", + content: " \t\f\rtest 1\ntest 2", + expRune: 't', + expToken: "test", + expDelim: ' ', + }} + + p := New("", "") + + for _, c := range cases { + t.Log(c.desc) + + p.Load(c.content, "") + + got := p.SkipHorizontalSpaces() + + test.Assert(t, "rune", c.expRune, got, true) + + gotToken, gotDelim := p.Token() + + test.Assert(t, "token", c.expToken, gotToken, true) + test.Assert(t, "delim", c.expDelim, gotDelim, true) + } +} diff --git a/lib/parser/testdata/test.txt b/lib/parser/testdata/test.txt new file mode 100644 index 00000000..9daeafb9 --- /dev/null +++ b/lib/parser/testdata/test.txt @@ -0,0 +1 @@ +test |
