lib/parser: a general parser library

author: Shulhan <m.shulhan@gmail.com> 2019-11-05 22:16:46 +0700
committer: Shulhan <m.shulhan@gmail.com> 2020-03-01 22:54:24 +0700
commit: 2bca5ef4f69b8f33e76660dfd784fa2b3e2251c4 (patch)
tree: 89366ccf19686fc545c3871469308ecab7287ac9 /lib/parser/parser.go
parent: 216584e664fc8898b13f931a582646803d8fa6fb (diff)
download: pakakeh.go-2bca5ef4f69b8f33e76660dfd784fa2b3e2251c4.tar.xz
1 files changed, 349 insertions, 0 deletions
diff --git a/lib/parser/parser.go b/lib/parser/parser.go
new file mode 100644
index 00000000..8c5d07e7
--- /dev/null
+++ b/lib/parser/parser.go
@@ -0,0 +1,349 @@
+// Copyright 2019, Shulhan <m.shulhan@gmail.com>. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//
+// Package parser provides a common text parser, using delimiters.
+//
+package parser
+
+import (
+	"fmt"
+	"io/ioutil"
+
+	libascii "github.com/shuLhan/share/lib/ascii"
+)
+
+//
+// Parser implement text parsing.
+// It keeps the whole content in memory together with the current read
+// position, the set of delimiters, and the result of the last read.
+//
+type Parser struct {
+	// file is the path that was passed to Open; it is left empty when the
+	// parser is created with New.
+	file   string
+	// delims is the set of characters that terminate a token; each rune
+	// in the string is one delimiter.
+	delims string
+	x      int    // x is the position of read in v, as a byte offset.
+	v      string // v contains the text to be parsed.
+	token  []rune // token that has been parsed; its storage is reused between reads.
+	d      rune   // d is one of delims character that terminated parsing, or 0 on end of content.
+}
+
+//
+// Lines parse the content of path and return non-empty lines.
+//
+func Lines(file string) ([]string, error) {
+	p, err := Open(file, "")
+	if err != nil {
+		return nil, fmt.Errorf("Lines: %w", err)
+	}
+	return p.Lines(), nil
+}
+
+//
+// New create a parser that reads from content and splits tokens on any
+// character in delims.
+// The token buffer is allocated up front with room for 16 characters, and
+// the content and delimiters are installed through Load.
+//
+func New(content, delims string) (p *Parser) {
+	p = new(Parser)
+	p.token = make([]rune, 0, 16)
+	p.Load(content, delims)
+	return p
+}
+
+//
+// Open create and initialize the parser using predefined delimiters.
+// All the content of file will be loaded first.
+// If delimiters is empty, it would default to all whitespaces characters.
+//
+func Open(file, delims string) (p *Parser, err error) {
+	v, err := ioutil.ReadFile(file)
+	if err != nil {
+		return nil, err
+	}
+
+	p = New(string(v), delims)
+	p.file = file
+
+	return p, nil
+}
+
+//
+// AddDelimiters append new delimiter to existing parser.
+//
+func (p *Parser) AddDelimiters(delims string) {
+	var found bool
+	for _, newd := range delims {
+		found = false
+		for _, oldd := range p.delims {
+			if oldd == newd {
+				found = true
+				break
+			}
+		}
+		if !found {
+			p.delims += string(newd)
+		}
+	}
+}
+
+//
+// Close the parser by clearing the file name, delimiters, content, read
+// position, and last delimiter.
+// The token is emptied but its underlying storage is kept for reuse.
+//
+func (p *Parser) Close() {
+	buf := p.token[:0]
+
+	// Every field other than token goes back to its zero value.
+	*p = Parser{token: buf}
+}
+
+//
+// Lines return all non-empty lines from the content, starting from the
+// current read position.
+// Each line is stripped from leading and trailing space (" "), tab ("\t"),
+// carriage return ("\r"), and form feed ("\f"); a line that contains only
+// those characters is dropped.
+// After the call the read position is at the end of content.
+// The returned slice is never nil.
+//
+func (p *Parser) Lines() []string {
+	isSpace := func(c byte) bool {
+		return c == ' ' || c == '\t' || c == '\r' || c == '\f'
+	}
+
+	var (
+		lines = make([]string, 0)
+		size  = len(p.v)
+	)
+
+	for x := p.x; x < size; x++ {
+		// Move over the white spaces that open the line.
+		for x < size && isSpace(p.v[x]) {
+			x++
+		}
+		begin := x
+
+		// Move to the new line that closes it, or to the end of content.
+		for x < size && p.v[x] != '\n' {
+			x++
+		}
+
+		// Walk back over the white spaces that close the line.
+		// The character at begin is never inspected.
+		end := x
+		for end > begin+1 && isSpace(p.v[end-1]) {
+			end--
+		}
+
+		if begin == end {
+			// Nothing left between the white spaces.
+			continue
+		}
+
+		lines = append(lines, p.v[begin:end])
+	}
+
+	p.x = size
+
+	return lines
+}
+
+//
+// Load reset the parser with Close and then set the new content and
+// delimiters.
+// If delims is empty, libascii.Spaces is used as the delimiters.
+//
+func (p *Parser) Load(content, delims string) {
+	p.Close()
+
+	if delims == "" {
+		delims = string(libascii.Spaces)
+	}
+
+	p.v = content
+	p.delims = delims
+}
+
+//
+// Token read the next token from content until one of the delimiter found.
+// The delimiter is consumed but not included in the token.
+// If no delimiter found, its mean all of content has been read, the returned
+// token is the rest of content and the returned delimiter will be 0.
+//
+func (p *Parser) Token() (string, rune) {
+	p.token = p.token[:0]
+
+	if p.x >= len(p.v) {
+		p.d = 0
+		return "", 0
+	}
+
+	rest := p.v[p.x:]
+	for x, r := range rest {
+		for _, d := range p.delims {
+			if r != d {
+				continue
+			}
+
+			// The delimiter may be encoded in more than one byte,
+			// so continue from the offset of the character that
+			// follows it instead of assuming a width of one.
+			next := len(rest)
+			for i := range rest[x:] {
+				if i > 0 {
+					next = x + i
+					break
+				}
+			}
+
+			p.d = d
+			p.x += next
+			return string(p.token), p.d
+		}
+
+		p.token = append(p.token, r)
+	}
+
+	p.d = 0
+	p.x = len(p.v)
+	return string(p.token), p.d
+}
+
+//
+// TokenEscaped read the next token from content until one of the delimiter
+// found, unless the delimiter is escaped, that is, directly preceded by the
+// esc character.
+// The esc character is removed from the token; two consecutive esc
+// characters produce a single literal one.
+// An esc character only affects the one character that follows it.
+//
+// For example, if the content is `a\ b`, one of the delimiter is " ", and
+// esc is '\', it will return "a b" not "a".
+//
+// If no unescaped delimiter found, the rest of content is returned as token
+// and the returned delimiter will be 0.
+//
+func (p *Parser) TokenEscaped(esc rune) (string, rune) {
+	var isEscaped bool
+
+	p.token = p.token[:0]
+
+	if p.x >= len(p.v) {
+		p.d = 0
+		return "", 0
+	}
+
+	rest := p.v[p.x:]
+	for x, r := range rest {
+		if r == esc && !isEscaped {
+			isEscaped = true
+			continue
+		}
+
+		if !isEscaped {
+			for _, d := range p.delims {
+				if r != d {
+					continue
+				}
+
+				// The delimiter may be encoded in more than
+				// one byte, so continue from the offset of
+				// the character that follows it.
+				next := len(rest)
+				for i := range rest[x:] {
+					if i > 0 {
+						next = x + i
+						break
+					}
+				}
+
+				p.d = d
+				p.x += next
+				return string(p.token), p.d
+			}
+		}
+
+		// The escape is spent on this character, whether it is a
+		// delimiter, the esc itself, or anything else; it must not
+		// leak to a delimiter further down.
+		isEscaped = false
+		p.token = append(p.token, r)
+	}
+
+	p.d = 0
+	p.x = len(p.v)
+	return string(p.token), p.d
+}
+
+//
+// ReadEnclosed read the token inside opening and closing characters, ignoring
+// all delimiters that previously set.
+// If the first character read is open, it is skipped and not included in
+// the token.
+//
+// It will return the parsed token and closed character if closed character
+// found, otherwise it will return the characters read up to the end of
+// content as token with 0.
+//
+func (p *Parser) ReadEnclosed(open, closed rune) (string, rune) {
+	// Start from an empty token, otherwise the result would be prefixed
+	// with whatever the previous read left behind.
+	p.token = p.token[:0]
+
+	rest := p.v[p.x:]
+	for x, r := range rest {
+		if x == 0 && r == open {
+			continue
+		}
+		if r == closed {
+			// The closing character may be encoded in more than
+			// one byte, so continue from the offset of the
+			// character that follows it.
+			next := len(rest)
+			for i := range rest[x:] {
+				if i > 0 {
+					next = x + i
+					break
+				}
+			}
+
+			p.d = closed
+			p.x += next
+			return string(p.token), p.d
+		}
+
+		p.token = append(p.token, r)
+	}
+
+	p.d = 0
+	p.x = len(p.v)
+	return string(p.token), 0
+}
+
+//
+// RemoveDelimiters drop every character in dels from the parser delimiters.
+// Characters in dels that are not a delimiter are ignored; the order of the
+// remaining delimiters is preserved.
+//
+func (p *Parser) RemoveDelimiters(dels string) {
+	var kept string
+
+scan:
+	for _, current := range p.delims {
+		for _, unwanted := range dels {
+			if unwanted == current {
+				// Listed for removal, do not carry it over.
+				continue scan
+			}
+		}
+		kept += string(current)
+	}
+
+	p.delims = kept
+}
+
+//
+// Skip move the read position forward by n bytes.
+// If n reach or pass the end of content, the read position is set to the
+// end of content and the last delimiter is reset to 0.
+// A negative n move the read position backward, but never before the
+// beginning of content.
+//
+func (p *Parser) Skip(n int) {
+	// Compare against the remaining length instead of computing p.x+n,
+	// which could overflow when n is very large.
+	if n >= len(p.v)-p.x {
+		p.x = len(p.v)
+		p.d = 0
+		return
+	}
+
+	p.x += n
+	if p.x < 0 {
+		// A negative position would panic on the next read.
+		p.x = 0
+	}
+}
+
+//
+// SkipHorizontalSpaces move the read position over any run of space (" "),
+// tab ("\t"), carriage return ("\r"), and form feed ("\f") characters.
+// It return the first character that is not one of them, without consuming
+// it, and record it as the last delimiter.
+// If the end of content is reached first, it return 0.
+//
+func (p *Parser) SkipHorizontalSpaces() rune {
+	for off, c := range p.v[p.x:] {
+		if c == ' ' || c == '\t' || c == '\r' || c == '\f' {
+			continue
+		}
+
+		p.x += off
+		p.d = c
+		return c
+	}
+
+	p.x = len(p.v)
+	p.d = 0
+
+	return 0
+}
+
+//
+// SkipLine skip all characters until new line, and move the read position
+// to the character right after it.
+// It will return the new line character ('\n') if there is still content
+// after the new line, or 0 if the new line is the last character or if no
+// new line found.
+//
+func (p *Parser) SkipLine() rune {
+	for off, c := range p.v[p.x:] {
+		if c != '\n' {
+			continue
+		}
+
+		p.x += off + 1
+		p.d = 0
+		if p.x < len(p.v) {
+			// Something is left to read after the new line.
+			p.d = c
+		}
+		return p.d
+	}
+
+	// No new line found, everything has been consumed.
+	p.d = 0
+	p.x = len(p.v)
+
+	return 0
+}
author	Shulhan <m.shulhan@gmail.com>	2019-11-05 22:16:46 +0700
committer	Shulhan <m.shulhan@gmail.com>	2020-03-01 22:54:24 +0700
commit	2bca5ef4f69b8f33e76660dfd784fa2b3e2251c4 (patch)
tree	89366ccf19686fc545c3871469308ecab7287ac9 /lib/parser/parser.go
parent	216584e664fc8898b13f931a582646803d8fa6fb (diff)
download	pakakeh.go-2bca5ef4f69b8f33e76660dfd784fa2b3e2251c4.tar.xz