lib/bytes: add various methods to Parser

The AddDelimiters method adds more delimiters to the current parser. The ReadNoSpace method reads the next token, ignoring the leading spaces even if a space is one of the delimiters. The returned token has no trailing spaces. The RemoveDelimiters method removes the delimiters delims from the current delimiters. The Reset method sets all internal state to the new content and delimiters. The SetDelimiters method replaces the current delimiters with delims. The SkipHorizontalSpaces method skips space (" "), tab ("\t"), carriage return ("\r"), and form feed ("\f") characters, and returns the first non-space character, or 0 if it reaches end-of-content. The SkipSpaces method skips all space characters (' ', '\f', '\n', '\r', '\t') and returns the first non-space character, or 0 if it reaches end-of-content. The Stop method stops the parser, returns the remaining unparsed content and its last position, and then calls Reset to reset the internal state back to zero. The UnreadN method unreads N characters and returns the character it then points to. If N is greater than the current position index, it resets the read pointer index back to zero.
author: Shulhan <ms@kilabit.info> 2023-04-06 01:36:47 +0700
committer: Shulhan <ms@kilabit.info> 2023-04-08 16:56:26 +0700
commit: bf053a185ff3b34c37c539bf2e85e1cf34d9cf7c (patch)
tree: 31d1c72159fa3b580e6aaa8a79f6373705de70e7 /lib/bytes/parser.go
parent: 3de6c75a8364cd2042746c586ab008c9187e4aaa (diff)
download: pakakeh.go-bf053a185ff3b34c37c539bf2e85e1cf34d9cf7c.tar.xz
1 files changed, 193 insertions, 0 deletions
diff --git a/lib/bytes/parser.go b/lib/bytes/parser.go
index a8bc5d7b..e4e14927 100644
--- a/lib/bytes/parser.go
+++ b/lib/bytes/parser.go
@@ -4,6 +4,8 @@
 
 package bytes
 
+import "github.com/shuLhan/share/lib/ascii"
+
 // Parser implement tokenize parser for stream of byte using one or more
 // delimiters as separator between token.
 type Parser struct {
@@ -24,6 +26,16 @@ func NewParser(content, delims []byte) (bp *Parser) {
 	return bp
 }
 
+// AddDelimiters append delims to the current set of delimiters.
+//
+// The current delimiters may be a slice that was handed in by the caller
+// (Reset and SetDelimiters store their argument as is), so the slice is
+// capped to its own length before appending.
+// This forces append to allocate new storage whenever delims is not empty,
+// instead of writing into spare capacity of the caller's backing array.
+func (bp *Parser) AddDelimiters(delims []byte) {
+	var n = len(bp.delims)
+	bp.delims = append(bp.delims[:n:n], delims...)
+}
+
+// Delimiters return the delimiters currently in use, passed through Copy
+// so the caller does not receive the parser's own slice.
+func (bp *Parser) Delimiters() []byte {
+	var current = bp.delims
+	return Copy(current)
+}
+
 // Read read a token until one of the delimiters found.
 // If one of delimiter match, it will return it as d.
 // When end of content encountered, the returned token may be not empty but
@@ -44,6 +56,152 @@ func (bp *Parser) Read() (token []byte, d byte) {
 	return token, 0
 }
 
+// ReadN read up to n characters from the current position, without
+// looking at the delimiters.
+// It returns the characters read as token and the character that follows
+// them as d, or 0 as d if the end of content has been reached.
+// Fewer than n characters are returned when the content ends first; a
+// zero or negative n reads nothing.
+func (bp *Parser) ReadN(n int) (token []byte, d byte) {
+	// Never read past the end of content.
+	var avail = bp.size - bp.x
+	if n > avail {
+		n = avail
+	}
+	if n > 0 {
+		token = append(token, bp.content[bp.x:bp.x+n]...)
+		bp.x += n
+	}
+	if bp.x < bp.size {
+		return token, bp.content[bp.x]
+	}
+	return token, 0
+}
+
+// ReadNoSpace read the next token after skipping the leading spaces, even
+// when a space is one of the delimiters.
+// The token ends at the first delimiter, which is consumed and returned as
+// d; if the content ends before any delimiter is seen, d is 0.
+// Trailing spaces are trimmed from the returned token.
+func (bp *Parser) ReadNoSpace() (token []byte, d byte) {
+	var (
+		c     byte
+		delim byte
+		end   int
+	)
+
+	// Move past the leading spaces.
+	for bp.x < bp.size && ascii.IsSpace(bp.content[bp.x]) {
+		bp.x++
+	}
+
+scan:
+	for bp.x < bp.size {
+		c = bp.content[bp.x]
+		// The character is consumed whether it is a delimiter or
+		// part of the token.
+		bp.x++
+		for _, delim = range bp.delims {
+			if delim == c {
+				d = delim
+				break scan
+			}
+		}
+		token = append(token, c)
+	}
+
+	// Find the end of the token without its trailing spaces.
+	for end = len(token); end > 0; end-- {
+		if !ascii.IsSpace(token[end-1]) {
+			break
+		}
+	}
+
+	return token[:end], d
+}
+
+// RemoveDelimiters drop every delimiter listed in delims from the current
+// delimiters.
+// The delimiters that remain keep their order and are stored in a newly
+// allocated slice.
+func (bp *Parser) RemoveDelimiters(delims []byte) {
+	var (
+		kept = make([]byte, 0, len(bp.delims))
+
+		cur byte
+		rem byte
+	)
+
+next:
+	for _, cur = range bp.delims {
+		for _, rem = range delims {
+			if cur == rem {
+				// Listed for removal, do not keep it.
+				continue next
+			}
+		}
+		kept = append(kept, cur)
+	}
+	bp.delims = kept
+}
+
+// Reset replace the content and delimiters of the Parser and move the read
+// position back to the start of the new content.
+// Both slices are stored as given, without copying.
+func (bp *Parser) Reset(content, delims []byte) {
+	bp.x = 0
+	bp.content, bp.size = content, len(content)
+	bp.delims = delims
+}
+
+// SetDelimiters replace the current delimiters with delims.
+// The slice is stored as given, without copying; the content and the read
+// position are left untouched.
+func (bp *Parser) SetDelimiters(delims []byte) {
+	bp.delims = delims
+}
+
+// Skip advance the read position past the next delimiter without
+// collecting a token.
+// It returns the delimiter that was found, or 0 if the end of content is
+// reached first.
+func (bp *Parser) Skip() (c byte) {
+	var delim byte
+	for bp.x < bp.size {
+		c = bp.content[bp.x]
+		// The character is consumed whether or not it is a delimiter.
+		bp.x++
+		for _, delim = range bp.delims {
+			if delim == c {
+				return c
+			}
+		}
+	}
+	return 0
+}
+
+// SkipN advance the read position by up to n characters, without looking
+// at the delimiters.
+// It returns the character at the new position, or 0 if the new position
+// is at the end of content.
+// A zero or negative n does not move the position.
+func (bp *Parser) SkipN(n int) (c byte) {
+	// Never move past the end of content.
+	var remain = bp.size - bp.x
+	if n > remain {
+		n = remain
+	}
+	if n > 0 {
+		bp.x += n
+	}
+	if bp.x >= bp.size {
+		return 0
+	}
+	return bp.content[bp.x]
+}
+
+// SkipHorizontalSpaces advance the read position over space (" "), tab
+// ("\t"), carriage return ("\r"), and form feed ("\f") characters.
+// It returns the first character that is none of those, without consuming
+// it, or 0 if the end of content is reached.
+func (bp *Parser) SkipHorizontalSpaces() (c byte) {
+	for bp.x < bp.size {
+		c = bp.content[bp.x]
+		switch c {
+		case ' ', '\t', '\r', '\f':
+			bp.x++
+		default:
+			return c
+		}
+	}
+	return 0
+}
+
 // SkipLine skip all characters until new line.
 // It will return 0 if EOF.
 func (bp *Parser) SkipLine() (c byte) {
@@ -57,3 +215,38 @@ func (bp *Parser) SkipLine() (c byte) {
 	}
 	return 0
 }
+
+// SkipSpaces advance the read position over every character that
+// ascii.IsSpace reports as a space.
+// It returns the first non-space character, without consuming it, or 0 if
+// the end of content is reached.
+func (bp *Parser) SkipSpaces() (c byte) {
+	for bp.x < bp.size {
+		c = bp.content[bp.x]
+		if !ascii.IsSpace(c) {
+			return c
+		}
+		bp.x++
+	}
+	return 0
+}
+
+// Stop end the parsing and hand back what is left.
+// It returns the unparsed part of the content, passed through Copy, as
+// remain and the read position as pos, then clears the parser by calling
+// Reset with nil content and nil delimiters.
+func (bp *Parser) Stop() (remain []byte, pos int) {
+	pos = bp.x
+	remain = Copy(bp.content[pos:])
+	bp.Reset(nil, nil)
+	return remain, pos
+}
+
+// UnreadN move the read position back by n characters and return the
+// character at the new position.
+// If n is greater than the current position, the position is reset to
+// zero.
+// A zero or negative n leaves the position where it is, so the position
+// can never be moved forward past the end of content.
+//
+// It returns 0 if the resulting position is at the end of content, for
+// example when the content is empty or when n is 0 after everything has
+// been read, instead of indexing out of range.
+func (bp *Parser) UnreadN(n int) byte {
+	switch {
+	case n <= 0:
+		// Nothing to unread.
+	case n > bp.x:
+		bp.x = 0
+	default:
+		bp.x -= n
+	}
+	if bp.x >= bp.size {
+		return 0
+	}
+	return bp.content[bp.x]
+}
author	Shulhan <ms@kilabit.info>	2023-04-06 01:36:47 +0700
committer	Shulhan <ms@kilabit.info>	2023-04-08 16:56:26 +0700
commit	bf053a185ff3b34c37c539bf2e85e1cf34d9cf7c (patch)
tree	31d1c72159fa3b580e6aaa8a79f6373705de70e7 /lib/bytes/parser.go
parent	3de6c75a8364cd2042746c586ab008c9187e4aaa (diff)
download	pakakeh.go-bf053a185ff3b34c37c539bf2e85e1cf34d9cf7c.tar.xz