summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorShulhan <ms@kilabit.info>2023-04-06 01:36:47 +0700
committerShulhan <ms@kilabit.info>2023-04-08 16:56:26 +0700
commitbf053a185ff3b34c37c539bf2e85e1cf34d9cf7c (patch)
tree31d1c72159fa3b580e6aaa8a79f6373705de70e7
parent3de6c75a8364cd2042746c586ab008c9187e4aaa (diff)
downloadpakakeh.go-bf053a185ff3b34c37c539bf2e85e1cf34d9cf7c.tar.xz
lib/bytes: add various methods to Parser
The AddDelimiters method adds more delimiters to the current parser. The ReadNoSpace method reads the next token, ignoring leading spaces even if a space is one of the delimiters. The returned token will have no trailing spaces. The RemoveDelimiters method removes the delimiters delims from the current delimiters. The Reset method sets all internal state to the new content and delimiters. The SetDelimiters method replaces the current delimiters with delims. The SkipHorizontalSpaces method skips space (" "), tab ("\t"), carriage return ("\r"), and form feed ("\f") characters, and returns the first non-space character, or 0 if it reaches end-of-content. The SkipSpaces method skips all space characters (' ', '\f', '\n', '\r', '\t') and returns the first non-space character, or 0 if it reaches end-of-content. The Stop method stops the parser, returns the remaining unparsed content and its last position, and then calls Reset to reset the internal state back to zero. The UnreadN method unreads N characters and returns the character it points to. If N is greater than the current position index, it resets the read pointer index back to zero.
-rw-r--r--lib/bytes/parser.go193
-rw-r--r--lib/bytes/parser_example_test.go298
2 files changed, 491 insertions, 0 deletions
diff --git a/lib/bytes/parser.go b/lib/bytes/parser.go
index a8bc5d7b..e4e14927 100644
--- a/lib/bytes/parser.go
+++ b/lib/bytes/parser.go
@@ -4,6 +4,8 @@
package bytes
+import "github.com/shuLhan/share/lib/ascii"
+
// Parser implement tokenize parser for stream of byte using one or more
// delimiters as separator between token.
type Parser struct {
@@ -24,6 +26,16 @@ func NewParser(content, delims []byte) (bp *Parser) {
return bp
}
+// AddDelimiters add another delimiters to the current parser.
+func (bp *Parser) AddDelimiters(delims []byte) {
+ bp.delims = append(bp.delims, delims...)
+}
+
+// Delimiters return the copy of current delimiters.
+func (bp *Parser) Delimiters() []byte {
+ return Copy(bp.delims)
+}
+
// Read read a token until one of the delimiters found.
// If one of delimiter match, it will return it as d.
// When end of content encountered, the returned token may be not empty but
@@ -44,6 +56,152 @@ func (bp *Parser) Read() (token []byte, d byte) {
return token, 0
}
+// ReadN reads up to n characters from the current position, without
+// treating delimiters specially.
+// It returns the characters read as token, and d as the character that
+// follows them (which is left unread), or 0 if end-of-content was reached
+// first.
+// If n is zero or negative nothing is consumed and token is nil.
+func (bp *Parser) ReadN(n int) (token []byte, d byte) {
+	for taken := 0; bp.x < bp.size; taken++ {
+		var next = bp.content[bp.x]
+		if taken >= n {
+			// Quota reached: report the following character but do
+			// not consume it.
+			return token, next
+		}
+		token = append(token, next)
+		bp.x++
+	}
+	return token, 0
+}
+
+// ReadNoSpace reads the next token after skipping any leading spaces, even
+// when a space character is itself one of the delimiters.
+// Reading stops at the first delimiter, which is consumed and returned as d;
+// d is 0 when end-of-content is reached without meeting a delimiter.
+// Trailing spaces are stripped from the returned token.
+func (bp *Parser) ReadNoSpace() (token []byte, d byte) {
+	// Step over leading spaces.
+	for bp.x < bp.size && ascii.IsSpace(bp.content[bp.x]) {
+		bp.x++
+	}
+
+scan:
+	for bp.x < bp.size {
+		var c = bp.content[bp.x]
+		bp.x++
+		for _, delim := range bp.delims {
+			if delim == c {
+				d = c
+				break scan
+			}
+		}
+		token = append(token, c)
+	}
+
+	// Strip trailing spaces by shrinking the token from its end.
+	var end = len(token)
+	for end > 0 && ascii.IsSpace(token[end-1]) {
+		end--
+	}
+
+	return token[:end], d
+}
+
+// RemoveDelimiters drops every delimiter listed in delims from the parser's
+// current delimiters.
+// The remaining delimiters keep their relative order and are stored in a
+// newly allocated slice.
+func (bp *Parser) RemoveDelimiters(delims []byte) {
+	var kept = make([]byte, 0, len(bp.delims))
+
+next:
+	for _, cur := range bp.delims {
+		for _, rm := range delims {
+			if rm == cur {
+				// Marked for removal, do not carry it over.
+				continue next
+			}
+		}
+		kept = append(kept, cur)
+	}
+	bp.delims = kept
+}
+
+// Reset re-initializes the Parser with a new content and delimiters.
+// The read index goes back to zero and the size is set to the length of
+// content.
+// Both slices are stored as given, without copying.
+func (bp *Parser) Reset(content, delims []byte) {
+	bp.x, bp.size = 0, len(content)
+	bp.content, bp.delims = content, delims
+}
+
+// SetDelimiters replaces the parser's current delimiters with delims.
+// The slice is stored as given, without copying, so the parser and the
+// caller share the same backing array afterwards.
+func (bp *Parser) SetDelimiters(delims []byte) {
+ bp.delims = delims
+}
+
+// Skip skip parsing token until one of the delimiters found or
+// end-of-content.
+func (bp *Parser) Skip() (c byte) {
+ var d byte
+ for bp.x < bp.size {
+ c = bp.content[bp.x]
+ for _, d = range bp.delims {
+ if c == d {
+ bp.x++
+ return c
+ }
+ }
+ bp.x++
+ }
+ return 0
+}
+
+// SkipN skip exactly N characters ignoring delimiters.
+// It will return the next character after N or 0 if it reach end-of-content.
+func (bp *Parser) SkipN(n int) (c byte) {
+ var count int
+ for bp.x < bp.size {
+ c = bp.content[bp.x]
+ if count >= n {
+ return c
+ }
+ count++
+ bp.x++
+ }
+ return 0
+}
+
+// SkipHorizontalSpaces advances past any run of space (" "), tab ("\t"),
+// carriage return ("\r"), and form feed ("\f") characters.
+// It returns the first character that is not one of those, leaving it
+// unread, or 0 if end-of-content is reached.
+func (bp *Parser) SkipHorizontalSpaces() (c byte) {
+	for bp.x < bp.size {
+		c = bp.content[bp.x]
+		switch c {
+		case ' ', '\t', '\r', '\f':
+			bp.x++
+		default:
+			return c
+		}
+	}
+	return 0
+}
+
// SkipLine skip all characters until new line.
// It will return 0 if EOF.
func (bp *Parser) SkipLine() (c byte) {
@@ -57,3 +215,38 @@ func (bp *Parser) SkipLine() (c byte) {
}
return 0
}
+
+// SkipSpaces advances past every character for which ascii.IsSpace reports
+// true.
+// It returns the first non-space character, leaving it unread, or 0 if
+// end-of-content is reached.
+func (bp *Parser) SkipSpaces() (c byte) {
+	for bp.x < bp.size {
+		c = bp.content[bp.x]
+		if !ascii.IsSpace(c) {
+			return c
+		}
+		bp.x++
+	}
+	return 0
+}
+
+// Stop ends the parsing.
+// It returns the content that has not been parsed yet, passed through Copy,
+// along with the index where parsing stopped, then clears the parser by
+// calling Reset with nil content and nil delimiters.
+func (bp *Parser) Stop() (remain []byte, pos int) {
+	pos = bp.x
+	remain = Copy(bp.content[pos:])
+	bp.Reset(nil, nil)
+	return remain, pos
+}
+
+// UnreadN moves the read index back by n characters and returns the
+// character it then points to.
+// If n is greater than the current index, the index is reset to zero.
+// If n is zero or negative, the index is left where it is.
+//
+// It returns 0 when the index ends up at end-of-content, for example when
+// the content is empty or when n is 0 after everything has been read,
+// instead of indexing past the end of the content.
+func (bp *Parser) UnreadN(n int) byte {
+	switch {
+	case n <= 0:
+		// Nothing to unread; a negative n must not push the index
+		// forward.
+	case n > bp.x:
+		bp.x = 0
+	default:
+		bp.x -= n
+	}
+	if bp.x >= bp.size {
+		return 0
+	}
+	return bp.content[bp.x]
+}
diff --git a/lib/bytes/parser_example_test.go b/lib/bytes/parser_example_test.go
new file mode 100644
index 00000000..1348a5ab
--- /dev/null
+++ b/lib/bytes/parser_example_test.go
@@ -0,0 +1,298 @@
+package bytes_test
+
+import (
+ "fmt"
+
+ libbytes "github.com/shuLhan/share/lib/bytes"
+)
+
+func ExampleParser_AddDelimiters() {
+ var (
+ content = []byte(` a = b ; c = d `)
+ delims = []byte(`=`)
+ parser = libbytes.NewParser(content, delims)
+ )
+
+ token, d := parser.ReadNoSpace()
+ fmt.Printf("%s:%c\n", token, d)
+
+ parser.AddDelimiters([]byte{';'})
+ token, d = parser.ReadNoSpace()
+ fmt.Printf("%s:%c\n", token, d)
+
+ // Output:
+ // a:=
+ // b:;
+}
+
+func ExampleParser_Delimiters() {
+ var (
+ content = []byte(`a=b;c=d;`)
+ delims = []byte{'=', ';'}
+ parser = libbytes.NewParser(content, delims)
+ )
+
+ fmt.Printf("%s\n", parser.Delimiters())
+ // Output:
+ // =;
+}
+
+func ExampleParser_ReadN() {
+ var (
+ content = []byte(`a=b;c=d;`)
+ delims = []byte{'=', ';'}
+ parser = libbytes.NewParser(content, delims)
+ )
+
+ token, c := parser.ReadN(2)
+ fmt.Printf("token:%s c:%d\n", token, c)
+
+ token, c = parser.ReadN(0)
+ fmt.Printf("token:%s c:%d\n", token, c)
+
+ token, c = parser.ReadN(10)
+ fmt.Printf("token:%s c:%d\n", token, c)
+ // Output:
+ // token:a= c:98
+ // token: c:98
+ // token:b;c=d; c:0
+}
+
+// ExampleParser_ReadNoSpace reads tokens until the parser reports delimiter
+// 0, which signals end-of-content.
+func ExampleParser_ReadNoSpace() {
+	parser := libbytes.NewParser([]byte(` a = b ;`), []byte(`=;`))
+
+	var (
+		token []byte
+		d     byte = 1 // Any non-zero value, to enter the loop.
+	)
+	for d != 0 {
+		token, d = parser.ReadNoSpace()
+		fmt.Printf("%s:%d\n", token, d)
+	}
+	// Output:
+	// a:61
+	// b:59
+	// :0
+}
+
+func ExampleParser_RemoveDelimiters() {
+ var (
+ content = []byte(` a = b ; c = d `)
+ delims = []byte(`=;`)
+ parser = libbytes.NewParser(content, delims)
+ )
+
+ token, _ := parser.ReadNoSpace()
+ fmt.Printf("%s\n", token)
+
+ parser.RemoveDelimiters([]byte{';'})
+ token, _ = parser.ReadNoSpace()
+ fmt.Printf("%s\n", token)
+
+ // Output:
+ // a
+ // b ; c
+}
+
+func ExampleParser_Reset() {
+ var (
+ content = []byte(`a.b.c;`)
+ delims = []byte(`.`)
+ parser = libbytes.NewParser(content, delims)
+ )
+
+ parser.Read()
+ parser.Reset(content, delims)
+ remain, pos := parser.Stop()
+ fmt.Printf("remain:%s pos:%d\n", remain, pos)
+ // Output:
+ // remain:a.b.c; pos:0
+}
+
+func ExampleParser_SetDelimiters() {
+ var (
+ content = []byte(`a.b.c;`)
+ delims = []byte(`.`)
+ parser = libbytes.NewParser(content, delims)
+ token []byte
+ )
+
+ token, _ = parser.Read()
+ fmt.Println(string(token))
+
+ parser.SetDelimiters([]byte(`;`))
+
+ token, _ = parser.Read()
+ fmt.Println(string(token))
+
+ // Output:
+ // a
+ // b.c
+}
+
+func ExampleParser_Skip() {
+ var (
+ content = []byte(`a = b; c = d;`)
+ delims = []byte{'=', ';'}
+ parser = libbytes.NewParser(content, delims)
+ token []byte
+ )
+
+ parser.Skip()
+ token, _ = parser.ReadNoSpace()
+ fmt.Println(string(token))
+
+ parser.Skip()
+ token, _ = parser.ReadNoSpace()
+ fmt.Println(string(token))
+
+ parser.Skip()
+ token, _ = parser.ReadNoSpace()
+ fmt.Println(string(token))
+
+ // Output:
+ // b
+ // d
+ //
+}
+
+func ExampleParser_SkipN() {
+ var (
+ content = []byte(`a=b;c=d;`)
+ delims = []byte{'=', ';'}
+ parser = libbytes.NewParser(content, delims)
+ token []byte
+ c byte
+ )
+
+ c = parser.SkipN(2)
+ fmt.Printf("Skip: %c\n", c)
+ token, _ = parser.ReadNoSpace()
+ fmt.Println(string(token))
+
+ c = parser.SkipN(2)
+ fmt.Printf("Skip: %c\n", c)
+ token, _ = parser.ReadNoSpace()
+ fmt.Println(string(token))
+
+ _ = parser.SkipN(2)
+ token, _ = parser.ReadNoSpace()
+ fmt.Println(string(token))
+
+ // Output:
+ // Skip: b
+ // b
+ // Skip: d
+ // d
+ //
+}
+
+func ExampleParser_SkipHorizontalSpaces() {
+ var (
+ content = []byte(" \t\r\fA. \nB.")
+ delims = []byte{'.'}
+ parser = libbytes.NewParser(content, delims)
+ )
+
+ parser.SkipHorizontalSpaces()
+ token, d := parser.Read()
+ fmt.Printf("token:%s delim:%c\n", token, d)
+
+ parser.SkipHorizontalSpaces()
+ token, d = parser.Read() // The token include \n.
+ fmt.Printf("token:%s delim:%c\n", token, d)
+
+ parser.SkipHorizontalSpaces()
+ token, d = parser.Read() // The token include \n.
+ fmt.Printf("token:%s delim:%d\n", token, d)
+
+ // Output:
+ // token:A delim:.
+ // token:
+ // B delim:.
+ // token: delim:0
+}
+
+func ExampleParser_SkipSpaces() {
+ var (
+ content = []byte(" \t\r\fA. \nB.")
+ delims = []byte{'.'}
+ parser = libbytes.NewParser(content, delims)
+ )
+
+ parser.SkipSpaces()
+ token, d := parser.Read()
+ fmt.Printf("token:%s delim:%c\n", token, d)
+
+ parser.SkipSpaces()
+ token, d = parser.Read() // The token include \n.
+ fmt.Printf("token:%s delim:%c\n", token, d)
+
+ parser.SkipSpaces()
+ token, d = parser.Read() // The token include \n.
+ fmt.Printf("token:%s delim:%d\n", token, d)
+
+ // Output:
+ // token:A delim:.
+ // token:B delim:.
+ // token: delim:0
+}
+
+func ExampleParser_Stop() {
+ var (
+ content = []byte(`a.b.c;`)
+ delims = []byte(`.`)
+ parser = libbytes.NewParser(content, delims)
+
+ remain []byte
+ pos int
+ )
+
+ parser.Read()
+ remain, pos = parser.Stop()
+ fmt.Printf("remain:%s pos:%d\n", remain, pos)
+
+ parser.Reset(content, []byte(`;`))
+ parser.Read()
+ remain, pos = parser.Stop()
+ fmt.Printf("remain:%s pos:%d\n", remain, pos)
+
+ // Output:
+ // remain:b.c; pos:2
+ // remain: pos:6
+}
+
+func ExampleParser_UnreadN() {
+ var (
+ parser = libbytes.NewParser([]byte(`a,b.c/d`), []byte(`,./`))
+ token []byte
+ c byte
+ )
+
+ parser.Read()
+ parser.Read()
+ parser.Read()
+ parser.Read() // All content should be readed now.
+
+ c = parser.UnreadN(2) // Move the index to '/'.
+ fmt.Printf("UnreadN(2): %c\n", c)
+
+ token, c = parser.Read()
+ fmt.Printf("Read: %s %c\n", token, c)
+
+ // Position 99 greater than current index, this will reset index to 0.
+ c = parser.UnreadN(99)
+ fmt.Printf("UnreadN(99): %c\n", c)
+
+ token, c = parser.Read()
+ fmt.Printf("Read: %s %c\n", token, c)
+
+ // Output:
+ // UnreadN(2): /
+ // Read: /
+ // UnreadN(99): a
+ // Read: a ,
+}