diff options
| author | Shulhan <ms@kilabit.info> | 2023-04-06 01:36:47 +0700 |
|---|---|---|
| committer | Shulhan <ms@kilabit.info> | 2023-04-08 16:56:26 +0700 |
| commit | bf053a185ff3b34c37c539bf2e85e1cf34d9cf7c (patch) | |
| tree | 31d1c72159fa3b580e6aaa8a79f6373705de70e7 /lib/bytes/parser.go | |
| parent | 3de6c75a8364cd2042746c586ab008c9187e4aaa (diff) | |
| download | pakakeh.go-bf053a185ff3b34c37c539bf2e85e1cf34d9cf7c.tar.xz | |
lib/bytes: add various methods to Parser
The AddDelimiters method adds more delimiters to the current parser.
The ReadNoSpace method reads the next token, ignoring leading spaces
even if a space is one of the delimiters. The returned token will have no
trailing spaces.
The RemoveDelimiters method removes the given delimiters (delims) from the
current delimiters.
The Reset method sets all internal state to the new content and
delimiters.
The SetDelimiters method replaces the current delimiters with delims.
The SkipHorizontalSpaces method skips space (" "), tab ("\t"), carriage
return ("\r"), and form feed ("\f") characters, and returns the first
non-space character, or 0 if it reaches end-of-content.
The SkipSpaces method skips all space characters
(' ', '\f', '\n', '\r', '\t') and returns the first non-space character,
or 0 if it reaches end-of-content.
The Stop method stops the parser, returns the remaining unparsed content
and its last position, and then calls Reset to reset the internal state
back to zero.
The UnreadN method unreads N characters and returns the character the
read pointer then points to. If N is greater than the current position
index, the read pointer index is reset back to zero.
Diffstat (limited to 'lib/bytes/parser.go')
| -rw-r--r-- | lib/bytes/parser.go | 193 |
1 files changed, 193 insertions, 0 deletions
diff --git a/lib/bytes/parser.go b/lib/bytes/parser.go index a8bc5d7b..e4e14927 100644 --- a/lib/bytes/parser.go +++ b/lib/bytes/parser.go @@ -4,6 +4,8 @@ package bytes +import "github.com/shuLhan/share/lib/ascii" + // Parser implement tokenize parser for stream of byte using one or more // delimiters as separator between token. type Parser struct { @@ -24,6 +26,16 @@ func NewParser(content, delims []byte) (bp *Parser) { return bp } +// AddDelimiters add another delimiters to the current parser. +func (bp *Parser) AddDelimiters(delims []byte) { + bp.delims = append(bp.delims, delims...) +} + +// Delimiters return the copy of current delimiters. +func (bp *Parser) Delimiters() []byte { + return Copy(bp.delims) +} + // Read read a token until one of the delimiters found. // If one of delimiter match, it will return it as d. // When end of content encountered, the returned token may be not empty but @@ -44,6 +56,152 @@ func (bp *Parser) Read() (token []byte, d byte) { return token, 0 } +// ReadN read exactly n characters ignoring the delimiters. +// It will return the token and the character after n or 0 if end-of-content. +func (bp *Parser) ReadN(n int) (token []byte, d byte) { + var ( + c byte + count int + ) + for bp.x < bp.size { + c = bp.content[bp.x] + if count >= n { + return token, c + } + token = append(token, c) + count++ + bp.x++ + } + return token, 0 +} + +// ReadNoSpace read the next token by ignoring the leading spaces, even if its +// one of the delimiter. +// The returned token will have no trailing spaces. +func (bp *Parser) ReadNoSpace() (token []byte, d byte) { + var c byte + + // Ignore leading spaces. + for ; bp.x < bp.size; bp.x++ { + c = bp.content[bp.x] + if !ascii.IsSpace(c) { + break + } + } + + for ; bp.x < bp.size; bp.x++ { + c = bp.content[bp.x] + for _, d = range bp.delims { + if d == c { + bp.x++ + goto out + } + } + token = append(token, c) + } + d = 0 + +out: + // Remove trailing spaces. 
+ var x int + for x = len(token) - 1; x >= 0; x-- { + if !ascii.IsSpace(token[x]) { + break + } + } + if x < 0 { + token = token[:0] + } else { + token = token[:x+1] + } + + return token, d +} + +// RemoveDelimiters remove delimiters delims from current delimiters. +func (bp *Parser) RemoveDelimiters(delims []byte) { + var ( + newDelims = make([]byte, 0, len(bp.delims)) + + oldd byte + remd byte + found bool + ) + for _, oldd = range bp.delims { + found = false + for _, remd = range delims { + if remd == oldd { + found = true + break + } + } + if !found { + newDelims = append(newDelims, oldd) + } + } + bp.delims = newDelims +} + +// Reset the Parser by setting all internal state to new content and +// delimiters. +func (bp *Parser) Reset(content, delims []byte) { + bp.content = content + bp.delims = delims + bp.x = 0 + bp.size = len(content) +} + +// SetDelimiters replace the current delimiters with delims. +func (bp *Parser) SetDelimiters(delims []byte) { + bp.delims = delims +} + +// Skip skip parsing token until one of the delimiters found or +// end-of-content. +func (bp *Parser) Skip() (c byte) { + var d byte + for bp.x < bp.size { + c = bp.content[bp.x] + for _, d = range bp.delims { + if c == d { + bp.x++ + return c + } + } + bp.x++ + } + return 0 +} + +// SkipN skip exactly N characters ignoring delimiters. +// It will return the next character after N or 0 if it reach end-of-content. +func (bp *Parser) SkipN(n int) (c byte) { + var count int + for bp.x < bp.size { + c = bp.content[bp.x] + if count >= n { + return c + } + count++ + bp.x++ + } + return 0 +} + +// SkipHorizontalSpaces skip space (" "), tab ("\t"), carriage return +// ("\r"), and form feed ("\f") characters; and return the first non-space +// character or 0 if it reach end-of-content. 
+func (bp *Parser) SkipHorizontalSpaces() (c byte) { + for ; bp.x < bp.size; bp.x++ { + c = bp.content[bp.x] + if c == ' ' || c == '\t' || c == '\r' || c == '\f' { + continue + } + return c + } + return 0 +} + // SkipLine skip all characters until new line. // It will return 0 if EOF. func (bp *Parser) SkipLine() (c byte) { @@ -57,3 +215,38 @@ func (bp *Parser) SkipLine() (c byte) { } return 0 } + +// SkipSpaces skip all spaces character (' ', '\f', '\n', '\r', '\t') and +// return the first non-space character or 0 if it reach end-of-content. +func (bp *Parser) SkipSpaces() (c byte) { + for ; bp.x < bp.size; bp.x++ { + c = bp.content[bp.x] + if ascii.IsSpace(c) { + continue + } + return c + } + return 0 +} + +// Stop the parser, return the remaining unparsed content and its last +// position, and then call Reset to reset the internal state back to zero. +func (bp *Parser) Stop() (remain []byte, pos int) { + remain = Copy(bp.content[bp.x:]) + pos = bp.x + bp.Reset(nil, nil) + return remain, pos +} + +// UnreadN unread N characters and return the character its pointed +// to. +// If N greater than current position index, it will reset the read pointer +// index back to zero. +func (bp *Parser) UnreadN(n int) byte { + if n > bp.x { + bp.x = 0 + } else { + bp.x -= n + } + return bp.content[bp.x] +} |
