lib/bytes: refactoring and cleaning up the bytes package

The bytes package, and many other packages in this module, is written when I still learning and using the Go language for master thesis. Some of the code, like function signature, does not follow the Go idiom, at least not how the Go source code looks like. A breaking changes, * WriteUint16 and WriteUint32 accept slice only without pointer. There is no need to pass slice as pointer to function if we want to modify the content, as long as the backing storage is not changed. Bug fixes, * PrintHex: fix print layout on the last line * ReadHexByte: fix possible index out of range * SkipAfterToken return -1 and false if no token found, as promised in the comment, instead of the length of text. We move all unit test to example so we have test and example in the documentation at the same time. This changes make all test coverage 100%.
author: Shulhan <ms@kilabit.info> 2021-10-16 18:45:40 +0700
committer: Shulhan <ms@kilabit.info> 2021-10-17 01:16:27 +0700
commit: 8fc20636aa79b422864c0fb3e5c6e13d1eba3795 (patch)
tree: c3656651d0a55720641ce317adb5397ba09e51f9 /lib/bytes/bytes.go
parent: 2080593f8856863d8819062eede8ea59036b3299 (diff)
download: pakakeh.go-8fc20636aa79b422864c0fb3e5c6e13d1eba3795.tar.xz
1 files changed, 237 insertions, 220 deletions
diff --git a/lib/bytes/bytes.go b/lib/bytes/bytes.go
index f5a60f3c..ae1a5094 100644
--- a/lib/bytes/bytes.go
+++ b/lib/bytes/bytes.go
@@ -8,7 +8,6 @@ package bytes
 import (
 	"bytes"
 	"fmt"
-	"reflect"
 	"unicode"
 )
 
@@ -53,32 +52,20 @@ func AppendUint32(data []byte, v uint32) []byte {
 }
 
 //
-// Concat merge one or more slice of byte or string in arguments into slice of
+// Concat merge one or more []byte or string in args into slice of
 // byte.
-// Any type that is not []byte or string in arguments will be ignored.
+// Any type that is not []byte or string in args will be ignored.
 //
-func Concat(sb []byte, args ...interface{}) (out []byte) {
-	if len(sb) > 0 {
-		out = append(out, sb...)
-	}
+func Concat(args ...interface{}) (out []byte) {
 	for _, arg := range args {
-		v := reflect.ValueOf(arg)
-		if v.Kind() == reflect.String {
-			out = append(out, arg.(string)...)
-		}
-		if v.Kind() != reflect.Slice {
-			continue
-		}
-		if v.Len() == 0 {
-			continue
+		switch v := arg.(type) {
+		case string:
+			out = append(out, []byte(v)...)
+		case []byte:
+			out = append(out, v...)
 		}
-		if v.Index(0).Kind() != reflect.Uint8 {
-			continue
-		}
-		b := v.Bytes()
-		out = append(out, b...)
 	}
-	return
+	return out
 }
 
 //
@@ -94,153 +81,199 @@ func Copy(src []byte) (dst []byte) {
 }
 
 //
-// CutUntilToken cut line until we found token.
+// CutUntilToken cut text until we found token.
 //
-// If token found, it will return all cutted bytes before token, positition of
-// byte after token, and boolean true.
+// If token found, it will return all bytes before token, position of byte
+// after token, and true.
 //
 // If no token found, it will return false.
 //
-// If `checkEsc` is true, token that is prefixed with escaped character
-// '\' will be skipped.
-//
+// If checkEsc is true, token that is prefixed with escaped character ('\')
+// will be skipped, the escape character will be removed.
 //
-func CutUntilToken(line, token []byte, startAt int, checkEsc bool) ([]byte, int, bool) {
-	var (
-		v              []byte
-		p              int
-		found, escaped bool
-	)
+func CutUntilToken(text, token []byte, startAt int, checkEsc bool) (cut []byte, pos int, found bool) {
+	var isEsc bool
 
-	linelen := len(line)
+	textlen := len(text)
 	tokenlen := len(token)
 	if tokenlen == 0 {
-		return line, -1, false
+		return text, -1, false
 	}
 	if startAt < 0 {
 		startAt = 0
 	}
 
-	for p = startAt; p < linelen; p++ {
+	for pos = startAt; pos < textlen; pos++ {
 		// Check if the escape character is used to escaped the
 		// token ...
-		if checkEsc && line[p] == '\\' {
-			if escaped {
+		if checkEsc && text[pos] == '\\' {
+			if isEsc {
 				// escaped already, its mean double '\\'
-				v = append(v, '\\')
-				escaped = false
+				cut = append(cut, '\\')
+				isEsc = false
 			} else {
-				escaped = true
+				isEsc = true
 			}
 			continue
 		}
-		if line[p] != token[0] {
-			if escaped {
+		if text[pos] != token[0] {
+			if isEsc {
 				// ... turn out its not escaping token.
-				v = append(v, '\\')
-				escaped = false
+				cut = append(cut, '\\')
+				isEsc = false
 			}
-			v = append(v, line[p])
+			cut = append(cut, text[pos])
 			continue
 		}
 
 		// We found the first token character.
 		// Lets check if its match with all content of token.
-		found = IsTokenAt(line, token, p)
-
-		// False alarm ...
+		found = IsTokenAt(text, token, pos)
 		if !found {
-			if escaped {
+			if isEsc {
 				// ... turn out its not escaping token.
-				v = append(v, '\\')
-				escaped = false
+				cut = append(cut, '\\')
+				isEsc = false
 			}
-			v = append(v, line[p])
+			cut = append(cut, text[pos])
 			continue
 		}
 
 		// Found it, but if its prefixed with escaped char, then
 		// we assumed it as non breaking token.
-		if escaped {
-			v = append(v, token...)
-			p = p + tokenlen - 1
-			escaped = false
+		if isEsc {
+			cut = append(cut, token...)
+			pos = pos + tokenlen - 1
+			isEsc = false
 			continue
 		}
 
-		// We found the token match in `line` at `p`
-		return v, p + tokenlen, true
+		// We found the token match in `text` at `p`
+		return cut, pos + tokenlen, true
 	}
 
 	// We did not found it...
-	return v, p, false
+	return cut, pos, false
 }
 
 //
-// EncloseRemove given a line, remove all bytes inside it, starting from
-// `leftcap` until the `rightcap` and return cutted line and status to true.
+// EncloseRemove given a text, find the leftToken and rightToken and cut
+// the content in between them and return it with status true.
+// Keep doing it until no more leftToken and rightToken found.
 //
-// If no `leftcap` or `rightcap` is found, it will return line as is, and
-// status will be false.
+// If no leftToken or rightToken is found, it will return text as is and
+// false.
 //
-func EncloseRemove(line, leftcap, rightcap []byte) ([]byte, bool) {
-	lidx := TokenFind(line, leftcap, 0)
-	ridx := TokenFind(line, rightcap, lidx+1)
-
-	if lidx < 0 || ridx < 0 || lidx >= ridx {
-		return line, false
+func EncloseRemove(text, leftToken, rightToken []byte) (cut []byte, found bool) {
+	lidx := TokenFind(text, leftToken, 0)
+	if lidx < 0 {
+		return text, false
+	}
+	ridx := TokenFind(text, rightToken, lidx+1)
+	if ridx < 0 {
+		return text, false
 	}
 
-	var newline []byte
-	newline = append(newline, line[:lidx]...)
-	newline = append(newline, line[ridx+len(rightcap):]...)
-	newline, _ = EncloseRemove(newline, leftcap, rightcap)
+	cut = make([]byte, 0, len(text[:lidx])+len(text[ridx:]))
+	cut = append(cut, text[:lidx]...)
+	cut = append(cut, text[ridx+len(rightToken):]...)
+	cut, _ = EncloseRemove(cut, leftToken, rightToken)
 
-	return newline, true
+	return cut, true
 }
 
 //
-// EncloseToken will find `token` in `line` and enclose it with bytes from
-// `leftcap` and `rightcap`.
-// If at least one token found, it will return modified line with true status.
-// If no token is found, it will return the same line with false status.
+// EncloseToken find "token" in "text" and enclose it with bytes from
+// "leftcap" and "rightcap".
+// If at least one token found, it will return modified text with true status.
+// If no token is found, it will return the same text with false status.
 //
-func EncloseToken(line, token, leftcap, rightcap []byte) (
-	newline []byte,
-	status bool,
+func EncloseToken(text, token, leftcap, rightcap []byte) (
+	newtext []byte,
+	found bool,
 ) {
 	enclosedLen := len(token)
 
 	startat := 0
 	for {
-		foundat := TokenFind(line, token, startat)
+		foundat := TokenFind(text, token, startat)
 
 		if foundat < 0 {
-			newline = append(newline, line[startat:]...)
+			newtext = append(newtext, text[startat:]...)
 			break
 		}
 
-		newline = append(newline, line[startat:foundat]...)
-		newline = append(newline, leftcap...)
-		newline = append(newline, token...)
-		newline = append(newline, rightcap...)
+		newtext = append(newtext, text[startat:foundat]...)
+		newtext = append(newtext, leftcap...)
+		newtext = append(newtext, token...)
+		newtext = append(newtext, rightcap...)
 
 		startat = foundat + enclosedLen
 	}
 	if startat > 0 {
-		status = true
+		found = true
 	}
 
-	return
+	return newtext, found
+}
+
+//
+// InReplace replace any characters in "text" that is not in "allowed" with
+// character "c".
+// The replacement occur inside the "text" backing storage, which means the
+// passed "text" will changes and returned.
+//
+func InReplace(text, allowed []byte, c byte) []byte {
+	if len(text) == 0 {
+		return nil
+	}
+
+	var found bool
+	for x := 0; x < len(text); x++ {
+		found = false
+		for y := 0; y < len(allowed); y++ {
+			if text[x] == allowed[y] {
+				found = true
+				break
+			}
+		}
+		if !found {
+			text[x] = c
+		}
+	}
+	return text
 }
 
 //
-// IsTokenAt return true if `line` at index `p` match with `token`,
+// Indexes returns the index of the all instance of "token" in "text", or nil
+// if no "token" found.
+//
+func Indexes(text, token []byte) (idxs []int) {
+	if len(text) == 0 || len(token) == 0 {
+		return nil
+	}
+
+	offset := 0
+	for {
+		idx := bytes.Index(text, token)
+		if idx == -1 {
+			break
+		}
+		idxs = append(idxs, offset+idx)
+		skip := idx + len(token)
+		offset += skip
+		text = text[skip:]
+	}
+	return idxs
+}
+
+//
+// IsTokenAt return true if `text` at index `p` match with `token`,
 // otherwise it will return false.
 // Empty token always return false.
 //
-func IsTokenAt(line, token []byte, p int) bool {
-	linelen := len(line)
+func IsTokenAt(text, token []byte, p int) bool {
+	textlen := len(text)
 	tokenlen := len(token)
 	if tokenlen == 0 {
 		return false
@@ -249,12 +282,12 @@ func IsTokenAt(line, token []byte, p int) bool {
 		p = 0
 	}
 
-	if p+tokenlen > linelen {
+	if p+tokenlen > textlen {
 		return false
 	}
 
 	for x := 0; x < tokenlen; x++ {
-		if line[p] != token[x] {
+		if text[p] != token[x] {
 			return false
 		}
 		p++
@@ -263,15 +296,39 @@ func IsTokenAt(line, token []byte, p int) bool {
 }
 
 //
+// MergeSpaces convert sequences of white spaces into single space ' '.
+//
+func MergeSpaces(in []byte) (out []byte) {
+	var isSpace bool
+	for _, c := range in {
+		if c == ' ' || c == '\t' || c == '\v' || c == '\f' || c == '\n' || c == '\r' {
+			isSpace = true
+			continue
+		}
+		if isSpace {
+			out = append(out, ' ')
+			isSpace = false
+		}
+		out = append(out, c)
+	}
+	if isSpace {
+		out = append(out, ' ')
+	}
+
+	return out
+}
+
+//
 // PrintHex will print each byte in slice as hexadecimal value into N column
 // length.
 //
 func PrintHex(title string, data []byte, col int) {
 	var (
 		start, x int
+		c        byte
 	)
 	fmt.Print(title)
-	for x = 0; x < len(data); x++ {
+	for x, c = range data {
 		if x%col == 0 {
 			if x > 0 {
 				fmt.Print(" ||")
@@ -287,16 +344,16 @@ func PrintHex(title string, data []byte, col int) {
 			start = x
 		}
 
-		fmt.Printf(" %02X", data[x])
+		fmt.Printf(" %02X", c)
 	}
-	rest := 16 - (x % col)
+	rest := col - (x % col)
 	if rest > 0 {
-		for y := 0; y < rest; y++ {
+		for y := 1; y < rest; y++ {
 			fmt.Print("   ")
 		}
 		fmt.Print(" ||")
 	}
-	for y := start; y < x; y++ {
+	for y := start; y <= x; y++ {
 		if data[y] >= 33 && data[y] <= 126 {
 			fmt.Printf(" %c", data[y])
 		} else {
@@ -308,15 +365,20 @@ func PrintHex(title string, data []byte, col int) {
 }
 
 //
-// ReadHexByte read two characters from data start from index "x" and convert
-// them to byte.
+// ReadHexByte read two hexadecimal characters from "data" start from index
+// "x" and convert them to byte.
+// It will return the byte and true if its read exactly two hexadecimal
+// characters, otherwise it will return 0 and false.
 //
 func ReadHexByte(data []byte, x int) (b byte, ok bool) {
+	if x < 0 {
+		return 0, false
+	}
 	if len(data) < x+2 {
 		return 0, false
 	}
-	var y uint = 4
-	for {
+	var y int = 4
+	for y >= 0 {
 		switch {
 		case data[x] >= '0' && data[x] <= '9':
 			b |= (data[x] - '0') << y
@@ -325,10 +387,7 @@ func ReadHexByte(data []byte, x int) (b byte, ok bool) {
 		case data[x] >= 'a' && data[x] <= 'f':
 			b |= (data[x] - ('a' - 10)) << y
 		default:
-			return b, false
-		}
-		if y == 0 {
-			break
+			return 0, false
 		}
 		y -= 4
 		x++
@@ -338,136 +397,86 @@ func ReadHexByte(data []byte, x int) (b byte, ok bool) {
 }
 
 //
-// ReadInt16 will convert two bytes from data start at `x` into int16 and
-// return it.
-//
-func ReadInt16(data []byte, x uint) int16 {
-	return int16(data[x])<<8 | int16(data[x+1])
-}
-
-//
-// ReadInt32 will convert four bytes from data start at `x` into int32 and
-// return it.
-//
-func ReadInt32(data []byte, x uint) int32 {
-	return int32(data[x])<<24 | int32(data[x+1])<<16 | int32(data[x+2])<<8 | int32(data[x+3])
-}
-
+// ReadInt16 read int16 value from "data" start at index "x".
+// It will return 0 if "x" is out of range.
 //
-// ReadUint16 will convert two bytes from data start at `x` into uint16 and
-// return it.
-//
-func ReadUint16(data []byte, x uint) uint16 {
-	return uint16(data[x])<<8 | uint16(data[x+1])
-}
-
-//
-// ReadUint32 will convert four bytes from data start at `x` into uint32 and
-// return it.
-//
-func ReadUint32(data []byte, x uint) uint32 {
-	return uint32(data[x])<<24 | uint32(data[x+1])<<16 | uint32(data[x+2])<<8 | uint32(data[x+3])
+func ReadInt16(data []byte, x uint) (v int16) {
+	if x+1 >= uint(len(data)) {
+		return 0
+	}
+	v = int16(data[x]) << 8
+	v |= int16(data[x+1])
+	return v
 }
 
 //
-// InReplace do a reverse replace on input, any characters that is not on
-// allowed, will be replaced with character c.
+// ReadInt32 read int32 value from "data" start at index "x".
+// It will return 0 if "x" is out of range.
 //
-func InReplace(in, allowed []byte, c byte) (out []byte) {
-	if len(in) == 0 {
-		return
-	}
-
-	out = make([]byte, len(in))
-	copy(out, in)
-	var found bool
-	for x := 0; x < len(in); x++ {
-		found = false
-		for y := 0; y < len(allowed); y++ {
-			if in[x] == allowed[y] {
-				found = true
-				break
-			}
-		}
-		if !found {
-			out[x] = c
-		}
+func ReadInt32(data []byte, x uint) (v int32) {
+	if x+3 >= uint(len(data)) {
+		return 0
 	}
-
-	return
+	v = int32(data[x]) << 24
+	v |= int32(data[x+1]) << 16
+	v |= int32(data[x+2]) << 8
+	v |= int32(data[x+3])
+	return v
 }
 
 //
-// Indexes returns the index of the all instance of token in s, or nil if
-// token is not present in s.
+// ReadUint16 read uint16 value from "data" start at index "x".
+// If x is out of range, it will return 0.
 //
-func Indexes(s []byte, token []byte) (idxs []int) {
-	if len(s) == 0 || len(token) == 0 {
-		return nil
-	}
-
-	offset := 0
-	for {
-		idx := bytes.Index(s, token)
-		if idx == -1 {
-			break
-		}
-		idxs = append(idxs, offset+idx)
-		skip := idx + len(token)
-		offset += skip
-		s = s[skip:]
+func ReadUint16(data []byte, x uint) (v uint16) {
+	if x+1 >= uint(len(data)) {
+		return 0
 	}
-	return idxs
+	v = uint16(data[x]) << 8
+	v |= uint16(data[x+1])
+	return v
 }
 
 //
-// MergeSpaces convert sequences of white spaces into single space ' '.
+// ReadUint32 read uint32 value from "data" start at index "x".
+// If x is out of range, it will return 0.
 //
-func MergeSpaces(in []byte) (out []byte) {
-	var isSpace bool
-	for _, c := range in {
-		if c == ' ' || c == '\t' || c == '\v' || c == '\f' || c == '\n' || c == '\r' {
-			isSpace = true
-			continue
-		}
-		if isSpace {
-			out = append(out, ' ')
-			isSpace = false
-		}
-		out = append(out, c)
+func ReadUint32(data []byte, x uint) (v uint32) {
+	if x+3 >= uint(len(data)) {
+		return 0
 	}
-	if isSpace {
-		out = append(out, ' ')
-	}
-
-	return out
+	v = uint32(data[x]) << 24
+	v |= uint32(data[x+1]) << 16
+	v |= uint32(data[x+2]) << 8
+	v |= uint32(data[x+3])
+	return v
 }
 
 //
-// SkipAfterToken skip all bytes until matched token is found and return the
+// SkipAfterToken skip all bytes until matched "token" is found and return the
 // index after the token and boolean true.
 //
-// If `checkEsc` is true, token that is prefixed with escaped character
+// If "checkEsc" is true, token that is prefixed with escaped character
 // '\' will be considered as non-match token.
 //
 // If no token found it will return -1 and boolean false.
 //
-func SkipAfterToken(line, token []byte, startAt int, checkEsc bool) (int, bool) {
-	linelen := len(line)
+func SkipAfterToken(text, token []byte, startAt int, checkEsc bool) (int, bool) {
+	textlen := len(text)
 	escaped := false
 	if startAt < 0 {
 		startAt = 0
 	}
 
 	p := startAt
-	for ; p < linelen; p++ {
+	for ; p < textlen; p++ {
 		// Check if the escape character is used to escaped the
 		// token.
-		if checkEsc && line[p] == '\\' {
+		if checkEsc && text[p] == '\\' {
 			escaped = true
 			continue
 		}
-		if line[p] != token[0] {
+		if text[p] != token[0] {
 			if escaped {
 				escaped = false
 			}
@@ -476,7 +485,7 @@ func SkipAfterToken(line, token []byte, startAt int, checkEsc bool) (int, bool)
 
 		// We found the first token character.
 		// Lets check if its match with all content of token.
-		found := IsTokenAt(line, token, p)
+		found := IsTokenAt(text, token, p)
 
 		// False alarm ...
 		if !found {
@@ -498,7 +507,7 @@ func SkipAfterToken(line, token []byte, startAt int, checkEsc bool) (int, bool)
 		return p, true
 	}
 
-	return p, false
+	return -1, false
 }
 
 //
@@ -525,13 +534,13 @@ func SnippetByIndexes(s []byte, indexes []int, sniplen int) (snippets [][]byte)
 }
 
 //
-// TokenFind return the first index of matched token in line, start at custom
+// TokenFind return the first index of matched token in text, start at custom
 // index.
 // If "startat" parameter is less than 0, then it will be set to 0.
 // If token is empty or no token found it will return -1.
 //
-func TokenFind(line, token []byte, startat int) (at int) {
-	linelen := len(line)
+func TokenFind(text, token []byte, startat int) (at int) {
+	textlen := len(text)
 	tokenlen := len(token)
 	if tokenlen == 0 {
 		return -1
@@ -542,8 +551,8 @@ func TokenFind(line, token []byte, startat int) (at int) {
 
 	y := 0
 	at = -1
-	for x := startat; x < linelen; x++ {
-		if line[x] == token[y] {
+	for x := startat; x < textlen; x++ {
+		if text[x] == token[y] {
 			if y == 0 {
 				at = x
 			}
@@ -598,19 +607,27 @@ func WordIndexes(s []byte, word []byte) (idxs []int) {
 }
 
 //
-// WriteUint16 into slice of byte.
+// WriteUint16 write uint16 value "v" into "data" start at position "x".
+// If x is out range, the data will not change.
 //
-func WriteUint16(data *[]byte, x uint, v uint16) {
-	(*data)[x] = byte(v >> 8)
-	(*data)[x+1] = byte(v)
+func WriteUint16(data []byte, x uint, v uint16) {
+	if x+1 >= uint(len(data)) {
+		return
+	}
+	data[x] = byte(v >> 8)
+	data[x+1] = byte(v)
 }
 
 //
-// WriteUint32 into slice of byte.
+// WriteUint32 write uint32 value into "data" start at position "x".
+// If x is out range, the data will not change.
 //
-func WriteUint32(data *[]byte, x uint, v uint32) {
-	(*data)[x] = byte(v >> 24)
-	(*data)[x+1] = byte(v >> 16)
-	(*data)[x+2] = byte(v >> 8)
-	(*data)[x+3] = byte(v)
+func WriteUint32(data []byte, x uint, v uint32) {
+	if x+3 >= uint(len(data)) {
+		return
+	}
+	data[x] = byte(v >> 24)
+	data[x+1] = byte(v >> 16)
+	data[x+2] = byte(v >> 8)
+	data[x+3] = byte(v)
 }
author	Shulhan <ms@kilabit.info>	2021-10-16 18:45:40 +0700
committer	Shulhan <ms@kilabit.info>	2021-10-17 01:16:27 +0700
commit	8fc20636aa79b422864c0fb3e5c6e13d1eba3795 (patch)
tree	c3656651d0a55720641ce317adb5397ba09e51f9 /lib/bytes/bytes.go
parent	2080593f8856863d8819062eede8ea59036b3299 (diff)
download	pakakeh.go-8fc20636aa79b422864c0fb3e5c6e13d1eba3795.tar.xz