aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorShulhan <ms@kilabit.info>2018-09-15 20:44:29 +0700
committerShulhan <ms@kilabit.info>2018-09-17 22:51:17 +0700
commit0f68f37ce159c96d56b4748eee6a9a88c6b7f801 (patch)
treeaad89a072ed73ca6669818c60596c0ab0c3f6333
parent05799acac0c977ea79c62df40de67bb3bef30db5 (diff)
downloadpakakeh.go-0f68f37ce159c96d56b4748eee6a9a88c6b7f801.tar.xz
Merge package "github.com/shuLhan/tekstus", part 1/1
-rw-r--r--lib/bytes/bytes.go275
-rw-r--r--lib/bytes/bytes_example_test.go106
-rw-r--r--lib/bytes/bytes_test.go228
3 files changed, 609 insertions, 0 deletions
diff --git a/lib/bytes/bytes.go b/lib/bytes/bytes.go
index d8f837cc..7210b746 100644
--- a/lib/bytes/bytes.go
+++ b/lib/bytes/bytes.go
@@ -64,6 +64,180 @@ func AppendUint32(data *[]byte, v uint32) {
}
//
+// CutUntilToken cut line until we found token.
+//
+// If token found, it will return all cutted bytes before token, positition of
+// byte after token, and boolean true.
+//
+// If no token found, it will return false.
+//
+// If `checkEsc` is true, token that is prefixed with escaped character
+// '\' will be skipped.
+//
+//
+func CutUntilToken(line, token []byte, startAt int, checkEsc bool) ([]byte, int, bool) {
+ var (
+ v []byte
+ p int
+ found, escaped bool
+ )
+
+ linelen := len(line)
+ tokenlen := len(token)
+ if tokenlen == 0 {
+ return line, -1, false
+ }
+ if startAt < 0 {
+ startAt = 0
+ }
+
+ for p = startAt; p < linelen; p++ {
+ // Check if the escape character is used to escaped the
+ // token ...
+ if checkEsc && line[p] == '\\' {
+ if escaped {
+ // escaped already, its mean double '\\'
+ v = append(v, '\\')
+ escaped = false
+ } else {
+ escaped = true
+ }
+ continue
+ }
+ if line[p] != token[0] {
+ if escaped {
+ // ... turn out its not escaping token.
+ v = append(v, '\\')
+ escaped = false
+ }
+ v = append(v, line[p])
+ continue
+ }
+
+ // We found the first token character.
+ // Lets check if its match with all content of token.
+ found = IsTokenAt(line, token, p)
+
+ // False alarm ...
+ if !found {
+ if escaped {
+ // ... turn out its not escaping token.
+ v = append(v, '\\')
+ escaped = false
+ }
+ v = append(v, line[p])
+ continue
+ }
+
+ // Found it, but if its prefixed with escaped char, then
+ // we assumed it as non breaking token.
+ if escaped {
+ v = append(v, token...)
+ p = p + tokenlen - 1
+ escaped = false
+ continue
+ }
+
+ // We found the token match in `line` at `p`
+ return v, p + tokenlen, true
+ }
+
+ // We did not found it...
+ return v, p, false
+}
+
+//
+// EncloseRemove given a line, remove all bytes inside it, starting from
+// `leftcap` until the `rightcap` and return cutted line and status to true.
+//
+// If no `leftcap` or `rightcap` is found, it will return line as is, and
+// status will be false.
+//
+func EncloseRemove(line, leftcap, rightcap []byte) (
+ newline []byte,
+ status bool,
+) {
+ lidx := TokenFind(line, leftcap, 0)
+ ridx := TokenFind(line, rightcap, lidx+1)
+
+ if lidx < 0 || ridx < 0 || lidx >= ridx {
+ return line, false
+ }
+
+ newline = append(newline, line[:lidx]...)
+ newline = append(newline, line[ridx+len(rightcap):]...)
+ status = true
+
+ // Repeat
+ newline, _ = EncloseRemove(newline, leftcap, rightcap)
+
+ return
+}
+
+//
+// EncloseToken will find `token` in `line` and enclose it with bytes from
+// `leftcap` and `rightcap`.
+// If at least one token found, it will return modified line with true status.
+// If no token is found, it will return the same line with false status.
+//
+func EncloseToken(line, token, leftcap, rightcap []byte) (
+ newline []byte,
+ status bool,
+) {
+ enclosedLen := len(token)
+
+ startat := 0
+ for {
+ foundat := TokenFind(line, token, startat)
+
+ if foundat < 0 {
+ newline = append(newline, line[startat:]...)
+ break
+ }
+
+ newline = append(newline, line[startat:foundat]...)
+ newline = append(newline, leftcap...)
+ newline = append(newline, token...)
+ newline = append(newline, rightcap...)
+
+ startat = foundat + enclosedLen
+ }
+ if startat > 0 {
+ status = true
+ }
+
+ return
+}
+
//
// IsTokenAt reports whether the bytes of `line` starting at index `p` are
// equal to `token`.
// A negative `p` is treated as 0.
// It returns false if token is empty or if token does not fit in line at
// that position.
//
func IsTokenAt(line, token []byte, p int) bool {
	if len(token) == 0 {
		return false
	}
	if p < 0 {
		p = 0
	}

	// The token must fit completely inside line.
	if p+len(token) > len(line) {
		return false
	}

	for i, c := range token {
		if line[p+i] != c {
			return false
		}
	}
	return true
}
+
+//
// PrintHex will print each byte in slice as hexadecimal value into N column
// length.
//
@@ -112,6 +286,64 @@ func ReadUint32(data []byte, x uint) uint32 {
}
//
+// SkipAfterToken skip all bytes until matched token is found and return the
+// index after the token and boolean true.
+//
+// If `checkEsc` is true, token that is prefixed with escaped character
+// '\' will be considered as non-match token.
+//
+// If no token found it will return -1 and boolean false.
+//
+func SkipAfterToken(line, token []byte, startAt int, checkEsc bool) (int, bool) {
+ linelen := len(line)
+ escaped := false
+ if startAt < 0 {
+ startAt = 0
+ }
+
+ p := startAt
+ for ; p < linelen; p++ {
+ // Check if the escape character is used to escaped the
+ // token.
+ if checkEsc && line[p] == '\\' {
+ escaped = true
+ continue
+ }
+ if line[p] != token[0] {
+ if escaped {
+ escaped = false
+ }
+ continue
+ }
+
+ // We found the first token character.
+ // Lets check if its match with all content of token.
+ found := IsTokenAt(line, token, p)
+
+ // False alarm ...
+ if !found {
+ if escaped {
+ escaped = false
+ }
+ continue
+ }
+
+ // Its matched, but if its prefixed with escaped char, then
+ // we assumed it as non breaking token.
+ if checkEsc && escaped {
+ escaped = false
+ continue
+ }
+
+ // We found the token at `p`
+ p = p + len(token)
+ return p, true
+ }
+
+ return p, false
+}
+
+//
// ToLower convert slice of bytes to lower cases, in places.
//
func ToLower(data *[]byte) {
@@ -136,6 +368,49 @@ func ToUpper(data *[]byte) {
}
//
// TokenFind returns the index of the first occurrence of token in line,
// searching from index startat.
// If "startat" parameter is less than 0, then it will be set to 0.
// If token is empty or no token is found it will return -1.
//
func TokenFind(line, token []byte, startat int) (at int) {
	tokenlen := len(token)
	if tokenlen == 0 {
		return -1
	}
	if startat < 0 {
		startat = 0
	}

	// Every candidate position is checked on its own, so a failed
	// partial match can never hide a match that begins inside it
	// (for example "ab" in "aab").
	last := len(line) - tokenlen
	for at = startat; at <= last; at++ {
		if line[at] != token[0] {
			continue
		}
		// Comparing the string conversions does not allocate.
		if string(line[at:at+tokenlen]) == string(token) {
			return at
		}
	}

	return -1
}
+
+//
// WriteUint16 into slice of byte.
//
func WriteUint16(data *[]byte, x uint, v uint16) {
diff --git a/lib/bytes/bytes_example_test.go b/lib/bytes/bytes_example_test.go
new file mode 100644
index 00000000..141d7f17
--- /dev/null
+++ b/lib/bytes/bytes_example_test.go
@@ -0,0 +1,106 @@
+// Copyright 2018, Shulhan <ms@kilabit.info>. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package bytes
+
+import (
+ "fmt"
+)
+
+func ExampleCutUntilToken() {
+ line := []byte(`abc \def ghi`)
+
+ cut, p, found := CutUntilToken(line, []byte("def"), 0, false)
+ fmt.Printf("'%s' %d %t\n", cut, p, found)
+
+ cut, p, found = CutUntilToken(line, []byte("def"), 0, true)
+ fmt.Printf("'%s' %d %t\n", cut, p, found)
+
+ cut, p, found = CutUntilToken(line, []byte("ef"), 0, true)
+ fmt.Printf("'%s' %d %t\n", cut, p, found)
+
+ cut, p, found = CutUntilToken(line, []byte("hi"), 0, true)
+ fmt.Printf("'%s' %d %t\n", cut, p, found)
+
+ // Output:
+ // 'abc \' 8 true
+ // 'abc def ghi' 12 false
+ // 'abc \d' 8 true
+ // 'abc \def g' 12 true
+}
+
+func ExampleEncloseRemove() {
+ line := []byte(`[[ ABC ]] DEF`)
+ leftcap := []byte(`[[`)
+ rightcap := []byte(`]]`)
+
+ got, changed := EncloseRemove(line, leftcap, rightcap)
+
+ fmt.Printf("'%s' %t\n", got, changed)
+ // Output: ' DEF' true
+}
+
+func ExampleEncloseToken() {
+ line := []byte(`// Copyright 2016-2018 "Shulhan <ms@kilabit.info>". All rights reserved.`)
+ token := []byte(`"`)
+ leftcap := []byte(`\`)
+ rightcap := []byte(`_`)
+
+ got, changed := EncloseToken(line, token, leftcap, rightcap)
+
+ fmt.Printf("'%s' %t\n", got, changed)
+ // Output:
+ // '// Copyright 2016-2018 \"_Shulhan <ms@kilabit.info>\"_. All rights reserved.' true
+}
+
+func ExampleIsTokenAt() {
+ line := []byte("Hello, world")
+ token := []byte("world")
+ token2 := []byte("worlds")
+ tokenEmpty := []byte{}
+
+ fmt.Printf("%t\n", IsTokenAt(line, tokenEmpty, 6))
+ fmt.Printf("%t\n", IsTokenAt(line, token, 6))
+ fmt.Printf("%t\n", IsTokenAt(line, token, 7))
+ fmt.Printf("%t\n", IsTokenAt(line, token, 8))
+ fmt.Printf("%t\n", IsTokenAt(line, token2, 8))
+ // Output:
+ // false
+ // false
+ // true
+ // false
+ // false
+}
+
+func ExampleSkipAfterToken() {
+ line := []byte(`abc \def ghi`)
+
+ p, found := SkipAfterToken(line, []byte("def"), 0, false)
+ fmt.Printf("%d %t\n", p, found)
+
+ p, found = SkipAfterToken(line, []byte("def"), 0, true)
+ fmt.Printf("%d %t\n", p, found)
+
+ p, found = SkipAfterToken(line, []byte("ef"), 0, true)
+ fmt.Printf("%d %t\n", p, found)
+
+ p, found = SkipAfterToken(line, []byte("hi"), 0, true)
+ fmt.Printf("%d %t\n", p, found)
+
+ // Output:
+ // 8 true
+ // 12 false
+ // 8 true
+ // 12 true
+}
+
+func ExampleTokenFind() {
+ line := []byte("// Copyright 2018, Shulhan <ms@kilabit.info>. All rights reserved.")
+ token := []byte("right")
+
+ at := TokenFind(line, token, 0)
+
+ fmt.Printf("%d\n", at)
+ // Output: 7
+}
diff --git a/lib/bytes/bytes_test.go b/lib/bytes/bytes_test.go
index 50669c4f..11f741d0 100644
--- a/lib/bytes/bytes_test.go
+++ b/lib/bytes/bytes_test.go
@@ -7,6 +7,197 @@ import (
"github.com/shuLhan/share/lib/test"
)
+func TestCutUntilToken(t *testing.T) {
+ line := []byte(`abc \def ghi`)
+
+ cases := []struct {
+ token []byte
+ startAt int
+ checkEsc bool
+ exp string
+ expIdx int
+ expFound bool
+ }{{
+ exp: `abc \def ghi`,
+ expIdx: -1,
+ expFound: false,
+ }, {
+ token: []byte(`def`),
+ exp: `abc \`,
+ expIdx: 8,
+ expFound: true,
+ }, {
+ token: []byte(`def`),
+ checkEsc: true,
+ exp: `abc def ghi`,
+ expIdx: 12,
+ expFound: false,
+ }, {
+ token: []byte(`ef`),
+ checkEsc: true,
+ exp: `abc \d`,
+ expIdx: 8,
+ expFound: true,
+ }}
+
+ for x, c := range cases {
+ t.Logf("#%d\n", x)
+
+ got, idx, found := CutUntilToken(line, c.token, c.startAt, c.checkEsc)
+
+ test.Assert(t, "cut", c.exp, string(got), true)
+ test.Assert(t, "idx", c.expIdx, idx, true)
+ test.Assert(t, "found", c.expFound, found, true)
+ }
+}
+
+func TestEncloseRemove(t *testing.T) {
+ line := []byte(`// Copyright 2016-2018 "Shulhan <ms@kilabit.info>". All rights reserved.`)
+
+ cases := []struct {
+ line []byte
+ leftcap []byte
+ rightcap []byte
+ exp string
+ }{{
+ line: line,
+ leftcap: []byte("<"),
+ rightcap: []byte(">"),
+ exp: `// Copyright 2016-2018 "Shulhan ". All rights reserved.`,
+ }, {
+ line: line,
+ leftcap: []byte(`"`),
+ rightcap: []byte(`"`),
+ exp: `// Copyright 2016-2018 . All rights reserved.`,
+ }, {
+ line: line,
+ leftcap: []byte(`/`),
+ rightcap: []byte(`/`),
+ exp: ` Copyright 2016-2018 "Shulhan <ms@kilabit.info>". All rights reserved.`,
+ }, {
+ line: []byte(`/* TEST */`),
+ leftcap: []byte(`/*`),
+ rightcap: []byte(`*/`),
+ exp: "",
+ }}
+
+ for _, c := range cases {
+ got, _ := EncloseRemove(c.line, c.leftcap, c.rightcap)
+
+ test.Assert(t, "", c.exp, string(got), true)
+ }
+}
+
+func TestEncloseToken(t *testing.T) {
+ line := []byte(`// Copyright 2016-2018 "Shulhan <ms@kilabit.info>". All rights reserved.`)
+
+ cases := []struct {
+ token, leftcap, rightcap []byte
+ exp string
+ changed bool
+ }{{
+ token: []byte(`_`),
+ leftcap: []byte(`-`),
+ rightcap: []byte(`-`),
+ exp: `// Copyright 2016-2018 "Shulhan <ms@kilabit.info>". All rights reserved.`,
+ changed: false,
+ }, {
+ token: []byte(`/`),
+ leftcap: []byte(`\`),
+ rightcap: []byte{},
+ exp: `\/\/ Copyright 2016-2018 "Shulhan <ms@kilabit.info>". All rights reserved.`,
+ changed: true,
+ }, {
+ token: []byte(`<`),
+ leftcap: []byte(`<`),
+ rightcap: []byte(` `),
+ exp: `// Copyright 2016-2018 "Shulhan << ms@kilabit.info>". All rights reserved.`,
+ changed: true,
+ }, {
+ token: []byte(`"`),
+ leftcap: []byte(`\`),
+ rightcap: []byte(` `),
+ exp: `// Copyright 2016-2018 \" Shulhan <ms@kilabit.info>\" . All rights reserved.`,
+ changed: true,
+ }}
+
+ for _, c := range cases {
+ got, changed := EncloseToken(line, c.token, c.leftcap, c.rightcap)
+
+ test.Assert(t, "newline", c.exp, string(got), true)
+ test.Assert(t, "changed", c.changed, changed, true)
+ }
+}
+
+func TestIsTokenAt(t *testing.T) {
+ line := []byte("Hello, world")
+
+ cases := []struct {
+ token []byte
+ p int
+ exp bool
+ }{{
+ // empty
+ }, {
+ token: []byte("world"),
+ p: -1,
+ }, {
+ token: []byte("world"),
+ p: 6,
+ }, {
+ token: []byte("world"),
+ p: 7,
+ exp: true,
+ }, {
+ token: []byte("world"),
+ p: 8,
+ }, {
+ token: []byte("worlds"),
+ p: 7,
+ }}
+
+ for _, c := range cases {
+ got := IsTokenAt(line, c.token, c.p)
+ test.Assert(t, "IsTokenAt", c.exp, got, true)
+ }
+}
+
+func TestSkipAfterToken(t *testing.T) {
+ line := []byte(`abc \def ghi`)
+
+ cases := []struct {
+ token []byte
+ startAt int
+ checkEsc bool
+ exp int
+ expFound bool
+ }{{
+ token: []byte(`def`),
+ exp: 8,
+ expFound: true,
+ }, {
+ token: []byte(`def`),
+ checkEsc: true,
+ exp: 12,
+ }, {
+ token: []byte(`ef`),
+ checkEsc: true,
+ exp: 8,
+ expFound: true,
+ }, {
+ token: []byte(`hi`),
+ exp: len(line),
+ expFound: true,
+ }}
+
+ for x, c := range cases {
+ t.Logf("#%d\n", x)
+ got, found := SkipAfterToken(line, c.token, c.startAt, c.checkEsc)
+ test.Assert(t, "Index", c.exp, got, true)
+ test.Assert(t, "Found", c.expFound, found, true)
+ }
+}
+
func TestToLower(t *testing.T) {
cases := []struct {
in []byte
@@ -34,6 +225,43 @@ func TestToLower(t *testing.T) {
}
}
+func testTokenFind(t *testing.T, line, token []byte, startat int, exp []int) {
+ got := []int{}
+ tokenlen := len(token)
+
+ for {
+ foundat := TokenFind(line, token, startat)
+
+ if foundat < 0 {
+ break
+ }
+
+ got = append(got, foundat)
+ startat = foundat + tokenlen
+ }
+
+ test.Assert(t, "TokenFind", exp, got, true)
+}
+
+func TestTokenFind(t *testing.T) {
+ line := []byte("// Copyright 2016-2018 Shulhan <ms@kilabit.info>. All rights reserved.")
+
+ token := []byte("//")
+ exp := []int{0}
+
+ testTokenFind(t, line, token, 0, exp)
+
+ token = []byte(".")
+ exp = []int{42, 48, 69}
+
+ testTokenFind(t, line, token, 0, exp)
+
+ token = []byte("d.")
+ exp = []int{68}
+
+ testTokenFind(t, line, token, 0, exp)
+}
+
var randomInput256 = Random([]byte(HexaLetters), 256)
func BenchmarkToLowerStd(b *testing.B) {