diff options
| author | Shulhan <ms@kilabit.info> | 2018-09-15 20:44:29 +0700 |
|---|---|---|
| committer | Shulhan <ms@kilabit.info> | 2018-09-17 22:51:17 +0700 |
| commit | 0f68f37ce159c96d56b4748eee6a9a88c6b7f801 (patch) | |
| tree | aad89a072ed73ca6669818c60596c0ab0c3f6333 | |
| parent | 05799acac0c977ea79c62df40de67bb3bef30db5 (diff) | |
| download | pakakeh.go-0f68f37ce159c96d56b4748eee6a9a88c6b7f801.tar.xz | |
Merge package "github.com/shuLhan/tekstus", part 1/1
| -rw-r--r-- | lib/bytes/bytes.go | 275 | ||||
| -rw-r--r-- | lib/bytes/bytes_example_test.go | 106 | ||||
| -rw-r--r-- | lib/bytes/bytes_test.go | 228 |
3 files changed, 609 insertions, 0 deletions
//
// CutUntilToken cuts `line`, beginning at index `startAt`, until `token` is
// found.
//
// If the token is found, it returns all bytes that precede the token, the
// position of the byte right after the token, and true.
//
// If no token is found, it returns all bytes that have been scanned, the
// index where the scan stopped, and false.
// If `token` is empty, it returns `line` as is, -1, and false.
// A negative `startAt` is treated as 0.
//
// If `checkEsc` is true, a token that is prefixed with the escape character
// '\' is not considered a match: the backslash is dropped and the token
// itself is copied into the returned bytes.
// A doubled backslash yields a single literal backslash, and a backslash
// that does not escape a token (including one at the very end of `line`) is
// kept as is.
//
func CutUntilToken(line, token []byte, startAt int, checkEsc bool) ([]byte, int, bool) {
	tokenlen := len(token)
	if tokenlen == 0 {
		return line, -1, false
	}
	if startAt < 0 {
		startAt = 0
	}

	var (
		cut     []byte
		escaped bool
		p       = startAt
	)

	for ; p < len(line); p++ {
		c := line[p]

		if checkEsc && c == '\\' {
			if escaped {
				// Doubled backslash, emit one literal '\'.
				cut = append(cut, '\\')
			}
			escaped = !escaped
			continue
		}

		if !IsTokenAt(line, token, p) {
			if escaped {
				// The pending backslash turned out not to
				// escape a token, so put it back.
				cut = append(cut, '\\')
				escaped = false
			}
			cut = append(cut, c)
			continue
		}

		if !escaped {
			// Token matched in `line` at `p`.
			return cut, p + tokenlen, true
		}

		// Escaped token: keep the token without its backslash and
		// resume scanning right after it.
		cut = append(cut, token...)
		p += tokenlen - 1
		escaped = false
	}

	if escaped {
		// A trailing backslash escaped nothing; do not lose it.
		cut = append(cut, '\\')
	}

	return cut, p, false
}

//
// EncloseRemove removes, from `line`, every section that starts with
// `leftcap` and ends with the first `rightcap` that follows it, both caps
// included.
// The search is repeated from the beginning of the result until no enclosed
// section is left.
//
// If at least one section was removed, it returns the shortened copy of
// `line` and true; `line` itself is not modified.
// If `leftcap` or `rightcap` is empty or not found, it returns `line` as is
// and false.
//
func EncloseRemove(line, leftcap, rightcap []byte) (
	newline []byte,
	status bool,
) {
	newline = line

	for {
		lidx := TokenFind(newline, leftcap, 0)
		if lidx < 0 {
			break
		}

		// Look for the right cap only after the whole left cap, so
		// both caps never share a byte.
		ridx := TokenFind(newline, rightcap, lidx+len(leftcap))
		if ridx < 0 {
			break
		}

		if !status {
			// Work on a private copy from the first removal on,
			// the caller's slice must stay untouched.
			newline = append(make([]byte, 0, len(line)), line...)
			status = true
		}

		newline = append(newline[:lidx], newline[ridx+len(rightcap):]...)
	}

	return newline, status
}

//
// EncloseToken finds every `token` in `line` and encloses it with `leftcap`
// on its left and `rightcap` on its right.
//
// If at least one token is found, it returns the modified copy of `line`
// and true.
// If no token is found, or `token` is empty, it returns a copy of `line`
// and false.
//
func EncloseToken(line, token, leftcap, rightcap []byte) (
	newline []byte,
	status bool,
) {
	startat := 0

	for {
		foundat := TokenFind(line, token, startat)
		if foundat < 0 {
			break
		}

		newline = append(newline, line[startat:foundat]...)
		newline = append(newline, leftcap...)
		newline = append(newline, token...)
		newline = append(newline, rightcap...)

		startat = foundat + len(token)
		status = true
	}

	// Copy whatever is left after the last token.
	newline = append(newline, line[startat:]...)

	return newline, status
}

//
// IsTokenAt returns true if the bytes of `line` starting at index `p` are
// equal to `token`, otherwise it returns false.
// An empty token always returns false.
// A negative `p` is treated as 0.
//
func IsTokenAt(line, token []byte, p int) bool {
	tokenlen := len(token)
	if tokenlen == 0 {
		return false
	}
	if p < 0 {
		p = 0
	}
	// Not enough bytes left in line to hold the token.
	if p > len(line)-tokenlen {
		return false
	}

	return string(line[p:p+tokenlen]) == string(token)
}

//
// SkipAfterToken skips all bytes of `line`, beginning at index `startAt`,
// until `token` is found, and returns the index right after the token and
// true.
//
// If no token is found, it returns the index where the scan stopped and
// false.
// If `token` is empty, it returns -1 and false.
// A negative `startAt` is treated as 0.
//
// If `checkEsc` is true, a token that is prefixed with the escape character
// '\' is considered a non-match and is skipped as a whole.
// A doubled backslash is a literal backslash and does not escape the token
// that follows it, the same rule that CutUntilToken applies.
//
func SkipAfterToken(line, token []byte, startAt int, checkEsc bool) (int, bool) {
	tokenlen := len(token)
	if tokenlen == 0 {
		return -1, false
	}
	if startAt < 0 {
		startAt = 0
	}

	escaped := false
	p := startAt

	for ; p < len(line); p++ {
		if checkEsc && line[p] == '\\' {
			escaped = !escaped
			continue
		}

		if !IsTokenAt(line, token, p) {
			// Whatever the backslash preceded, it was not the
			// token.
			escaped = false
			continue
		}

		if !escaped {
			// Token matched in `line` at `p`.
			return p + tokenlen, true
		}

		// Escaped token: jump over it and keep looking.
		p += tokenlen - 1
		escaped = false
	}

	return p, false
}

//
// TokenFind returns the first index of `token` in `line`, searching from
// index `startat`.
// If `startat` is less than 0, it is treated as 0.
// If `token` is empty or not found, it returns -1.
//
func TokenFind(line, token []byte, startat int) (at int) {
	tokenlen := len(token)
	if tokenlen == 0 {
		return -1
	}
	if startat < 0 {
		startat = 0
	}

	// Try every position that still has room for the whole token, so a
	// failed partial match can never hide a match that starts inside it.
	last := len(line) - tokenlen
	for at = startat; at <= last; at++ {
		if line[at] == token[0] && IsTokenAt(line, token, at) {
			return at
		}
	}

	return -1
}
+ +package bytes + +import ( + "fmt" +) + +func ExampleCutUntilToken() { + line := []byte(`abc \def ghi`) + + cut, p, found := CutUntilToken(line, []byte("def"), 0, false) + fmt.Printf("'%s' %d %t\n", cut, p, found) + + cut, p, found = CutUntilToken(line, []byte("def"), 0, true) + fmt.Printf("'%s' %d %t\n", cut, p, found) + + cut, p, found = CutUntilToken(line, []byte("ef"), 0, true) + fmt.Printf("'%s' %d %t\n", cut, p, found) + + cut, p, found = CutUntilToken(line, []byte("hi"), 0, true) + fmt.Printf("'%s' %d %t\n", cut, p, found) + + // Output: + // 'abc \' 8 true + // 'abc def ghi' 12 false + // 'abc \d' 8 true + // 'abc \def g' 12 true +} + +func ExampleEncloseRemove() { + line := []byte(`[[ ABC ]] DEF`) + leftcap := []byte(`[[`) + rightcap := []byte(`]]`) + + got, changed := EncloseRemove(line, leftcap, rightcap) + + fmt.Printf("'%s' %t\n", got, changed) + // Output: ' DEF' true +} + +func ExampleEncloseToken() { + line := []byte(`// Copyright 2016-2018 "Shulhan <ms@kilabit.info>". All rights reserved.`) + token := []byte(`"`) + leftcap := []byte(`\`) + rightcap := []byte(`_`) + + got, changed := EncloseToken(line, token, leftcap, rightcap) + + fmt.Printf("'%s' %t\n", got, changed) + // Output: + // '// Copyright 2016-2018 \"_Shulhan <ms@kilabit.info>\"_. All rights reserved.' 
true +} + +func ExampleIsTokenAt() { + line := []byte("Hello, world") + token := []byte("world") + token2 := []byte("worlds") + tokenEmpty := []byte{} + + fmt.Printf("%t\n", IsTokenAt(line, tokenEmpty, 6)) + fmt.Printf("%t\n", IsTokenAt(line, token, 6)) + fmt.Printf("%t\n", IsTokenAt(line, token, 7)) + fmt.Printf("%t\n", IsTokenAt(line, token, 8)) + fmt.Printf("%t\n", IsTokenAt(line, token2, 8)) + // Output: + // false + // false + // true + // false + // false +} + +func ExampleSkipAfterToken() { + line := []byte(`abc \def ghi`) + + p, found := SkipAfterToken(line, []byte("def"), 0, false) + fmt.Printf("%d %t\n", p, found) + + p, found = SkipAfterToken(line, []byte("def"), 0, true) + fmt.Printf("%d %t\n", p, found) + + p, found = SkipAfterToken(line, []byte("ef"), 0, true) + fmt.Printf("%d %t\n", p, found) + + p, found = SkipAfterToken(line, []byte("hi"), 0, true) + fmt.Printf("%d %t\n", p, found) + + // Output: + // 8 true + // 12 false + // 8 true + // 12 true +} + +func ExampleTokenFind() { + line := []byte("// Copyright 2018, Shulhan <ms@kilabit.info>. 
All rights reserved.") + token := []byte("right") + + at := TokenFind(line, token, 0) + + fmt.Printf("%d\n", at) + // Output: 7 +} diff --git a/lib/bytes/bytes_test.go b/lib/bytes/bytes_test.go index 50669c4f..11f741d0 100644 --- a/lib/bytes/bytes_test.go +++ b/lib/bytes/bytes_test.go @@ -7,6 +7,197 @@ import ( "github.com/shuLhan/share/lib/test" ) +func TestCutUntilToken(t *testing.T) { + line := []byte(`abc \def ghi`) + + cases := []struct { + token []byte + startAt int + checkEsc bool + exp string + expIdx int + expFound bool + }{{ + exp: `abc \def ghi`, + expIdx: -1, + expFound: false, + }, { + token: []byte(`def`), + exp: `abc \`, + expIdx: 8, + expFound: true, + }, { + token: []byte(`def`), + checkEsc: true, + exp: `abc def ghi`, + expIdx: 12, + expFound: false, + }, { + token: []byte(`ef`), + checkEsc: true, + exp: `abc \d`, + expIdx: 8, + expFound: true, + }} + + for x, c := range cases { + t.Logf("#%d\n", x) + + got, idx, found := CutUntilToken(line, c.token, c.startAt, c.checkEsc) + + test.Assert(t, "cut", c.exp, string(got), true) + test.Assert(t, "idx", c.expIdx, idx, true) + test.Assert(t, "found", c.expFound, found, true) + } +} + +func TestEncloseRemove(t *testing.T) { + line := []byte(`// Copyright 2016-2018 "Shulhan <ms@kilabit.info>". All rights reserved.`) + + cases := []struct { + line []byte + leftcap []byte + rightcap []byte + exp string + }{{ + line: line, + leftcap: []byte("<"), + rightcap: []byte(">"), + exp: `// Copyright 2016-2018 "Shulhan ". All rights reserved.`, + }, { + line: line, + leftcap: []byte(`"`), + rightcap: []byte(`"`), + exp: `// Copyright 2016-2018 . All rights reserved.`, + }, { + line: line, + leftcap: []byte(`/`), + rightcap: []byte(`/`), + exp: ` Copyright 2016-2018 "Shulhan <ms@kilabit.info>". 
All rights reserved.`, + }, { + line: []byte(`/* TEST */`), + leftcap: []byte(`/*`), + rightcap: []byte(`*/`), + exp: "", + }} + + for _, c := range cases { + got, _ := EncloseRemove(c.line, c.leftcap, c.rightcap) + + test.Assert(t, "", c.exp, string(got), true) + } +} + +func TestEncloseToken(t *testing.T) { + line := []byte(`// Copyright 2016-2018 "Shulhan <ms@kilabit.info>". All rights reserved.`) + + cases := []struct { + token, leftcap, rightcap []byte + exp string + changed bool + }{{ + token: []byte(`_`), + leftcap: []byte(`-`), + rightcap: []byte(`-`), + exp: `// Copyright 2016-2018 "Shulhan <ms@kilabit.info>". All rights reserved.`, + changed: false, + }, { + token: []byte(`/`), + leftcap: []byte(`\`), + rightcap: []byte{}, + exp: `\/\/ Copyright 2016-2018 "Shulhan <ms@kilabit.info>". All rights reserved.`, + changed: true, + }, { + token: []byte(`<`), + leftcap: []byte(`<`), + rightcap: []byte(` `), + exp: `// Copyright 2016-2018 "Shulhan << ms@kilabit.info>". All rights reserved.`, + changed: true, + }, { + token: []byte(`"`), + leftcap: []byte(`\`), + rightcap: []byte(` `), + exp: `// Copyright 2016-2018 \" Shulhan <ms@kilabit.info>\" . 
All rights reserved.`, + changed: true, + }} + + for _, c := range cases { + got, changed := EncloseToken(line, c.token, c.leftcap, c.rightcap) + + test.Assert(t, "newline", c.exp, string(got), true) + test.Assert(t, "changed", c.changed, changed, true) + } +} + +func TestIsTokenAt(t *testing.T) { + line := []byte("Hello, world") + + cases := []struct { + token []byte + p int + exp bool + }{{ + // empty + }, { + token: []byte("world"), + p: -1, + }, { + token: []byte("world"), + p: 6, + }, { + token: []byte("world"), + p: 7, + exp: true, + }, { + token: []byte("world"), + p: 8, + }, { + token: []byte("worlds"), + p: 7, + }} + + for _, c := range cases { + got := IsTokenAt(line, c.token, c.p) + test.Assert(t, "IsTokenAt", c.exp, got, true) + } +} + +func TestSkipAfterToken(t *testing.T) { + line := []byte(`abc \def ghi`) + + cases := []struct { + token []byte + startAt int + checkEsc bool + exp int + expFound bool + }{{ + token: []byte(`def`), + exp: 8, + expFound: true, + }, { + token: []byte(`def`), + checkEsc: true, + exp: 12, + }, { + token: []byte(`ef`), + checkEsc: true, + exp: 8, + expFound: true, + }, { + token: []byte(`hi`), + exp: len(line), + expFound: true, + }} + + for x, c := range cases { + t.Logf("#%d\n", x) + got, found := SkipAfterToken(line, c.token, c.startAt, c.checkEsc) + test.Assert(t, "Index", c.exp, got, true) + test.Assert(t, "Found", c.expFound, found, true) + } +} + func TestToLower(t *testing.T) { cases := []struct { in []byte @@ -34,6 +225,43 @@ func TestToLower(t *testing.T) { } } +func testTokenFind(t *testing.T, line, token []byte, startat int, exp []int) { + got := []int{} + tokenlen := len(token) + + for { + foundat := TokenFind(line, token, startat) + + if foundat < 0 { + break + } + + got = append(got, foundat) + startat = foundat + tokenlen + } + + test.Assert(t, "TokenFind", exp, got, true) +} + +func TestTokenFind(t *testing.T) { + line := []byte("// Copyright 2016-2018 Shulhan <ms@kilabit.info>. 
All rights reserved.") + + token := []byte("//") + exp := []int{0} + + testTokenFind(t, line, token, 0, exp) + + token = []byte(".") + exp = []int{42, 48, 69} + + testTokenFind(t, line, token, 0, exp) + + token = []byte("d.") + exp = []int{68} + + testTokenFind(t, line, token, 0, exp) +} + var randomInput256 = Random([]byte(HexaLetters), 256) func BenchmarkToLowerStd(b *testing.B) { |
