Merge package "github.com/shuLhan/tekstus", part 1/1

author: Shulhan <ms@kilabit.info> 2018-09-15 20:44:29 +0700
committer: Shulhan <ms@kilabit.info> 2018-09-17 22:51:17 +0700
commit: 0f68f37ce159c96d56b4748eee6a9a88c6b7f801 (patch)
tree: aad89a072ed73ca6669818c60596c0ab0c3f6333
parent: 05799acac0c977ea79c62df40de67bb3bef30db5 (diff)
download: pakakeh.go-0f68f37ce159c96d56b4748eee6a9a88c6b7f801.tar.xz
3 files changed, 609 insertions, 0 deletions
diff --git a/lib/bytes/bytes.go b/lib/bytes/bytes.go
index d8f837cc..7210b746 100644
--- a/lib/bytes/bytes.go
+++ b/lib/bytes/bytes.go
@@ -64,6 +64,180 @@ func AppendUint32(data *[]byte, v uint32) {
 }
 
 //
+// CutUntilToken cut line until we found token.
+//
+// Scanning starts at index `startAt`; a negative value is treated as 0.
+//
+// If token found, it will return all cut bytes before token, position of
+// byte after token, and boolean true.
+//
+// If no token found, it will return the scanned bytes (with escapes
+// processed), the index where the scan stopped, and boolean false.
+// If token is empty, it will return line as is, -1, and boolean false.
+//
+// If `checkEsc` is true, token that is prefixed with escaped character
+// '\' will be skipped; that '\' is dropped from the returned bytes and
+// "\\" collapses into a single '\'.
+//
+func CutUntilToken(line, token []byte, startAt int, checkEsc bool) ([]byte, int, bool) {
+	var (
+		v       []byte
+		p       int
+		escaped bool
+	)
+
+	linelen := len(line)
+	tokenlen := len(token)
+	// An empty token can never match, nothing to cut.
+	if tokenlen == 0 {
+		return line, -1, false
+	}
+	if startAt < 0 {
+		startAt = 0
+	}
+
+	for p = startAt; p < linelen; p++ {
+		// Check if the escape character is used to escaped the
+		// token ...
+		if checkEsc && line[p] == '\\' {
+			if escaped {
+				// escaped already, its mean double '\\'
+				v = append(v, '\\')
+				escaped = false
+			} else {
+				escaped = true
+			}
+			continue
+		}
+
+		// Not the token (either the first byte differs or only a
+		// prefix of the token matches): keep the byte as is.
+		if !IsTokenAt(line, token, p) {
+			if escaped {
+				// ... turn out its not escaping token.
+				v = append(v, '\\')
+				escaped = false
+			}
+			v = append(v, line[p])
+			continue
+		}
+
+		// Found it, but if its prefixed with escaped char, then
+		// we assumed it as non breaking token.
+		if escaped {
+			v = append(v, token...)
+			// Jump to the last byte of the token, the loop
+			// increment moves past it.
+			p = p + tokenlen - 1
+			escaped = false
+			continue
+		}
+
+		// We found the token match in `line` at `p`
+		return v, p + tokenlen, true
+	}
+
+	// We did not found it...
+	if escaped {
+		// ... and the line ends with a lone '\' that escapes nothing;
+		// keep it instead of silently dropping it.
+		v = append(v, '\\')
+	}
+	return v, p, false
+}
+
+//
+// EncloseRemove given a line, remove all bytes inside it, starting from
+// `leftcap` until the `rightcap` and return cut line and status to true.
+// Removal is repeated, rescanning from the start, until no pair is left.
+//
+// If no `leftcap` or `rightcap` is found, it will return line as is, and
+// status will be false.
+//
+func EncloseRemove(line, leftcap, rightcap []byte) (
+	newline []byte,
+	status bool,
+) {
+	newline = line
+	for {
+		lidx := TokenFind(newline, leftcap, 0)
+		// Search rightcap after the whole leftcap, so both caps never
+		// share bytes (e.g. "/*/" is not a complete "/*" ... "*/").
+		ridx := TokenFind(newline, rightcap, lidx+len(leftcap))
+		if lidx < 0 || ridx < 0 {
+			return newline, status
+		}
+		var cut []byte
+		cut = append(cut, newline[:lidx]...)
+		cut = append(cut, newline[ridx+len(rightcap):]...)
+		newline, status = cut, true
+	}
+}
+
+//
+// EncloseToken wraps every occurrence of `token` in `line` with `leftcap`
+// in front of it and `rightcap` behind it.
+//
+// It returns the rebuilt line and true when at least one token was
+// enclosed; otherwise it returns the line unchanged and false.
+// An empty token is never found.
+//
+func EncloseToken(line, token, leftcap, rightcap []byte) (
+	newline []byte,
+	status bool,
+) {
+	tokenlen := len(token)
+	pos := 0
+	for {
+		at := TokenFind(line, token, pos)
+		if at < 0 {
+			break
+		}
+
+		// Copy the bytes between the previous match and this one,
+		// followed by the enclosed token.
+		newline = append(newline, line[pos:at]...)
+		newline = append(newline, leftcap...)
+		newline = append(newline, token...)
+		newline = append(newline, rightcap...)
+		pos = at + tokenlen
+		status = true
+	}
+
+	// Whatever is left after the last match is kept as is.
+	newline = append(newline, line[pos:]...)
+	return newline, status
+}
+
+//
+// IsTokenAt return true if `line` at index `p` match with `token`,
+// otherwise it will return false.
+// Empty token always return false.
+// A negative `p` is treated as index 0.
+//
+func IsTokenAt(line, token []byte, p int) bool {
+	if len(token) == 0 {
+		return false
+	}
+	if p < 0 {
+		p = 0
+	}
+
+	// The token must fit entirely between `p` and the end of line.
+	end := p + len(token)
+	if end > len(line) {
+		return false
+	}
+
+	for x, c := range line[p:end] {
+		if c != token[x] {
+			return false
+		}
+	}
+	return true
+}
+
+//
 // PrintHex will print each byte in slice as hexadecimal value into N column
 // length.
 //
@@ -112,6 +286,64 @@ func ReadUint32(data []byte, x uint) uint32 {
 }
 
 //
+// SkipAfterToken skip all bytes until matched token is found and return the
+// index after the token and boolean true.
+//
+// Scanning starts at index `startAt`; a negative value is treated as 0.
+//
+// If `checkEsc` is true, token that is prefixed with escaped character
+// '\' will be considered as non-match token.
+//
+// If no token found it will return the index where the scan stopped (the
+// length of line, or `startAt` if it is larger) and boolean false.
+// If token is empty nothing is scanned: it will return `startAt` and false.
+//
+func SkipAfterToken(line, token []byte, startAt int, checkEsc bool) (int, bool) {
+	linelen := len(line)
+	tokenlen := len(token)
+	if startAt < 0 {
+		startAt = 0
+	}
+	if tokenlen == 0 {
+		// Nothing to search for; without this guard an empty token
+		// would panic when the first byte of the token is inspected.
+		return startAt, false
+	}
+
+	// escaped is true while the previous byte was an unconsumed '\'.
+	escaped := false
+	p := startAt
+	for ; p < linelen; p++ {
+		// Check if the escape character is used to escaped the
+		// token.  A run of '\' keeps the flag set, "\\" is not
+		// unescaped here.
+		if checkEsc && line[p] == '\\' {
+			escaped = true
+			continue
+		}
+
+		// Not the token (first byte differs, or only a prefix of the
+		// token matches): the escape, if any, applied to this byte.
+		if !IsTokenAt(line, token, p) {
+			escaped = false
+			continue
+		}
+
+		// Its matched, but if its prefixed with escaped char, then
+		// we assumed it as non breaking token.
+		if escaped {
+			escaped = false
+			continue
+		}
+
+		// We found the token at `p`
+		return p + tokenlen, true
+	}
+
+	return p, false
+}
+
+//
 // ToLower convert slice of bytes to lower cases, in places.
 //
 func ToLower(data *[]byte) {
@@ -136,6 +368,49 @@ func ToUpper(data *[]byte) {
 }
 
 //
+// TokenFind return the first index of matched token in line, start at custom
+// index.
+// If "startat" parameter is less than 0, then it will be set to 0.
+// If token is empty or no token found it will return -1.
+//
+// The match may start inside a partially matched prefix, for example token
+// "ab" is found at index 1 of line "aab".
+//
+func TokenFind(line, token []byte, startat int) (at int) {
+	linelen := len(line)
+	tokenlen := len(token)
+	if tokenlen == 0 {
+		return -1
+	}
+	if startat < 0 {
+		startat = 0
+	}
+
+	// last is the highest index where the whole token still fits in
+	// line; it is negative when token is longer than line.
+	last := linelen - tokenlen
+
+	// Try each candidate position on its own.  After a partial match
+	// fails the search must resume at the very next byte, not after the
+	// bytes already compared; otherwise a match that starts inside the
+	// failed prefix is missed.
+	for at = startat; at <= last; at++ {
+		// Compare the token byte by byte at this position.
+		y := 0
+		for y < tokenlen && line[at+y] == token[y] {
+			y++
+		}
+		if y == tokenlen {
+			// we found it!
+			return at
+		}
+	}
+
+	// line run out before the token is found
+	return -1
+}
+
+//
 // WriteUint16 into slice of byte.
 //
 func WriteUint16(data *[]byte, x uint, v uint16) {
diff --git a/lib/bytes/bytes_example_test.go b/lib/bytes/bytes_example_test.go
new file mode 100644
index 00000000..141d7f17
--- /dev/null
+++ b/lib/bytes/bytes_example_test.go
@@ -0,0 +1,106 @@
+// Copyright 2018, Shulhan <ms@kilabit.info>. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package bytes
+
+import (
+	"fmt"
+)
+
+func ExampleCutUntilToken() {
+	line := []byte(`abc \def ghi`)
+
+	cut, p, found := CutUntilToken(line, []byte("def"), 0, false)
+	fmt.Printf("'%s' %d %t\n", cut, p, found)
+
+	cut, p, found = CutUntilToken(line, []byte("def"), 0, true)
+	fmt.Printf("'%s' %d %t\n", cut, p, found)
+
+	cut, p, found = CutUntilToken(line, []byte("ef"), 0, true)
+	fmt.Printf("'%s' %d %t\n", cut, p, found)
+
+	cut, p, found = CutUntilToken(line, []byte("hi"), 0, true)
+	fmt.Printf("'%s' %d %t\n", cut, p, found)
+
+	// Output:
+	// 'abc \' 8 true
+	// 'abc def ghi' 12 false
+	// 'abc \d' 8 true
+	// 'abc \def g' 12 true
+}
+
+func ExampleEncloseRemove() {
+	line := []byte(`[[ ABC ]] DEF`)
+	leftcap := []byte(`[[`)
+	rightcap := []byte(`]]`)
+
+	got, changed := EncloseRemove(line, leftcap, rightcap)
+
+	fmt.Printf("'%s' %t\n", got, changed)
+	// Output: ' DEF' true
+}
+
+func ExampleEncloseToken() {
+	line := []byte(`// Copyright 2016-2018 "Shulhan <ms@kilabit.info>". All rights reserved.`)
+	token := []byte(`"`)
+	leftcap := []byte(`\`)
+	rightcap := []byte(`_`)
+
+	got, changed := EncloseToken(line, token, leftcap, rightcap)
+
+	fmt.Printf("'%s' %t\n", got, changed)
+	// Output:
+	// '// Copyright 2016-2018 \"_Shulhan <ms@kilabit.info>\"_. All rights reserved.' true
+}
+
+func ExampleIsTokenAt() {
+	line := []byte("Hello, world")
+	token := []byte("world")
+	token2 := []byte("worlds")
+	tokenEmpty := []byte{}
+
+	fmt.Printf("%t\n", IsTokenAt(line, tokenEmpty, 6))
+	fmt.Printf("%t\n", IsTokenAt(line, token, 6))
+	fmt.Printf("%t\n", IsTokenAt(line, token, 7))
+	fmt.Printf("%t\n", IsTokenAt(line, token, 8))
+	fmt.Printf("%t\n", IsTokenAt(line, token2, 8))
+	// Output:
+	// false
+	// false
+	// true
+	// false
+	// false
+}
+
+func ExampleSkipAfterToken() {
+	line := []byte(`abc \def ghi`)
+
+	p, found := SkipAfterToken(line, []byte("def"), 0, false)
+	fmt.Printf("%d %t\n", p, found)
+
+	p, found = SkipAfterToken(line, []byte("def"), 0, true)
+	fmt.Printf("%d %t\n", p, found)
+
+	p, found = SkipAfterToken(line, []byte("ef"), 0, true)
+	fmt.Printf("%d %t\n", p, found)
+
+	p, found = SkipAfterToken(line, []byte("hi"), 0, true)
+	fmt.Printf("%d %t\n", p, found)
+
+	// Output:
+	// 8 true
+	// 12 false
+	// 8 true
+	// 12 true
+}
+
+func ExampleTokenFind() {
+	line := []byte("// Copyright 2018, Shulhan <ms@kilabit.info>. All rights reserved.")
+	token := []byte("right")
+
+	at := TokenFind(line, token, 0)
+
+	fmt.Printf("%d\n", at)
+	// Output: 7
+}
diff --git a/lib/bytes/bytes_test.go b/lib/bytes/bytes_test.go
index 50669c4f..11f741d0 100644
--- a/lib/bytes/bytes_test.go
+++ b/lib/bytes/bytes_test.go
@@ -7,6 +7,197 @@ import (
 	"github.com/shuLhan/share/lib/test"
 )
 
+func TestCutUntilToken(t *testing.T) {
+	line := []byte(`abc \def ghi`)
+
+	cases := []struct {
+		token    []byte
+		startAt  int
+		checkEsc bool
+		exp      string
+		expIdx   int
+		expFound bool
+	}{{
+		exp:      `abc \def ghi`,
+		expIdx:   -1,
+		expFound: false,
+	}, {
+		token:    []byte(`def`),
+		exp:      `abc \`,
+		expIdx:   8,
+		expFound: true,
+	}, {
+		token:    []byte(`def`),
+		checkEsc: true,
+		exp:      `abc def ghi`,
+		expIdx:   12,
+		expFound: false,
+	}, {
+		token:    []byte(`ef`),
+		checkEsc: true,
+		exp:      `abc \d`,
+		expIdx:   8,
+		expFound: true,
+	}}
+
+	for x, c := range cases {
+		t.Logf("#%d\n", x)
+
+		got, idx, found := CutUntilToken(line, c.token, c.startAt, c.checkEsc)
+
+		test.Assert(t, "cut", c.exp, string(got), true)
+		test.Assert(t, "idx", c.expIdx, idx, true)
+		test.Assert(t, "found", c.expFound, found, true)
+	}
+}
+
+func TestEncloseRemove(t *testing.T) {
+	line := []byte(`// Copyright 2016-2018 "Shulhan <ms@kilabit.info>". All rights reserved.`)
+
+	cases := []struct {
+		line     []byte
+		leftcap  []byte
+		rightcap []byte
+		exp      string
+	}{{
+		line:     line,
+		leftcap:  []byte("<"),
+		rightcap: []byte(">"),
+		exp:      `// Copyright 2016-2018 "Shulhan ". All rights reserved.`,
+	}, {
+		line:     line,
+		leftcap:  []byte(`"`),
+		rightcap: []byte(`"`),
+		exp:      `// Copyright 2016-2018 . All rights reserved.`,
+	}, {
+		line:     line,
+		leftcap:  []byte(`/`),
+		rightcap: []byte(`/`),
+		exp:      ` Copyright 2016-2018 "Shulhan <ms@kilabit.info>". All rights reserved.`,
+	}, {
+		line:     []byte(`/* TEST */`),
+		leftcap:  []byte(`/*`),
+		rightcap: []byte(`*/`),
+		exp:      "",
+	}}
+
+	for _, c := range cases {
+		got, _ := EncloseRemove(c.line, c.leftcap, c.rightcap)
+
+		test.Assert(t, "", c.exp, string(got), true)
+	}
+}
+
+func TestEncloseToken(t *testing.T) {
+	line := []byte(`// Copyright 2016-2018 "Shulhan <ms@kilabit.info>". All rights reserved.`)
+
+	cases := []struct {
+		token, leftcap, rightcap []byte
+		exp                      string
+		changed                  bool
+	}{{
+		token:    []byte(`_`),
+		leftcap:  []byte(`-`),
+		rightcap: []byte(`-`),
+		exp:      `// Copyright 2016-2018 "Shulhan <ms@kilabit.info>". All rights reserved.`,
+		changed:  false,
+	}, {
+		token:    []byte(`/`),
+		leftcap:  []byte(`\`),
+		rightcap: []byte{},
+		exp:      `\/\/ Copyright 2016-2018 "Shulhan <ms@kilabit.info>". All rights reserved.`,
+		changed:  true,
+	}, {
+		token:    []byte(`<`),
+		leftcap:  []byte(`<`),
+		rightcap: []byte(` `),
+		exp:      `// Copyright 2016-2018 "Shulhan << ms@kilabit.info>". All rights reserved.`,
+		changed:  true,
+	}, {
+		token:    []byte(`"`),
+		leftcap:  []byte(`\`),
+		rightcap: []byte(` `),
+		exp:      `// Copyright 2016-2018 \" Shulhan <ms@kilabit.info>\" . All rights reserved.`,
+		changed:  true,
+	}}
+
+	for _, c := range cases {
+		got, changed := EncloseToken(line, c.token, c.leftcap, c.rightcap)
+
+		test.Assert(t, "newline", c.exp, string(got), true)
+		test.Assert(t, "changed", c.changed, changed, true)
+	}
+}
+
+func TestIsTokenAt(t *testing.T) {
+	line := []byte("Hello, world")
+
+	cases := []struct {
+		token []byte
+		p     int
+		exp   bool
+	}{{
+	// empty
+	}, {
+		token: []byte("world"),
+		p:     -1,
+	}, {
+		token: []byte("world"),
+		p:     6,
+	}, {
+		token: []byte("world"),
+		p:     7,
+		exp:   true,
+	}, {
+		token: []byte("world"),
+		p:     8,
+	}, {
+		token: []byte("worlds"),
+		p:     7,
+	}}
+
+	for _, c := range cases {
+		got := IsTokenAt(line, c.token, c.p)
+		test.Assert(t, "IsTokenAt", c.exp, got, true)
+	}
+}
+
+func TestSkipAfterToken(t *testing.T) {
+	line := []byte(`abc \def ghi`)
+
+	cases := []struct {
+		token    []byte
+		startAt  int
+		checkEsc bool
+		exp      int
+		expFound bool
+	}{{
+		token:    []byte(`def`),
+		exp:      8,
+		expFound: true,
+	}, {
+		token:    []byte(`def`),
+		checkEsc: true,
+		exp:      12,
+	}, {
+		token:    []byte(`ef`),
+		checkEsc: true,
+		exp:      8,
+		expFound: true,
+	}, {
+		token:    []byte(`hi`),
+		exp:      len(line),
+		expFound: true,
+	}}
+
+	for x, c := range cases {
+		t.Logf("#%d\n", x)
+		got, found := SkipAfterToken(line, c.token, c.startAt, c.checkEsc)
+		test.Assert(t, "Index", c.exp, got, true)
+		test.Assert(t, "Found", c.expFound, found, true)
+	}
+}
+
 func TestToLower(t *testing.T) {
 	cases := []struct {
 		in  []byte
@@ -34,6 +225,43 @@ func TestToLower(t *testing.T) {
 	}
 }
 
+func testTokenFind(t *testing.T, line, token []byte, startat int, exp []int) {
+	got := []int{}
+	tokenlen := len(token)
+
+	for {
+		foundat := TokenFind(line, token, startat)
+
+		if foundat < 0 {
+			break
+		}
+
+		got = append(got, foundat)
+		startat = foundat + tokenlen
+	}
+
+	test.Assert(t, "TokenFind", exp, got, true)
+}
+
+func TestTokenFind(t *testing.T) {
+	line := []byte("// Copyright 2016-2018 Shulhan <ms@kilabit.info>. All rights reserved.")
+
+	token := []byte("//")
+	exp := []int{0}
+
+	testTokenFind(t, line, token, 0, exp)
+
+	token = []byte(".")
+	exp = []int{42, 48, 69}
+
+	testTokenFind(t, line, token, 0, exp)
+
+	token = []byte("d.")
+	exp = []int{68}
+
+	testTokenFind(t, line, token, 0, exp)
+}
+
 var randomInput256 = Random([]byte(HexaLetters), 256)
 
 func BenchmarkToLowerStd(b *testing.B) {
author	Shulhan <ms@kilabit.info>	2018-09-15 20:44:29 +0700
committer	Shulhan <ms@kilabit.info>	2018-09-17 22:51:17 +0700
commit	0f68f37ce159c96d56b4748eee6a9a88c6b7f801 (patch)
tree	aad89a072ed73ca6669818c60596c0ab0c3f6333
parent	05799acac0c977ea79c62df40de67bb3bef30db5 (diff)
download	pakakeh.go-0f68f37ce159c96d56b4748eee6a9a88c6b7f801.tar.xz