diff options
| author | Shulhan <ms@kilabit.info> | 2018-09-17 00:26:46 +0700 |
|---|---|---|
| committer | Shulhan <ms@kilabit.info> | 2018-09-17 22:51:23 +0700 |
| commit | 66caeb368336e9f149d6f40772e7f0fdd070cf78 (patch) | |
| tree | d0295d2fb895ce9e046400fae90cd7d69081c16a | |
| parent | f911fdc362d2a98a9f4deb93c18231ae77df12a1 (diff) | |
| download | pakakeh.go-66caeb368336e9f149d6f40772e7f0fdd070cf78.tar.xz | |
Merge package "github.com/shuLhan/tekstus", part 3/3
| -rw-r--r-- | lib/strings/row.go | 59 | ||||
| -rw-r--r-- | lib/strings/row_example_test.go | 35 | ||||
| -rw-r--r-- | lib/strings/row_test.go | 66 | ||||
| -rw-r--r-- | lib/strings/statistic.go | 319 | ||||
| -rw-r--r-- | lib/strings/statistic_example_test.go | 113 | ||||
| -rw-r--r-- | lib/strings/statistic_test.go | 384 | ||||
| -rw-r--r-- | lib/strings/string_test.go | 2 | ||||
| -rw-r--r-- | lib/strings/strings.go | 251 | ||||
| -rw-r--r-- | lib/strings/strings_example_test.go | 147 | ||||
| -rw-r--r-- | lib/strings/strings_test.go | 251 | ||||
| -rw-r--r-- | lib/strings/table.go | 181 | ||||
| -rw-r--r-- | lib/strings/table_example_test.go | 68 | ||||
| -rw-r--r-- | lib/strings/table_test.go | 153 | ||||
| -rw-r--r-- | lib/strings/to.go | 68 | ||||
| -rw-r--r-- | lib/strings/to_example_test.go | 30 | ||||
| -rw-r--r-- | lib/strings/to_test.go | 43 |
16 files changed, 2170 insertions, 0 deletions
diff --git a/lib/strings/row.go b/lib/strings/row.go new file mode 100644 index 00000000..bc4c1456 --- /dev/null +++ b/lib/strings/row.go @@ -0,0 +1,59 @@ +// Copyright 2018, Shulhan <ms@kilabit.info>. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package strings + +import ( + "strings" +) + +// +// Row is simplified name for slice of slice of string. +// +type Row [][]string + +// +// IsEqual compare two row without regard to their order. +// +// Return true if both contain the same list, false otherwise. +// +func (row Row) IsEqual(b Row) bool { + rowlen := len(row) + + if rowlen != len(b) { + return false + } + + check := make([]bool, rowlen) + + for x, row := range row { + for _, rstrings := range b { + if IsEqual(row, rstrings) { + check[x] = true + break + } + } + } + + for _, v := range check { + if !v { + return false + } + } + return true +} + +// +// Join list of slice of string using `lsep` as separator between row items +// and `ssep` for element in each item. +// +func (row Row) Join(lsep string, ssep string) (s string) { + for x := 0; x < len(row); x++ { + if x > 0 { + s += lsep + } + s += strings.Join(row[x], ssep) + } + return +} diff --git a/lib/strings/row_example_test.go b/lib/strings/row_example_test.go new file mode 100644 index 00000000..45292e94 --- /dev/null +++ b/lib/strings/row_example_test.go @@ -0,0 +1,35 @@ +// Copyright 2018, Shulhan <ms@kilabit.info>. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package strings + +import ( + "fmt" +) + +func ExampleRow_IsEqual() { + row := Row{{"a"}, {"b", "c"}} + fmt.Println(row.IsEqual(Row{{"a"}, {"b", "c"}})) + fmt.Println(row.IsEqual(Row{{"a"}, {"c", "b"}})) + fmt.Println(row.IsEqual(Row{{"c", "b"}, {"a"}})) + fmt.Println(row.IsEqual(Row{{"b", "c"}, {"a"}})) + fmt.Println(row.IsEqual(Row{{"a"}, {"b"}})) + // Output: + // true + // true + // true + // true + // false +} + +func ExampleRow_Join() { + row := Row{{"a"}, {"b", "c"}} + fmt.Println(row.Join(";", ",")) + + row = Row{{"a"}, {}} + fmt.Println(row.Join(";", ",")) + // Output: + // a;b,c + // a; +} diff --git a/lib/strings/row_test.go b/lib/strings/row_test.go new file mode 100644 index 00000000..dc7c0519 --- /dev/null +++ b/lib/strings/row_test.go @@ -0,0 +1,66 @@ +// Copyright 2018, Shulhan <ms@kilabit.info>. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package strings + +import ( + "testing" + + "github.com/shuLhan/share/lib/test" +) + +func TestRowIsEqual(t *testing.T) { + cases := []struct { + a, b Row + exp bool + }{{ + a: Row{{"a"}, {"b", "c"}}, + b: Row{{"a"}, {"b", "c"}}, + exp: true, + }, { + a: Row{{"a"}, {"b", "c"}}, + b: Row{{"a"}, {"c", "b"}}, + exp: true, + }, { + a: Row{{"a"}, {"b", "c"}}, + b: Row{{"c", "b"}, {"a"}}, + exp: true, + }, { + a: Row{{"a"}, {"b", "c"}}, + b: Row{{"a"}, {"b", "a"}}, + }} + + for _, c := range cases { + got := c.a.IsEqual(c.b) + test.Assert(t, "", c.exp, got, true) + } +} + +func TestRowJoin(t *testing.T) { + cases := []struct { + row Row + lsep, ssep string + exp string + }{{ + // + lsep: ";", + ssep: ",", + exp: "", + }, { + row: Row{{"a"}, {}}, + lsep: ";", + ssep: ",", + exp: "a;", + }, { + row: Row{{"a"}, {"b", "c"}}, + lsep: ";", + ssep: ",", + exp: "a;b,c", + }} + + for _, c := range cases { + got := c.row.Join(c.lsep, c.ssep) + test.Assert(t, "", c.exp, got, true) + } +} diff --git a/lib/strings/statistic.go 
//
// CountAlnum return number of alpha-numeric character in text, that is the
// number of runes for which unicode.IsDigit or unicode.IsLetter is true.
//
func CountAlnum(text string) (n int) {
	for _, v := range text {
		if unicode.IsLetter(v) || unicode.IsDigit(v) {
			n++
		}
	}
	return n
}

//
// CountAlnumDistribution count distribution of alpha-numeric characters in
// text.
//
// The characters are returned in the order of their first occurrence, and
// counts[x] is the number of times chars[x] occurs in text.
// Both results are nil if text has no alpha-numeric character.
//
// Example, given a text "abbcccddddeeeee", it will return [a b c d e] and
// [1 2 3 4 5].
//
func CountAlnumDistribution(text string) (chars []rune, counts []int) {
	// position maps a rune to its index in chars and counts, so each rune
	// of text is resolved with a single lookup instead of a scan of chars.
	position := make(map[rune]int)

	for _, v := range text {
		if !unicode.IsDigit(v) && !unicode.IsLetter(v) {
			continue
		}
		if idx, seen := position[v]; seen {
			counts[idx]++
			continue
		}
		position[v] = len(chars)
		chars = append(chars, v)
		counts = append(counts, 1)
	}
	return chars, counts
}

//
// CountCharSequence given a string, count number of repeated character more
// than one in sequence and return list of characters and their counts.
//
// A character that occurs in several separate sequences is reported once
// for each sequence.
// Repeated white-space is never counted as a sequence.
//
func CountCharSequence(text string) (chars []rune, counts []int) {
	var (
		lastv rune
		// count is the length of the current sequence; zero means no rune
		// has been read yet, so the first rune is never compared against
		// the zero value of lastv.
		count int
	)

	for _, v := range text {
		if count > 0 && v == lastv {
			if !unicode.IsSpace(v) {
				count++
			}
		} else {
			if count > 1 {
				chars = append(chars, lastv)
				counts = append(counts, count)
			}
			count = 1
		}
		lastv = v
	}
	if count > 1 {
		chars = append(chars, lastv)
		counts = append(counts, count)
	}
	return chars, counts
}

//
// CountDigit return number of digit in text.
+// +func CountDigit(text string) (n int) { + if len(text) == 0 { + return 0 + } + + for _, v := range text { + if unicode.IsDigit(v) { + n++ + } + } + return +} + +// +// CountNonAlnum return number of non alpha-numeric character in text. +// If `withspace` is true, it will be counted as non-alpha-numeric, if it +// false it will be ignored. +// +func CountNonAlnum(text string, withspace bool) (n int) { + if len(text) == 0 { + return + } + + for _, v := range text { + if unicode.IsDigit(v) || unicode.IsLetter(v) { + continue + } + if unicode.IsSpace(v) { + if withspace { + n++ + } + continue + } + n++ + } + return +} + +// +// CountUniqChar count number of character in text without duplication. +// +func CountUniqChar(text string) (n int) { + if len(text) == 0 { + return + } + + var uchars []rune + + for _, v := range text { + yes, _ := runes.Contain(uchars, v) + if yes { + continue + } + uchars = append(uchars, v) + n++ + } + return +} + +// +// CountUpperLower return number of uppercase and lowercase in text. +// +func CountUpperLower(text string) (upper, lower int) { + for _, v := range text { + if !unicode.IsLetter(v) { + continue + } + if unicode.IsUpper(v) { + upper++ + } else { + lower++ + } + } + return +} + +// +// MaxCharSequence return character which have maximum sequence in `text`. +// +func MaxCharSequence(text string) (rune, int) { + if len(text) == 0 { + return 0, 0 + } + + chars, counts := CountCharSequence(text) + + if len(chars) == 0 { + return 0, 0 + } + + _, idx, _ := numbers.IntsFindMax(counts) + + return chars[idx], counts[idx] +} + +// +// RatioAlnum compute and return ratio of alpha-numeric within all character +// in text. +// +func RatioAlnum(text string) float64 { + textlen := len(text) + if textlen == 0 { + return 0 + } + + n := CountAlnum(text) + + return float64(n) / float64(textlen) +} + +// +// RatioDigit compute and return digit ratio to all characters in text. 
+// +func RatioDigit(text string) float64 { + textlen := len(text) + + if textlen == 0 { + return 0 + } + + n := CountDigit(text) + + if n == 0 { + return 0 + } + + return float64(n) / float64(textlen) +} + +// +// RatioUpper compute and return ratio of uppercase character to all character +// in text. +// +func RatioUpper(text string) float64 { + if len(text) == 0 { + return 0 + } + up, lo := CountUpperLower(text) + + total := up + lo + if total == 0 { + return 0 + } + + return float64(up) / float64(total) +} + +// +// RatioNonAlnum return ratio of non-alphanumeric character to all +// character in text. +// +// If `withspace` is true then white-space character will be counted as +// non-alpha numeric, otherwise it will be skipped. +// +func RatioNonAlnum(text string, withspace bool) float64 { + textlen := len(text) + if textlen == 0 { + return 0 + } + + n := CountNonAlnum(text, withspace) + + return float64(n) / float64(textlen) +} + +// +// RatioUpperLower compute and return ratio of uppercase with lowercase +// character in text. +// +func RatioUpperLower(text string) float64 { + if len(text) == 0 { + return 0 + } + + up, lo := CountUpperLower(text) + + if lo == 0 { + return float64(up) + } + + return float64(up) / float64(lo) +} + +// +// TextSumCountTokens given a text, count how many tokens inside of it and +// return sum of all counts. +// +func TextSumCountTokens(text string, tokens []string, sensitive bool) ( + cnt int, +) { + if len(text) == 0 { + return 0 + } + + if !sensitive { + text = strings.ToLower(text) + } + + for _, v := range tokens { + if !sensitive { + v = strings.ToLower(v) + } + cnt += strings.Count(text, v) + } + + return +} + +// +// TextFrequencyOfTokens return frequencies of tokens by counting each +// occurence of token and divide it with total words in text. 
+// +func TextFrequencyOfTokens(text string, tokens []string, sensitive bool) ( + freq float64, +) { + if len(text) == 0 { + return 0 + } + + words := Split(text, false, false) + wordsLen := float64(len(words)) + + tokensCnt := float64(TextSumCountTokens(text, tokens, sensitive)) + + freq = tokensCnt / wordsLen + + return +} diff --git a/lib/strings/statistic_example_test.go b/lib/strings/statistic_example_test.go new file mode 100644 index 00000000..e3df7d56 --- /dev/null +++ b/lib/strings/statistic_example_test.go @@ -0,0 +1,113 @@ +// Copyright 2018, Shulhan <ms@kilabit.info>. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package strings + +import ( + "fmt" +) + +func ExampleCountAlnum() { + fmt.Println(CountAlnum("// A b c 1 2 3")) + // Output: 6 +} + +func ExampleCountAlnumDistribution() { + chars, counts := CountAlnumDistribution("// A b c A b") + fmt.Printf("%c %v\n", chars, counts) + // Output: [A b c] [2 2 1] +} + +func ExampleCountCharSequence() { + text := "aaa abcdee ffgf" + + chars, counts := CountCharSequence(text) + + // 'a' is not counted as 4 because its breaked by another character, + // space ' '. + fmt.Printf("%c %v\n", chars, counts) + // Output: + // [a e f] [3 2 2] +} + +func ExampleCountDigit() { + text := "// Copyright 2018 Mhd Sulhan <ms@kilabit.info>. All rights reserved." 
+ fmt.Println(CountDigit(text)) + // Output: 4 +} + +func ExampleCountUniqChar() { + fmt.Println(CountUniqChar("abc abc")) + fmt.Println(CountUniqChar("abc ABC")) + // Output: + // 4 + // 7 +} + +func ExampleCountUpperLower() { + fmt.Println(CountUpperLower("// A B C d e f g h I J K")) + // Output: 6 5 +} + +func ExampleMaxCharSequence() { + c, n := MaxCharSequence("aaa abcdee ffgf") + + fmt.Printf("%c %d\n", c, n) + // Output: a 3 +} + +func ExampleRatioAlnum() { + fmt.Println(RatioAlnum("//A1")) + // Output: 0.5 +} + +func ExampleRatioDigit() { + fmt.Println(RatioDigit("// A b 0 1")) + // Output: 0.2 +} + +func ExampleRatioNonAlnum() { + fmt.Println(RatioNonAlnum("// A1", false)) + fmt.Println(RatioNonAlnum("// A1", true)) + // Output: + // 0.4 + // 0.6 +} + +func ExampleRatioUpper() { + fmt.Println(RatioUpper("// A b c d")) + // Output: 0.25 +} + +func ExampleRatioUpperLower() { + fmt.Println(RatioUpperLower("// A b c d e")) + // Output: 0.25 +} + +func ExampleTextSumCountTokens() { + text := "[[aa]] [[AA]]" + + tokens := []string{"[["} + fmt.Println(TextSumCountTokens(text, tokens, false)) + + tokens = []string{"aa"} + fmt.Println(TextSumCountTokens(text, tokens, false)) + + fmt.Println(TextSumCountTokens(text, tokens, true)) + + // Output: + // 2 + // 2 + // 1 +} + +func ExampleTextFrequencyOfTokens() { + text := "a b c d A B C D 1 2" + + fmt.Println(TextFrequencyOfTokens(text, []string{"a"}, false)) + fmt.Println(TextFrequencyOfTokens(text, []string{"a"}, true)) + // Output: + // 0.2 + // 0.1 +} diff --git a/lib/strings/statistic_test.go b/lib/strings/statistic_test.go new file mode 100644 index 00000000..aabc88fb --- /dev/null +++ b/lib/strings/statistic_test.go @@ -0,0 +1,384 @@ +// Copyright 2018, Shulhan <ms@kilabit.info>. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package strings + +import ( + "testing" + + "github.com/shuLhan/share/lib/test" +) + +func TestCountAlnum(t *testing.T) { + cases := []struct { + text string + exp int + }{{ + // Empty + }, { + text: "// 123", + exp: 3, + }, { + text: "// A B C", + exp: 3, + }, { + text: "// A b c 1 2 3", + exp: 6, + }} + + for _, c := range cases { + got := CountAlnum(c.text) + test.Assert(t, "", c.exp, got, true) + } +} + +func TestCountAlnumDistribution(t *testing.T) { + cases := []struct { + text string + expChars []rune + expCounts []int + }{{ + // Empty + }, { + text: "// 123", + expChars: []rune{'1', '2', '3'}, + expCounts: []int{1, 1, 1}, + }, { + text: "// A B C", + expChars: []rune{'A', 'B', 'C'}, + expCounts: []int{1, 1, 1}, + }, { + text: "// A B C A B C", + expChars: []rune{'A', 'B', 'C'}, + expCounts: []int{2, 2, 2}, + }} + + for _, c := range cases { + gotChars, gotCounts := CountAlnumDistribution(c.text) + test.Assert(t, "chars", c.expChars, gotChars, true) + test.Assert(t, "counts", c.expCounts, gotCounts, true) + } +} + +func TestCountCharSequence(t *testing.T) { + cases := []struct { + text string + expChars []rune + expCounts []int + }{{ + text: "// Copyright 2016 Mhd Sulhan <ms@kilabit.info>. 
All rights reserved.", + expChars: []rune{'/', 'l'}, + expCounts: []int{2, 2}, + }, { + text: "Use of this source code is governed by a BSD-style", + }, { + text: "aaa abcdee ffgf", + expChars: []rune{'a', 'e', 'f'}, + expCounts: []int{3, 2, 2}, + }, { + text: " | image name = {{legend|#0080FF|Areas affected by flooding}}{{legend|#002255|Death(s) affected by flooding}}{{legend|#C83737|Areas affected by flooding and strong winds}}{{legend|#550000|Death(s) affected by flooding and strong winds}}", + expChars: []rune{'{', '0', 'F', 'f', 'o', '}', '{', '0', '2', '5', 'f', 'o', '}', '{', 'f', 'o', '}', '{', '5', '0', 'f', 'o', '}'}, + expCounts: []int{2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 4, 2, 2, 2}, + }} + + for _, c := range cases { + gotChars, gotCounts := CountCharSequence(c.text) + + test.Assert(t, "", c.expChars, gotChars, true) + test.Assert(t, "", c.expCounts, gotCounts, true) + } +} + +func TestCountDigit(t *testing.T) { + cases := []struct { + text string + exp int + }{{ + // Empty. + }, { + text: "// Copyright 2018 Mhd Sulhan <ms@kilabit.info>. All rights reserved.", + exp: 4, + }} + + for _, c := range cases { + got := CountDigit(c.text) + + test.Assert(t, "", c.exp, got, true) + } +} + +func TestCountNonAlnum(t *testing.T) { + cases := []struct { + text string + withspace bool + exp int + }{{ + // Empty + }, { + text: "// 123", + exp: 2, + }, { + text: "// 123", + withspace: true, + exp: 3, + }, { + text: "// A B C", + exp: 2, + }, { + text: "// A B C", + withspace: true, + exp: 5, + }, { + text: "// A b c 1 2 3", + exp: 2, + }, { + text: "// A b c 1 2 3", + withspace: true, + exp: 8, + }} + + for _, c := range cases { + got := CountNonAlnum(c.text, c.withspace) + test.Assert(t, "", c.exp, got, true) + } +} + +func TestCountUniqChar(t *testing.T) { + cases := []struct { + text string + exp int + }{{ + // Empty. 
+ }, { + text: "abc abc", + exp: 4, + }, { + text: "abc ABC", + exp: 7, + }} + + for _, c := range cases { + got := CountUniqChar(c.text) + test.Assert(t, "", c.exp, got, true) + } +} + +func TestCountUpperLower(t *testing.T) { + cases := []struct { + text string + expUpper int + expLower int + }{{ + text: "// Copyright 2016 Mhd Sulhan <ms@kilabit.info>. All rights reserved.", + expUpper: 4, + expLower: 44, + }} + + for _, c := range cases { + gotup, gotlo := CountUpperLower(c.text) + + test.Assert(t, "", c.expUpper, gotup, true) + test.Assert(t, "", c.expLower, gotlo, true) + } +} + +func TestMaxCharSequence(t *testing.T) { + cases := []struct { + text string + char rune + count int + }{{ + text: "// Copyright 2016 Mhd Sulhan <ms@kilabit.info>. All rights reserved.", + char: '/', + count: 2, + }, { + text: "Use of this source code is governed by a BSD-style", + }, { + text: "aaa abcdee ffgf", + char: 'a', + count: 3, + }, { + text: " | image name = {{legend|#0080FF|Areas affected by flooding}}{{legend|#002255|Death(s) affected by flooding}}{{legend|#C83737|Areas affected by flooding and strong winds}}{{legend|#550000|Death(s) affected by flooding and strong winds}}", + char: '0', + count: 4, + }} + + for _, c := range cases { + gotv, gotc := MaxCharSequence(c.text) + + test.Assert(t, "", c.char, gotv, true) + test.Assert(t, "", c.count, gotc, true) + } +} + +func TestRatioAlnum(t *testing.T) { + cases := []struct { + text string + exp float64 + }{{ + // Empty. + }, { + text: "// A b c d", + exp: 0.4, + }, { + text: "// A123b", + exp: 0.625, + }} + + for _, c := range cases { + got := RatioAlnum(c.text) + test.Assert(t, "", c.exp, got, true) + } +} + +func TestRatioDigit(t *testing.T) { + cases := []struct { + text string + exp float64 + }{{ + // Empty. 
+ }, { + text: "// A b c d", + exp: 0, + }, { + text: "// A123b", + exp: 0.375, + }} + + for _, c := range cases { + got := RatioDigit(c.text) + test.Assert(t, "", c.exp, got, true) + } +} + +func TestRatioNonAlnum(t *testing.T) { + cases := []struct { + text string + withspace bool + exp float64 + }{{ + // Empty. + }, { + text: "// A b c d", + exp: 0.2, + }, { + text: "// A b c d", + withspace: true, + exp: 0.6, + }, { + text: "// A123b", + exp: 0.25, + }, { + text: "// A123b", + withspace: true, + exp: 0.375, + }} + + for _, c := range cases { + got := RatioNonAlnum(c.text, c.withspace) + test.Assert(t, "", c.exp, got, true) + } +} + +func TestRatioUpper(t *testing.T) { + cases := []struct { + text string + exp float64 + }{{ + // Empty. + }, { + text: "// A b c d", + exp: 0.25, + }} + + for _, c := range cases { + got := RatioUpper(c.text) + test.Assert(t, "", c.exp, got, true) + } +} + +func TestRatioUpperLower(t *testing.T) { + cases := []struct { + text string + exp float64 + }{{ + // Empty + }, { + text: "// 134234", + }, { + text: "// A B C", + exp: 3, + }, { + text: "// A b c d e", + exp: 0.25, + }} + + for _, c := range cases { + got := RatioUpperLower(c.text) + test.Assert(t, "", c.exp, got, true) + } +} + +func TestTextSumCountTokens(t *testing.T) { + cases := []struct { + text string + tokens []string + sensitive bool + exp int + }{{ + // Empty. 
+ }, { + text: "[[aa]] [[AA]]", + tokens: []string{"[["}, + exp: 2, + }, { + text: "[[aa]] [[AA]]", + tokens: []string{"]]"}, + exp: 2, + }, { + text: "[[aa]] [[AA]]", + tokens: []string{"[[", "]]"}, + exp: 4, + }, { + text: "[[aa]] [[AA]]", + tokens: []string{"aa"}, + exp: 2, + }, { + text: "[[aa]] [[AA]]", + tokens: []string{"aa"}, + sensitive: true, + exp: 1, + }} + + for _, c := range cases { + got := TextSumCountTokens(c.text, c.tokens, c.sensitive) + test.Assert(t, "", c.exp, got, true) + } +} + +func TestTextFrequencyOfTokens(t *testing.T) { + cases := []struct { + text string + tokens []string + sensitive bool + exp float64 + }{{ + // Empty. + }, { + text: "a b c d A B C D", + tokens: []string{"a"}, + exp: 0.25, + }, { + text: "a b c d A B C D", + tokens: []string{"a"}, + sensitive: true, + exp: 0.125, + }} + + for _, c := range cases { + got := TextFrequencyOfTokens(c.text, c.tokens, c.sensitive) + test.Assert(t, "", c.exp, got, true) + } + +} diff --git a/lib/strings/string_test.go b/lib/strings/string_test.go index 859cbc51..e77affb5 100644 --- a/lib/strings/string_test.go +++ b/lib/strings/string_test.go @@ -15,6 +15,8 @@ func TestCleanURI(t *testing.T) { text string exp string }{{ + // Empty + }, { text: `ftp://test.com/123 The [[United States]] has regularly voted alone and against international consensus, using its [[United Nations Security Council veto power|veto power]] to block the adoption of proposed UN Security Council resolutions supporting the [[PLO]] and calling for a two-state solution to the [[Israeli-Palestinian conflict]].<ref>[http://books.google.ca/books?id=CHL5SwGvobQC&pg=PA168&dq=US+veto+Israel+regularly#v=onepage&q=US%20veto%20Israel%20regularly&f=false Pirates and emperors, old and new: international terrorism in the real world], [[Noam Chomsky]], p. 
168.</ref><ref>The US has also used its veto to block resolutions that are critical of Israel.[https://books.google.ca/books?id=yzmpDAz7ZAwC&pg=PT251&dq=US+veto+Israel+regularly&lr=#v=onepage&q=US%20veto%20Israel%20regularly&f=false Uneasy neighbors], David T. Jones and David Kilgour, p. 235.</ref> The United States responded to the frequent criticism from UN organs by adopting the [[Negroponte doctrine]].`, exp: ` The [[United States]] has regularly voted alone and against international consensus, using its [[United Nations Security Council veto power|veto power]] to block the adoption of proposed UN Security Council resolutions supporting the [[PLO]] and calling for a two-state solution to the [[Israeli-Palestinian conflict]].<ref>[ Pirates and emperors, old and new: international terrorism in the real world], [[Noam Chomsky]], p. 168.</ref><ref>The US has also used its veto to block resolutions that are critical of Israel.[ Uneasy neighbors], David T. Jones and David Kilgour, p. 235.</ref> The United States responded to the frequent criticism from UN organs by adopting the [[Negroponte doctrine]].`, }} diff --git a/lib/strings/strings.go b/lib/strings/strings.go index 3b4db829..d050ce52 100644 --- a/lib/strings/strings.go +++ b/lib/strings/strings.go @@ -7,3 +7,254 @@ // strings. // package strings + +import ( + "strings" + + "github.com/shuLhan/share/lib/numbers" +) + +// +// CountMissRate given two slice of string, count number of string that is +// not equal with each other, and return the miss rate as +// +// number of not equal / number of data +// +// and count of missing, and length of input `src`. 
//
// Only the first min(len(src), len(target)) elements are compared, and the
// returned length is that number of compared elements.
// If there is nothing to compare, because one or both slices are empty,
// all returned values are zero.
//
func CountMissRate(src []string, target []string) (
	missrate float64,
	nmiss, length int,
) {
	length = len(src)
	if len(target) < length {
		length = len(target)
	}
	if length == 0 {
		// Dividing zero miss by zero length would result in NaN.
		return 0, 0, 0
	}

	for x := 0; x < length; x++ {
		if src[x] != target[x] {
			nmiss++
		}
	}

	return float64(nmiss) / float64(length), nmiss, length
}

//
// CountToken will return number of token occurrence in words.
//
// A word is counted only if it is equal to the token as a whole.
// If `sensitive` is false, the words and the token are compared in lower
// case.
//
func CountToken(words []string, token string, sensitive bool) int {
	if !sensitive {
		token = strings.ToLower(token)
	}

	var cnt int
	for _, word := range words {
		if !sensitive {
			word = strings.ToLower(word)
		}
		if word == token {
			cnt++
		}
	}
	return cnt
}

//
// CountTokens count number of occurrence of each `tokens` values in words.
// Return number of each tokens based on their index, or nil if tokens is
// empty.
//
func CountTokens(words []string, tokens []string, sensitive bool) []int {
	if len(tokens) == 0 {
		return nil
	}

	counters := make([]int, len(tokens))

	for x, token := range tokens {
		counters[x] = CountToken(words, token, sensitive)
	}

	return counters
}

//
// FrequencyOfToken return frequency of token in words using
//
//	count-of-token / total-words
//
// It return 0 if words is empty.
//
func FrequencyOfToken(words []string, token string, sensitive bool) float64 {
	if len(words) == 0 {
		return 0
	}

	cnt := CountToken(words, token, sensitive)

	return float64(cnt) / float64(len(words))
}

//
// FrequencyOfTokens will compute each frequency of token in words.
+// +func FrequencyOfTokens(words, tokens []string, sensitive bool) (probs []float64) { + if len(words) == 0 || len(tokens) == 0 { + return + } + + probs = make([]float64, len(tokens)) + + for x := 0; x < len(tokens); x++ { + probs[x] = FrequencyOfToken(words, tokens[x], sensitive) + } + + return probs +} + +// +// IsContain return true if elemen `el` is in slice of string `ss`, +// otherwise return false. +// +func IsContain(ss []string, el string) bool { + for x := 0; x < len(ss); x++ { + if ss[x] == el { + return true + } + } + return false +} + +// +// IsEqual compare elements of two slice of string without regard to +// their order. +// +// Return true if each both slice have the same elements, false otherwise. +// +func IsEqual(a, b []string) bool { + alen := len(a) + + if alen != len(b) { + return false + } + + check := make([]bool, alen) + + for x, ls := range a { + for _, rs := range b { + if ls == rs { + check[x] = true + } + } + } + + for _, v := range check { + if !v { + return false + } + } + return true +} + +// +// Longest find the longest word in words and return their value and index. +// +// If words is empty return nil string with negative (-1) index. +// +func Longest(words []string) (string, int) { + if len(words) <= 0 { + return "", -1 + } + + var ( + outlen, idx int + out string + ) + for x := 0; x < len(words); x++ { + vlen := len(words[x]) + if vlen > outlen { + outlen = vlen + out = words[x] + idx = x + } + } + return out, idx +} + +// +// MostFrequentTokens return the token that has highest frequency in words. +// +// For example, given input +// +// words: [A A B A B C C] +// tokens: [A B] +// +// it will return A as the majority tokens in words. +// If tokens has equal frequency, then the first token in order will returned. 
+// +func MostFrequentTokens(words []string, tokens []string, sensitive bool) string { + if len(words) == 0 || len(tokens) == 0 { + return "" + } + + tokensCount := CountTokens(words, tokens, sensitive) + _, maxIdx, _ := numbers.IntsFindMax(tokensCount) + + return tokens[maxIdx] +} + +// +// SortByIndex will sort the slice of string in place using list of index. +// +func SortByIndex(ss *[]string, sortedIds []int) { + newd := make([]string, len(*ss)) + + for x := 0; x < len(sortedIds); x++ { + newd[x] = (*ss)[sortedIds[x]] + } + + (*ss) = newd +} + +// +// Swap two indices value of string. +// If x or y is less than zero, it will return unchanged slice. +// If x or y is greater than length of slice, it will return unchanged slice. +// +func Swap(ss []string, x, y int) { + if x == y { + return + } + if x < 0 || y < 0 { + return + } + if x > len(ss) || y > len(ss) { + return + } + + tmp := ss[x] + ss[x] = ss[y] + ss[y] = tmp +} + +// +// TotalFrequencyOfTokens return total frequency of list of token in words. +// +func TotalFrequencyOfTokens(words, tokens []string, sensitive bool) float64 { + if len(words) <= 0 || len(tokens) <= 0 { + return 0 + } + + var sumfreq float64 + + for x := 0; x < len(tokens); x++ { + sumfreq += FrequencyOfToken(words, tokens[x], sensitive) + } + + return sumfreq +} diff --git a/lib/strings/strings_example_test.go b/lib/strings/strings_example_test.go new file mode 100644 index 00000000..d661232a --- /dev/null +++ b/lib/strings/strings_example_test.go @@ -0,0 +1,147 @@ +// Copyright 2018, Shulhan <ms@kilabit.info>. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package strings + +import ( + "fmt" +) + +func ExampleCountMissRate() { + src := []string{"A", "B", "C", "D"} + tgt := []string{"A", "B", "C", "D"} + fmt.Println(CountMissRate(src, tgt)) + + src = []string{"A", "B", "C", "D"} + tgt = []string{"B", "B", "C", "D"} + fmt.Println(CountMissRate(src, tgt)) + + src = []string{"A", "B", "C", "D"} + tgt = []string{"B", "C", "C", "D"} + fmt.Println(CountMissRate(src, tgt)) + + src = []string{"A", "B", "C", "D"} + tgt = []string{"B", "C", "D", "D"} + fmt.Println(CountMissRate(src, tgt)) + + src = []string{"A", "B", "C", "D"} + tgt = []string{"C", "D", "D", "E"} + fmt.Println(CountMissRate(src, tgt)) + + // Output: + // 0 0 4 + // 0.25 1 4 + // 0.5 2 4 + // 0.75 3 4 + // 1 4 4 +} + +func ExampleCountToken() { + words := []string{"A", "B", "C", "a", "b", "c"} + fmt.Println(CountToken(words, "C", false)) + fmt.Println(CountToken(words, "C", true)) + // Output: + // 2 + // 1 +} + +func ExampleCountTokens() { + words := []string{"A", "B", "C", "a", "b", "c"} + tokens := []string{"A", "B"} + fmt.Println(CountTokens(words, tokens, false)) + fmt.Println(CountTokens(words, tokens, true)) + // Output: + // [2 2] + // [1 1] +} + +func ExampleFrequencyOfToken() { + words := []string{"A", "B", "C", "a", "b", "c"} + fmt.Println(FrequencyOfToken(words, "C", false)) + fmt.Println(FrequencyOfToken(words, "C", true)) + // Output: + // 0.3333333333333333 + // 0.16666666666666666 + +} + +func ExampleFrequencyOfTokens() { + words := []string{"A", "B", "C", "a", "b", "c"} + tokens := []string{"A", "B"} + fmt.Println(FrequencyOfTokens(words, tokens, false)) + fmt.Println(FrequencyOfTokens(words, tokens, true)) + // Output: + // [0.3333333333333333 0.3333333333333333] + // [0.16666666666666666 0.16666666666666666] +} + +func ExampleIsEqual() { + fmt.Println(IsEqual([]string{"a", "b"}, []string{"a", "b"})) + fmt.Println(IsEqual([]string{"a", "b"}, []string{"b", "a"})) + fmt.Println(IsEqual([]string{"a", "b"}, []string{"a"})) + 
fmt.Println(IsEqual([]string{"a", "b"}, []string{"b", "b"})) + // Output: + // true + // true + // false + // false +} + +func ExampleLongest() { + words := []string{"a", "bb", "ccc", "d", "eee"} + fmt.Println(Longest(words)) + // Output: ccc 2 +} + +func ExampleMostFrequentTokens() { + words := []string{"a", "b", "B", "B", "a"} + tokens := []string{"a", "b"} + fmt.Println(MostFrequentTokens(words, tokens, false)) + fmt.Println(MostFrequentTokens(words, tokens, true)) + // Output: + // b + // a +} + +func ExampleSortByIndex() { + dat := []string{"Z", "X", "C", "V", "B", "N", "M"} + ids := []int{4, 2, 6, 5, 3, 1, 0} + + fmt.Println(dat) + SortByIndex(&dat, ids) + fmt.Println(dat) + // Output: + // [Z X C V B N M] + // [B C M N V X Z] +} + +func ExampleSwap() { + ss := []string{"a", "b", "c"} + Swap(ss, -1, 1) + fmt.Println(ss) + Swap(ss, 1, -1) + fmt.Println(ss) + Swap(ss, 4, 1) + fmt.Println(ss) + Swap(ss, 1, 4) + fmt.Println(ss) + Swap(ss, 1, 2) + fmt.Println(ss) + // Output: + // [a b c] + // [a b c] + // [a b c] + // [a b c] + // [a c b] +} + +func ExampleTotalFrequencyOfTokens() { + words := []string{"A", "B", "C", "a", "b", "c"} + tokens := []string{"A", "B"} + fmt.Println(TotalFrequencyOfTokens(words, tokens, false)) + fmt.Println(TotalFrequencyOfTokens(words, tokens, true)) + // Output: + // 0.6666666666666666 + // 0.3333333333333333 +} diff --git a/lib/strings/strings_test.go b/lib/strings/strings_test.go new file mode 100644 index 00000000..3e0b046c --- /dev/null +++ b/lib/strings/strings_test.go @@ -0,0 +1,251 @@ +// Copyright 2018, Shulhan <ms@kilabit.info>. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package strings + +import ( + "testing" + + "github.com/shuLhan/share/lib/numbers" + "github.com/shuLhan/share/lib/test" +) + +func TestCountMissRate(t *testing.T) { + cases := []struct { + src []string + target []string + exp float64 + }{{ + // Empty. 
+ }, { + src: []string{"A", "B", "C", "D"}, + target: []string{"A", "B", "C"}, + exp: 0, + }, { + src: []string{"A", "B", "C", "D"}, + target: []string{"A", "B", "C", "D"}, + exp: 0, + }, { + src: []string{"A", "B", "C", "D"}, + target: []string{"B", "B", "C", "D"}, + exp: 0.25, + }, { + src: []string{"A", "B", "C", "D"}, + target: []string{"B", "C", "C", "D"}, + exp: 0.5, + }, { + src: []string{"A", "B", "C", "D"}, + target: []string{"B", "C", "D", "D"}, + exp: 0.75, + }, { + src: []string{"A", "B", "C", "D"}, + target: []string{"C", "D", "D", "E"}, + exp: 1.0, + }} + + for _, c := range cases { + got, _, _ := CountMissRate(c.src, c.target) + test.Assert(t, "", c.exp, got, true) + } +} + +func TestCountTokens(t *testing.T) { + cases := []struct { + words []string + tokens []string + sensitive bool + exp []int + }{{ + // Empty. + }, { + words: []string{"A", "B", "C", "a", "b", "c"}, + tokens: []string{"A", "B"}, + exp: []int{2, 2}, + }, { + words: []string{"A", "B", "C", "a", "b", "c"}, + tokens: []string{"A", "B"}, + sensitive: true, + exp: []int{1, 1}, + }} + + for _, c := range cases { + got := CountTokens(c.words, c.tokens, c.sensitive) + + test.Assert(t, "", c.exp, got, true) + } +} + +func TestFrequencyOfTokens(t *testing.T) { + cases := []struct { + words, tokens []string + exp []float64 + sensitive bool + }{{ + // Empty. 
+ }, { + words: []string{"a", "b", "a", "b", "a", "c"}, + tokens: []string{"a", "b"}, + exp: []float64{0.5, 0.3333333333333333}, + }} + + for _, c := range cases { + got := FrequencyOfTokens(c.words, c.tokens, c.sensitive) + test.Assert(t, "", c.exp, got, true) + } +} + +func TestIsContain(t *testing.T) { + ss := []string{"a", "b", "c", "d"} + + got := IsContain(ss, "a") + test.Assert(t, "", true, got, true) + + got = IsContain(ss, "e") + test.Assert(t, "", false, got, true) +} + +func TestIsEqual(t *testing.T) { + cases := []struct { + a, b []string + exp bool + }{{ + a: []string{"a", "b"}, + b: []string{"a", "b"}, + exp: true, + }, { + a: []string{"a", "b"}, + b: []string{"b", "a"}, + exp: true, + }, { + a: []string{"a", "b"}, + b: []string{"a"}, + }, { + a: []string{"a"}, + b: []string{"b", "a"}, + }, { + a: []string{"a", "b"}, + b: []string{"a", "c"}, + }} + + for _, c := range cases { + test.Assert(t, "", c.exp, IsEqual(c.a, c.b), true) + } +} + +func TestLongest(t *testing.T) { + cases := []struct { + words []string + exp string + expIdx int + }{{ + // Empty. 
+ expIdx: -1, + }, { + words: []string{"a", "bb", "ccc", "d", "eee"}, + exp: "ccc", + expIdx: 2, + }, { + words: []string{"a", "bb", "ccc", "dddd", "eee"}, + exp: "dddd", + expIdx: 3, + }} + + for _, c := range cases { + got, idx := Longest(c.words) + + test.Assert(t, "word", c.exp, got, true) + test.Assert(t, "idx", c.expIdx, idx, true) + } +} + +func TestMostFrequentTokens(t *testing.T) { + cases := []struct { + words, tokens []string + sensitive bool + exp string + }{{ + // Empty + }, { + words: []string{"a", "b", "A"}, + tokens: []string{"a", "b"}, + exp: "a", + }, { + words: []string{"a", "b", "A", "b"}, + tokens: []string{"a", "b"}, + sensitive: true, + exp: "b", + }, { + words: []string{"a", "b", "A", "B"}, + tokens: []string{"a", "b"}, + exp: "a", + }} + + for _, c := range cases { + got := MostFrequentTokens(c.words, c.tokens, c.sensitive) + test.Assert(t, "", c.exp, got, true) + } +} + +func TestSortByIndex(t *testing.T) { + dat := []string{"Z", "X", "C", "V", "B", "N", "M"} + exp := []string{"B", "C", "M", "N", "V", "X", "Z"} + ids := []int{4, 2, 6, 5, 3, 1, 0} + + SortByIndex(&dat, ids) + + test.Assert(t, "", exp, dat, true) +} + +func TestSwap(t *testing.T) { + ss := []string{"a", "b", "c"} + + cases := []struct { + x, y int + exp []string + }{{ + x: -1, + exp: []string{"a", "b", "c"}, + }, { + y: -1, + exp: []string{"a", "b", "c"}, + }, { + x: 4, + exp: []string{"a", "b", "c"}, + }, { + y: 4, + exp: []string{"a", "b", "c"}, + }, { + x: 1, + y: 1, + exp: []string{"a", "b", "c"}, + }, { + x: 1, + y: 2, + exp: []string{"a", "c", "b"}, + }} + for _, c := range cases { + Swap(ss, c.x, c.y) + test.Assert(t, "", c.exp, ss, true) + } +} + +func TestTotalFrequencyOfTokens(t *testing.T) { + cases := []struct { + words, tokens []string + sensitive bool + exp float64 + }{{ + // Empty. 
+ }, { + words: []string{"a", "b", "a", "b", "a", "c"}, + tokens: []string{"a", "b"}, + exp: numbers.Float64Round((3.0/6)+(2.0/6), 3), + }} + + for _, c := range cases { + got := TotalFrequencyOfTokens(c.words, c.tokens, c.sensitive) + + test.Assert(t, "", c.exp, numbers.Float64Round(got, 3), true) + } +} diff --git a/lib/strings/table.go b/lib/strings/table.go new file mode 100644 index 00000000..708f39c6 --- /dev/null +++ b/lib/strings/table.go @@ -0,0 +1,181 @@ +// Copyright 2018, Shulhan <ms@kilabit.info>. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package strings + +import ( + "fmt" + + "github.com/shuLhan/share/lib/debug" +) + +// +// Table is for working with set of row. +// +// Each element in table is in the form of +// +// [ +// [["a"],["b","c"],...], // Row +// [["x"],["y",z"],...] // Row +// ] +// +type Table []Row + +// +// createIndent create n space indentation and return it. +// +func createIndent(n int) (s string) { + for i := 0; i < n; i++ { + s += " " + } + return +} + +// +// Partition group the each element of slice "ss" into non-empty +// record, in such a way that every element is included in one and only of the +// record. +// +// Given a list of element in "ss", and number of partition "k", return +// the set of all group of all elements without duplication. +// +// Number of possible list can be computed using Stirling number of second kind. +// +// For more information see, +// +// - https://en.wikipedia.org/wiki/Partition_of_a_set +// +func Partition(ss []string, k int) (table Table) { + n := len(ss) + seed := make([]string, n) + copy(seed, ss) + + if debug.Value >= 1 { + fmt.Printf("lib/strings: %s Partition(%v,%v)\n", createIndent(n), n, k) + } + + // if only one split return the set contain only seed as list. 
+ // input: {a,b,c}, output: {{a,b,c}} + if k == 1 { + list := make(Row, 1) + list[0] = seed + + table = append(table, list) + return table + } + + // if number of element in set equal with number split, return the set + // that contain each element in list. + // input: {a,b,c}, output:= {{a},{b},{c}} + if n == k { + return SinglePartition(seed) + } + + // take the first element + el := seed[0] + + // remove the first element from set + seed = append(seed[:0], seed[1:]...) + + if debug.Value >= 1 { + fmt.Printf("[tekstus] %s el: %s, seed: %s", createIndent(n), el, seed) + } + + // generate child list + genTable := Partition(seed, k) + + if debug.Value >= 1 { + fmt.Printf("[tekstus] %s genTable join: %v", createIndent(n), genTable) + } + + // join elemen with generated set + table = genTable.JoinCombination(el) + + if debug.Value >= 1 { + fmt.Printf("[tekstus] %s join %s : %v\n", createIndent(n), el, + table) + } + + genTable = Partition(seed, k-1) + + if debug.Value >= 1 { + fmt.Printf("[tesktus] %s genTable append: %s", createIndent(n), genTable) + } + + for _, row := range genTable { + list := make(Row, len(row)) + copy(list, row) + list = append(list, []string{el}) + table = append(table, list) + } + + if debug.Value >= 1 { + fmt.Printf("[tesktus] %s append %v : %v\n", createIndent(n), el, + table) + } + + return +} + +// +// SinglePartition create a table from a slice of string, where each element +// in slice become a single record. +// +func SinglePartition(ss []string) Table { + table := make(Table, 0) + row := make(Row, len(ss)) + + for x := 0; x < len(ss); x++ { + row[x] = []string{ss[x]} + } + + table = append(table, row) + + return table +} + +// +// IsEqual compare two table of string without regard to their order. +// +// Return true if both set is contains the same list, false otherwise. 
+// +func (table Table) IsEqual(other Table) bool { + if len(table) != len(other) { + return false + } + + check := make([]bool, len(table)) + + for x := 0; x < len(table); x++ { + for y := 0; y < len(other); y++ { + if table[x].IsEqual(other[y]) { + check[x] = true + break + } + } + } + + for _, v := range check { + if !v { + return false + } + } + return true +} + +// +// JoinCombination for each row in table, generate new row and insert "s" into +// different record in different new row. +// +func (table Table) JoinCombination(s string) (tout Table) { + for _, row := range table { + for y := 0; y < len(row); y++ { + newRow := make(Row, len(row)) + copy(newRow, row) + newRow[y] = append(newRow[y], s) + tout = append(tout, newRow) + } + } + return +} diff --git a/lib/strings/table_example_test.go b/lib/strings/table_example_test.go new file mode 100644 index 00000000..2af224c3 --- /dev/null +++ b/lib/strings/table_example_test.go @@ -0,0 +1,68 @@ +// Copyright 2018, Shulhan <ms@kilabit.info>. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package strings + +import ( + "fmt" +) + +func ExamplePartition() { + ss := []string{"a", "b", "c"} + + fmt.Println("Partition k=1:", Partition(ss, 1)) + fmt.Println("Partition k=2:", Partition(ss, 2)) + fmt.Println("Partition k=3:", Partition(ss, 3)) + + // Output: + // Partition k=1: [[[a b c]]] + // Partition k=2: [[[b a] [c]] [[b] [c a]] [[b c] [a]]] + // Partition k=3: [[[a] [b] [c]]] +} + +func ExampleSinglePartition() { + ss := []string{"a", "b", "c"} + fmt.Println(SinglePartition(ss)) + // Output: + // [[[a] [b] [c]]] +} + +func ExampleTable_IsEqual() { + table := Table{ + {{"a"}, {"b", "c"}}, + {{"b"}, {"a", "c"}}, + {{"c"}, {"a", "b"}}, + } + fmt.Println(table.IsEqual(table)) + + other := Table{ + {{"c"}, {"a", "b"}}, + {{"a"}, {"b", "c"}}, + {{"b"}, {"a", "c"}}, + } + fmt.Println(table.IsEqual(other)) + + other = Table{ + {{"a"}, {"b", "c"}}, + {{"b"}, {"a", "c"}}, + } + fmt.Println(table.IsEqual(other)) + + // Output: + // true + // true + // false +} + +func ExampleTable_JoinCombination() { + table := Table{ + {{"a"}, {"b"}, {"c"}}, + } + s := "X" + + fmt.Println(table.JoinCombination(s)) + // Output: + // [[[a X] [b] [c]] [[a] [b X] [c]] [[a] [b] [c X]]] + +} diff --git a/lib/strings/table_test.go b/lib/strings/table_test.go new file mode 100644 index 00000000..c1078496 --- /dev/null +++ b/lib/strings/table_test.go @@ -0,0 +1,153 @@ +// Copyright 2018, Shulhan <ms@kilabit.info>. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package strings + +import ( + "testing" + + "github.com/shuLhan/share/lib/test" +) + +func TestPartition(t *testing.T) { + cases := []struct { + ss []string + k int + exp Table + }{{ + ss: []string{"a", "b"}, + k: 1, + exp: Table{ + {{"a", "b"}}, + }, + }, { + ss: []string{"a", "b"}, + k: 2, + exp: Table{ + {{"a"}, {"b"}}, + }, + }, { + ss: []string{"a", "b", "c"}, + k: 1, + exp: Table{ + {{"a", "b", "c"}}, + }, + }, { + ss: []string{"a", "b", "c"}, + k: 2, + exp: Table{ + {{"b", "a"}, {"c"}}, + {{"b"}, {"c", "a"}}, + {{"b", "c"}, {"a"}}, + }, + }, { + ss: []string{"a", "b", "c"}, + k: 3, + exp: Table{ + {{"a"}, {"b"}, {"c"}}, + }, + }} + + for _, c := range cases { + t.Logf("Partition: %d\n", c.k) + + got := Partition(c.ss, c.k) + + test.Assert(t, "", c.exp, got, true) + } +} + +func TestSinglePartition(t *testing.T) { + cases := []struct { + ss []string + exp Table + }{{ + ss: []string{"a", "b", "c"}, + exp: Table{ + {{"a"}, {"b"}, {"c"}}, + }, + }} + + for _, c := range cases { + got := SinglePartition(c.ss) + test.Assert(t, "", c.exp, got, true) + } +} + +func TestTable_IsEqual(t *testing.T) { + table := Table{ + {{"a"}, {"b", "c"}}, + {{"b"}, {"a", "c"}}, + {{"c"}, {"a", "b"}}, + } + + cases := []struct { + tcmp Table + exp bool + }{{ + // Empty. 
+ }, { + tcmp: table, + exp: true, + }, { + tcmp: Table{ + {{"c"}, {"a", "b"}}, + {{"a"}, {"b", "c"}}, + {{"b"}, {"a", "c"}}, + }, + exp: true, + }, { + tcmp: Table{ + {{"c"}, {"a", "b"}}, + {{"a"}, {"b", "c"}}, + }, + }, { + tcmp: Table{ + {{"b"}, {"a", "b"}}, + {{"c"}, {"a", "b"}}, + {{"a"}, {"b", "c"}}, + }, + }} + + for _, c := range cases { + got := table.IsEqual(c.tcmp) + test.Assert(t, "", c.exp, got, true) + } +} + +func TestTable_JoinCombination(t *testing.T) { + cases := []struct { + table Table + s string + exp Table + }{{ + table: Table{ + {{"a"}, {"b"}, {"c"}}, + }, + s: "X", + exp: Table{ + {{"a", "X"}, {"b"}, {"c"}}, + {{"a"}, {"b", "X"}, {"c"}}, + {{"a"}, {"b"}, {"c", "X"}}, + }, + }, { + table: Table{ + {{"a"}, {"b"}, {"c"}}, + {{"g"}, {"h"}}, + }, + s: "X", + exp: Table{ + {{"a", "X"}, {"b"}, {"c"}}, + {{"a"}, {"b", "X"}, {"c"}}, + {{"a"}, {"b"}, {"c", "X"}}, + {{"g", "X"}, {"h"}}, + {{"g"}, {"h", "X"}}, + }, + }} + + for _, c := range cases { + got := c.table.JoinCombination(c.s) + test.Assert(t, "", c.exp, got, true) + } +} diff --git a/lib/strings/to.go b/lib/strings/to.go new file mode 100644 index 00000000..9ae71c19 --- /dev/null +++ b/lib/strings/to.go @@ -0,0 +1,68 @@ +// Copyright 2018, Shulhan <ms@kilabit.info>. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package strings + +import ( + "fmt" + "strconv" +) + +// +// ToFloat64 convert slice of string to slice of float64. If converted +// string return error it will set the float value to 0. +// +func ToFloat64(ss []string) (sv []float64) { + var v float64 + var e error + + for _, s := range ss { + v, e = strconv.ParseFloat(s, 64) + + if nil != e { + v = 0 + } + + sv = append(sv, v) + } + return +} + +// +// ToInt64 convert slice of string to slice of int64. If converted +// string return an error it will set the integer value to 0. 
+// +func ToInt64(ss []string) (sv []int64) { + for _, s := range ss { + v, e := strconv.ParseInt(s, 10, 64) + + if e == nil { + sv = append(sv, v) + continue + } + + // Handle error, try to convert to float64 first. + ev := e.(*strconv.NumError) + if ev.Err == strconv.ErrSyntax { + f, e := strconv.ParseFloat(s, 64) + if e == nil { + v = int64(f) + } + } + + sv = append(sv, v) + } + return +} + +// +// ToStrings convert slice of interface to slice of string. +// +func ToStrings(is []interface{}) (vs []string) { + for x := 0; x < len(is); x++ { + v := fmt.Sprintf("%v", is[x]) + vs = append(vs, v) + } + return +} diff --git a/lib/strings/to_example_test.go b/lib/strings/to_example_test.go new file mode 100644 index 00000000..f714419a --- /dev/null +++ b/lib/strings/to_example_test.go @@ -0,0 +1,30 @@ +// Copyright 2018, Shulhan <ms@kilabit.info>. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package strings + +import ( + "fmt" +) + +func ExampleToFloat64() { + in := []string{"0", "1.1", "e", "3"} + + fmt.Println(ToFloat64(in)) + // Output: [0 1.1 0 3] +} + +func ExampleToInt64() { + in := []string{"0", "1", "e", "3.3"} + + fmt.Println(ToInt64(in)) + // Output: [0 1 0 3] +} + +func ExampleToStrings() { + i64 := []interface{}{0, 1.99, 2, 3} + + fmt.Println(ToStrings(i64)) + // Output: [0 1.99 2 3] +} diff --git a/lib/strings/to_test.go b/lib/strings/to_test.go new file mode 100644 index 00000000..ac7906b3 --- /dev/null +++ b/lib/strings/to_test.go @@ -0,0 +1,43 @@ +// Copyright 2018, Shulhan <ms@kilabit.info>. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package strings + +import ( + "testing" + + "github.com/shuLhan/share/lib/test" +) + +func TestToFloat64(t *testing.T) { + in := []string{"0", "1.1", "e", "3"} + exp := []float64{0, 1.1, 0, 3} + + got := ToFloat64(in) + + test.Assert(t, "", exp, got, true) +} + +func TestToInt64(t *testing.T) { + in := []string{"0", "1", "e", "3.3"} + exp := []int64{0, 1, 0, 3} + + got := ToInt64(in) + + test.Assert(t, "", exp, got, true) +} + +func TestToStrings(t *testing.T) { + is := make([]interface{}, 0) + i64 := []int64{0, 1, 2, 3} + exp := []string{"0", "1", "2", "3"} + + for _, v := range i64 { + is = append(is, v) + } + + got := ToStrings(is) + + test.Assert(t, "", exp, got, true) +} |
