diff options
Diffstat (limited to 'src/pkg/bytes')
| -rw-r--r-- | src/pkg/bytes/bytes.go | 49 | ||||
| -rw-r--r-- | src/pkg/bytes/bytes_test.go | 37 |
2 files changed, 82 insertions, 4 deletions
diff --git a/src/pkg/bytes/bytes.go b/src/pkg/bytes/bytes.go index 6eb6772328..e0b30b9677 100644 --- a/src/pkg/bytes/bytes.go +++ b/src/pkg/bytes/bytes.go @@ -3,7 +3,7 @@ // license that can be found in the LICENSE file. // The bytes package implements functions for the manipulation of byte slices. -// Analagous to the facilities of the strings package. +// Analogous to the facilities of the strings package. package bytes import ( @@ -127,6 +127,20 @@ func LastIndex(s, sep []byte) int { return -1 } +// IndexRune interprets s as a sequence of UTF-8-encoded Unicode code points. +// It returns the byte index of the first occurrence in s of the given rune. +// It returns -1 if rune is not present in s. +func IndexRune(s []byte, rune int) int { + for i := 0; i < len(s); { + r, size := utf8.DecodeRune(s[i:]) + if r == rune { + return i + } + i += size + } + return -1 +} + // IndexAny interprets s as a sequence of UTF-8-encoded Unicode code points. // It returns the byte index of the first occurrence in s of any of the Unicode // code points in chars. It returns -1 if chars is empty or if there is no code @@ -202,12 +216,20 @@ func SplitAfter(s, sep []byte, n int) [][]byte { // Fields splits the array s around each instance of one or more consecutive white space // characters, returning a slice of subarrays of s or an empty list if s contains only white space. func Fields(s []byte) [][]byte { + return FieldsFunc(s, unicode.IsSpace) +} + +// FieldsFunc interprets s as a sequence of UTF-8-encoded Unicode code points. +// It splits the array s at each run of code points c satisfying f(c) and +// returns a slice of subarrays of s. If no code points in s satisfy f(c), an +// empty slice is returned. +func FieldsFunc(s []byte, f func(int) bool) [][]byte { n := 0 inField := false for i := 0; i < len(s); { rune, size := utf8.DecodeRune(s[i:]) wasInField := inField - inField = !unicode.IsSpace(rune) + inField = !f(rune) if inField && !wasInField { n++ } @@ -219,12 +241,12 @@ func Fields(s []byte) [][]byte { fieldStart := -1 for i := 0; i <= len(s) && na < n; { rune, size := utf8.DecodeRune(s[i:]) - if fieldStart < 0 && size > 0 && !unicode.IsSpace(rune) { + if fieldStart < 0 && size > 0 && !f(rune) { fieldStart = i i += size continue } - if fieldStart >= 0 && (size == 0 || unicode.IsSpace(rune)) { + if fieldStart >= 0 && (size == 0 || f(rune)) { a[na] = s[fieldStart:i] na++ fieldStart = -1 @@ -337,6 +359,25 @@ func ToLower(s []byte) []byte { return Map(unicode.ToLower, s) } // ToTitle returns a copy of the byte array s with all Unicode letters mapped to their title case. func ToTitle(s []byte) []byte { return Map(unicode.ToTitle, s) } +// ToUpperSpecial returns a copy of the byte array s with all Unicode letters mapped to their +// upper case, giving priority to the special casing rules. +func ToUpperSpecial(_case unicode.SpecialCase, s []byte) []byte { + return Map(func(r int) int { return _case.ToUpper(r) }, s) +} + +// ToLowerSpecial returns a copy of the byte array s with all Unicode letters mapped to their +// lower case, giving priority to the special casing rules. +func ToLowerSpecial(_case unicode.SpecialCase, s []byte) []byte { + return Map(func(r int) int { return _case.ToLower(r) }, s) +} + +// ToTitleSpecial returns a copy of the byte array s with all Unicode letters mapped to their +// title case, giving priority to the special casing rules. +func ToTitleSpecial(_case unicode.SpecialCase, s []byte) []byte { + return Map(func(r int) int { return _case.ToTitle(r) }, s) +} + + // isSeparator reports whether the rune could mark a word boundary. // TODO: update when package unicode captures more of the properties. func isSeparator(rune int) bool { diff --git a/src/pkg/bytes/bytes_test.go b/src/pkg/bytes/bytes_test.go index b91ae5734d..de503878cd 100644 --- a/src/pkg/bytes/bytes_test.go +++ b/src/pkg/bytes/bytes_test.go @@ -125,6 +125,15 @@ var indexAnyTests = []BinOpTest{ BinOpTest{dots + dots + dots, " ", -1}, } +var indexRuneTests = []BinOpTest{ + BinOpTest{"", "a", -1}, + BinOpTest{"", "☺", -1}, + BinOpTest{"foo", "☹", -1}, + BinOpTest{"foo", "o", 1}, + BinOpTest{"foo☺bar", "☺", 3}, + BinOpTest{"foo☺☻☹bar", "☹", 9}, +} + // Execute f on each test case. funcName should be the name of f; it's used // in failure reports. func runIndexTests(t *testing.T, f func(s, sep []byte) int, funcName string, testCases []BinOpTest) { @@ -168,6 +177,17 @@ func TestIndexByte(t *testing.T) { } } +func TestIndexRune(t *testing.T) { + for _, tt := range indexRuneTests { + a := []byte(tt.a) + r, _ := utf8.DecodeRuneInString(tt.b) + pos := IndexRune(a, r) + if pos != tt.i { + t.Errorf(`IndexRune(%q, '%c') = %v`, tt.a, r, pos) + } + } +} + func BenchmarkIndexByte4K(b *testing.B) { bmIndex(b, IndexByte, 4<<10) } func BenchmarkIndexByte4M(b *testing.B) { bmIndex(b, IndexByte, 4<<20) } @@ -336,6 +356,23 @@ func TestFields(t *testing.T) { } } +func TestFieldsFunc(t *testing.T) { + pred := func(c int) bool { return c == 'X' } + var fieldsFuncTests = []FieldsTest{ + FieldsTest{"", []string{}}, + FieldsTest{"XX", []string{}}, + FieldsTest{"XXhiXXX", []string{"hi"}}, + FieldsTest{"aXXbXXXcX", []string{"a", "b", "c"}}, + } + for _, tt := range fieldsFuncTests { + a := FieldsFunc([]byte(tt.s), pred) + result := arrayOfString(a) + if !eq(result, tt.a) { + t.Errorf("FieldsFunc(%q) = %v, want %v", tt.s, a, tt.a) + } + } +} + // Test case for any function which accepts and returns a byte array. // For ease of creation, we write the byte arrays as strings. type StringTest struct { |
