aboutsummaryrefslogtreecommitdiff
path: root/src/bytes
diff options
context:
space:
mode:
Diffstat (limited to 'src/bytes')
-rw-r--r--src/bytes/bytes.go97
-rw-r--r--src/bytes/bytes_test.go54
2 files changed, 139 insertions, 12 deletions
diff --git a/src/bytes/bytes.go b/src/bytes/bytes.go
index e7931387aa..ef7294d805 100644
--- a/src/bytes/bytes.go
+++ b/src/bytes/bytes.go
@@ -183,6 +183,29 @@ func IndexAny(s []byte, chars string) int {
// Avoid scanning all of s.
return -1
}
+ if len(s) == 1 {
+ r := rune(s[0])
+ if r >= utf8.RuneSelf {
+ // search utf8.RuneError.
+ for _, r = range chars {
+ if r == utf8.RuneError {
+ return 0
+ }
+ }
+ return -1
+ }
+ if bytealg.IndexByteString(chars, s[0]) >= 0 {
+ return 0
+ }
+ return -1
+ }
+ if len(chars) == 1 {
+ r := rune(chars[0])
+ if r >= utf8.RuneSelf {
+ r = utf8.RuneError
+ }
+ return IndexRune(s, r)
+ }
if len(s) > 8 {
if as, isASCII := makeASCIISet(chars); isASCII {
for i, c := range s {
@@ -197,14 +220,26 @@ func IndexAny(s []byte, chars string) int {
for i := 0; i < len(s); i += width {
r := rune(s[i])
if r < utf8.RuneSelf {
+ if bytealg.IndexByteString(chars, s[i]) >= 0 {
+ return i
+ }
width = 1
- } else {
- r, width = utf8.DecodeRune(s[i:])
+ continue
}
- for _, ch := range chars {
- if r == ch {
- return i
+ r, width = utf8.DecodeRune(s[i:])
+ if r == utf8.RuneError {
+ for _, r = range chars {
+ if r == utf8.RuneError {
+ return i
+ }
}
+ continue
+ }
+ // r is 2 to 4 bytes. Using strings.Index is more reasonable, but as the bytes
+ // package should not import the strings package, use bytealg.IndexString
+ // instead. And this does not seem to lose much performance.
+ if chars == string(r) || bytealg.IndexString(chars, string(r)) >= 0 {
+ return i
}
}
return -1
@@ -229,13 +264,59 @@ func LastIndexAny(s []byte, chars string) int {
return -1
}
}
+ if len(s) == 1 {
+ r := rune(s[0])
+ if r >= utf8.RuneSelf {
+ for _, r = range chars {
+ if r == utf8.RuneError {
+ return 0
+ }
+ }
+ return -1
+ }
+ if bytealg.IndexByteString(chars, s[0]) >= 0 {
+ return 0
+ }
+ return -1
+ }
+ if len(chars) == 1 {
+ cr := rune(chars[0])
+ if cr >= utf8.RuneSelf {
+ cr = utf8.RuneError
+ }
+ for i := len(s); i > 0; {
+ r, size := utf8.DecodeLastRune(s[:i])
+ i -= size
+ if r == cr {
+ return i
+ }
+ }
+ return -1
+ }
for i := len(s); i > 0; {
+ r := rune(s[i-1])
+ if r < utf8.RuneSelf {
+ if bytealg.IndexByteString(chars, s[i-1]) >= 0 {
+ return i - 1
+ }
+ i--
+ continue
+ }
r, size := utf8.DecodeLastRune(s[:i])
i -= size
- for _, c := range chars {
- if r == c {
- return i
+ if r == utf8.RuneError {
+ for _, r = range chars {
+ if r == utf8.RuneError {
+ return i
+ }
}
+ continue
+ }
+ // r is 2 to 4 bytes. Using strings.Index is more reasonable, but as the bytes
+ // package should not import the strings package, use bytealg.IndexString
+ // instead. And this does not seem to lose much performance.
+ if chars == string(r) || bytealg.IndexString(chars, string(r)) >= 0 {
+ return i
}
}
return -1
diff --git a/src/bytes/bytes_test.go b/src/bytes/bytes_test.go
index a208d4ed76..544ee46f90 100644
--- a/src/bytes/bytes_test.go
+++ b/src/bytes/bytes_test.go
@@ -169,6 +169,7 @@ var indexAnyTests = []BinOpTest{
{"", "abc", -1},
{"a", "", -1},
{"a", "a", 0},
+ {"\x80", "\xffb", 0},
{"aaa", "a", 0},
{"abc", "xyz", -1},
{"abc", "xcz", 2},
@@ -179,6 +180,7 @@ var indexAnyTests = []BinOpTest{
{dots + dots + dots, " ", -1},
{"012abcba210", "\xffb", 4},
{"012\x80bcb\x80210", "\xffb", 3},
+ {"0123456\xcf\x80abc", "\xcfb\x80", 10},
}
var lastIndexAnyTests = []BinOpTest{
@@ -187,6 +189,7 @@ var lastIndexAnyTests = []BinOpTest{
{"", "abc", -1},
{"a", "", -1},
{"a", "a", 0},
+ {"\x80", "\xffb", 0},
{"aaa", "a", 2},
{"abc", "xyz", -1},
{"abc", "ab", 1},
@@ -197,6 +200,7 @@ var lastIndexAnyTests = []BinOpTest{
{dots + dots + dots, " ", -1},
{"012abcba210", "\xffb", 6},
{"012\x80bcb\x80210", "\xffb", 7},
+ {"0123456\xcf\x80abc", "\xcfb\x80", 10},
}
// Execute f on each test case. funcName should be the name of f; it's used
@@ -1890,10 +1894,24 @@ func BenchmarkBytesCompare(b *testing.B) {
}
func BenchmarkIndexAnyASCII(b *testing.B) {
- x := Repeat([]byte{'#'}, 4096) // Never matches set
- cs := "0123456789abcdef"
- for k := 1; k <= 4096; k <<= 4 {
- for j := 1; j <= 16; j <<= 1 {
+ x := Repeat([]byte{'#'}, 2048) // Never matches set
+ cs := "0123456789abcdefghijklmnopqrstuvwxyz0123456789abcdefghijklmnopqrstuvwxyz"
+ for k := 1; k <= 2048; k <<= 4 {
+ for j := 1; j <= 64; j <<= 1 {
+ b.Run(fmt.Sprintf("%d:%d", k, j), func(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ IndexAny(x[:k], cs[:j])
+ }
+ })
+ }
+ }
+}
+
+func BenchmarkIndexAnyUTF8(b *testing.B) {
+ x := Repeat([]byte{'#'}, 2048) // Never matches set
+ cs := "你好世界, hello world. 你好世界, hello world. 你好世界, hello world."
+ for k := 1; k <= 2048; k <<= 4 {
+ for j := 1; j <= 64; j <<= 1 {
b.Run(fmt.Sprintf("%d:%d", k, j), func(b *testing.B) {
for i := 0; i < b.N; i++ {
IndexAny(x[:k], cs[:j])
@@ -1903,6 +1921,34 @@ func BenchmarkIndexAnyASCII(b *testing.B) {
}
}
+func BenchmarkLastIndexAnyASCII(b *testing.B) {
+ x := Repeat([]byte{'#'}, 2048) // Never matches set
+ cs := "0123456789abcdefghijklmnopqrstuvwxyz0123456789abcdefghijklmnopqrstuvwxyz"
+ for k := 1; k <= 2048; k <<= 4 {
+ for j := 1; j <= 64; j <<= 1 {
+ b.Run(fmt.Sprintf("%d:%d", k, j), func(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ LastIndexAny(x[:k], cs[:j])
+ }
+ })
+ }
+ }
+}
+
+func BenchmarkLastIndexAnyUTF8(b *testing.B) {
+ x := Repeat([]byte{'#'}, 2048) // Never matches set
+ cs := "你好世界, hello world. 你好世界, hello world. 你好世界, hello world."
+ for k := 1; k <= 2048; k <<= 4 {
+ for j := 1; j <= 64; j <<= 1 {
+ b.Run(fmt.Sprintf("%d:%d", k, j), func(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ LastIndexAny(x[:k], cs[:j])
+ }
+ })
+ }
+ }
+}
+
func BenchmarkTrimASCII(b *testing.B) {
cs := "0123456789abcdef"
for k := 1; k <= 4096; k <<= 4 {