2 files changed, 139 insertions, 12 deletions
diff --git a/src/bytes/bytes.go b/src/bytes/bytes.go
index e7931387aa..ef7294d805 100644
--- a/src/bytes/bytes.go
+++ b/src/bytes/bytes.go
@@ -183,6 +183,29 @@ func IndexAny(s []byte, chars string) int {
 		// Avoid scanning all of s.
 		return -1
 	}
+	if len(s) == 1 {
+		r := rune(s[0])
+		if r >= utf8.RuneSelf {
+			// search utf8.RuneError.
+			for _, r = range chars {
+				if r == utf8.RuneError {
+					return 0
+				}
+			}
+			return -1
+		}
+		if bytealg.IndexByteString(chars, s[0]) >= 0 {
+			return 0
+		}
+		return -1
+	}
+	if len(chars) == 1 {
+		r := rune(chars[0])
+		if r >= utf8.RuneSelf {
+			r = utf8.RuneError
+		}
+		return IndexRune(s, r)
+	}
 	if len(s) > 8 {
 		if as, isASCII := makeASCIISet(chars); isASCII {
 			for i, c := range s {
@@ -197,14 +220,26 @@ func IndexAny(s []byte, chars string) int {
 	for i := 0; i < len(s); i += width {
 		r := rune(s[i])
 		if r < utf8.RuneSelf {
+			if bytealg.IndexByteString(chars, s[i]) >= 0 {
+				return i
+			}
 			width = 1
-		} else {
-			r, width = utf8.DecodeRune(s[i:])
+			continue
 		}
-		for _, ch := range chars {
-			if r == ch {
-				return i
+		r, width = utf8.DecodeRune(s[i:])
+		if r == utf8.RuneError {
+			for _, r = range chars {
+				if r == utf8.RuneError {
+					return i
+				}
 			}
+			continue
+		}
+		// r is 2 to 4 bytes. Using strings.Index is more reasonable, but as the bytes
+		// package should not import the strings package, use bytealg.IndexString
+		// instead. And this does not seem to lose much performance.
+		if chars == string(r) || bytealg.IndexString(chars, string(r)) >= 0 {
+			return i
 		}
 	}
 	return -1
@@ -229,13 +264,59 @@ func LastIndexAny(s []byte, chars string) int {
 			return -1
 		}
 	}
+	if len(s) == 1 {
+		r := rune(s[0])
+		if r >= utf8.RuneSelf {
+			for _, r = range chars {
+				if r == utf8.RuneError {
+					return 0
+				}
+			}
+			return -1
+		}
+		if bytealg.IndexByteString(chars, s[0]) >= 0 {
+			return 0
+		}
+		return -1
+	}
+	if len(chars) == 1 {
+		cr := rune(chars[0])
+		if cr >= utf8.RuneSelf {
+			cr = utf8.RuneError
+		}
+		for i := len(s); i > 0; {
+			r, size := utf8.DecodeLastRune(s[:i])
+			i -= size
+			if r == cr {
+				return i
+			}
+		}
+		return -1
+	}
 	for i := len(s); i > 0; {
+		r := rune(s[i-1])
+		if r < utf8.RuneSelf {
+			if bytealg.IndexByteString(chars, s[i-1]) >= 0 {
+				return i - 1
+			}
+			i--
+			continue
+		}
 		r, size := utf8.DecodeLastRune(s[:i])
 		i -= size
-		for _, c := range chars {
-			if r == c {
-				return i
+		if r == utf8.RuneError {
+			for _, r = range chars {
+				if r == utf8.RuneError {
+					return i
+				}
 			}
+			continue
+		}
+		// r is 2 to 4 bytes. Using strings.Index is more reasonable, but as the bytes
+		// package should not import the strings package, use bytealg.IndexString
+		// instead. And this does not seem to lose much performance.
+		if chars == string(r) || bytealg.IndexString(chars, string(r)) >= 0 {
+			return i
 		}
 	}
 	return -1
diff --git a/src/bytes/bytes_test.go b/src/bytes/bytes_test.go
index a208d4ed76..544ee46f90 100644
--- a/src/bytes/bytes_test.go
+++ b/src/bytes/bytes_test.go
@@ -169,6 +169,7 @@ var indexAnyTests = []BinOpTest{
 	{"", "abc", -1},
 	{"a", "", -1},
 	{"a", "a", 0},
+	{"\x80", "\xffb", 0},
 	{"aaa", "a", 0},
 	{"abc", "xyz", -1},
 	{"abc", "xcz", 2},
@@ -179,6 +180,7 @@ var indexAnyTests = []BinOpTest{
 	{dots + dots + dots, " ", -1},
 	{"012abcba210", "\xffb", 4},
 	{"012\x80bcb\x80210", "\xffb", 3},
+	{"0123456\xcf\x80abc", "\xcfb\x80", 10},
 }
 
 var lastIndexAnyTests = []BinOpTest{
@@ -187,6 +189,7 @@ var lastIndexAnyTests = []BinOpTest{
 	{"", "abc", -1},
 	{"a", "", -1},
 	{"a", "a", 0},
+	{"\x80", "\xffb", 0},
 	{"aaa", "a", 2},
 	{"abc", "xyz", -1},
 	{"abc", "ab", 1},
@@ -197,6 +200,7 @@ var lastIndexAnyTests = []BinOpTest{
 	{dots + dots + dots, " ", -1},
 	{"012abcba210", "\xffb", 6},
 	{"012\x80bcb\x80210", "\xffb", 7},
+	{"0123456\xcf\x80abc", "\xcfb\x80", 10},
 }
 
 // Execute f on each test case.  funcName should be the name of f; it's used
@@ -1890,10 +1894,24 @@ func BenchmarkBytesCompare(b *testing.B) {
 }
 
 func BenchmarkIndexAnyASCII(b *testing.B) {
-	x := Repeat([]byte{'#'}, 4096) // Never matches set
-	cs := "0123456789abcdef"
-	for k := 1; k <= 4096; k <<= 4 {
-		for j := 1; j <= 16; j <<= 1 {
+	x := Repeat([]byte{'#'}, 2048) // Never matches set
+	cs := "0123456789abcdefghijklmnopqrstuvwxyz0123456789abcdefghijklmnopqrstuvwxyz"
+	for k := 1; k <= 2048; k <<= 4 {
+		for j := 1; j <= 64; j <<= 1 {
+			b.Run(fmt.Sprintf("%d:%d", k, j), func(b *testing.B) {
+				for i := 0; i < b.N; i++ {
+					IndexAny(x[:k], cs[:j])
+				}
+			})
+		}
+	}
+}
+
+func BenchmarkIndexAnyUTF8(b *testing.B) {
+	x := Repeat([]byte{'#'}, 2048) // Never matches set
+	cs := "你好世界, hello world. 你好世界, hello world. 你好世界, hello world."
+	for k := 1; k <= 2048; k <<= 4 {
+		for j := 1; j <= 64; j <<= 1 {
 			b.Run(fmt.Sprintf("%d:%d", k, j), func(b *testing.B) {
 				for i := 0; i < b.N; i++ {
 					IndexAny(x[:k], cs[:j])
@@ -1903,6 +1921,34 @@ func BenchmarkIndexAnyASCII(b *testing.B) {
 	}
 }
 
+func BenchmarkLastIndexAnyASCII(b *testing.B) {
+	x := Repeat([]byte{'#'}, 2048) // Never matches set
+	cs := "0123456789abcdefghijklmnopqrstuvwxyz0123456789abcdefghijklmnopqrstuvwxyz"
+	for k := 1; k <= 2048; k <<= 4 {
+		for j := 1; j <= 64; j <<= 1 {
+			b.Run(fmt.Sprintf("%d:%d", k, j), func(b *testing.B) {
+				for i := 0; i < b.N; i++ {
+					LastIndexAny(x[:k], cs[:j])
+				}
+			})
+		}
+	}
+}
+
+func BenchmarkLastIndexAnyUTF8(b *testing.B) {
+	x := Repeat([]byte{'#'}, 2048) // Never matches set
+	cs := "你好世界, hello world. 你好世界, hello world. 你好世界, hello world."
+	for k := 1; k <= 2048; k <<= 4 {
+		for j := 1; j <= 64; j <<= 1 {
+			b.Run(fmt.Sprintf("%d:%d", k, j), func(b *testing.B) {
+				for i := 0; i < b.N; i++ {
+					LastIndexAny(x[:k], cs[:j])
+				}
+			})
+		}
+	}
+}
+
 func BenchmarkTrimASCII(b *testing.B) {
 	cs := "0123456789abcdef"
 	for k := 1; k <= 4096; k <<= 4 {