diff options
| author | Joe Tsai <joetsai@digital-static.net> | 2016-10-26 14:18:37 -0700 |
|---|---|---|
| committer | Joe Tsai <thebrokentoaster@gmail.com> | 2016-10-26 23:02:27 +0000 |
| commit | 4b2665786ec13c82ab751cd2d4312772b80cef12 (patch) | |
| tree | b3e0cc0d9677b3bc8400c6280e054eb144ecb1cb /src/bytes/bytes_test.go | |
| parent | 4f1e7be51f401a5374c0def2df0773abc924b03c (diff) | |
| download | go-4b2665786ec13c82ab751cd2d4312772b80cef12.tar.xz | |
bytes, strings: fix regression in IndexRune
In all previous versions of Go, the behavior of IndexRune(s, r)
where r was utf.RuneError was that it would effectively return the
index of any invalid UTF-8 byte sequence (include RuneError).
Optimizations made in http://golang.org/cl/28537 and
http://golang.org/cl/28546 altered this undocumented behavior such
that RuneError would only match on the RuneError rune itself.
Although, the new behavior is arguably reasonable, it did break code
that depended on the previous behavior. Thus, we add special checks
to ensure that we preserve the old behavior.
There is a slight performance hit for correctness:
benchmark old ns/op new ns/op delta
BenchmarkIndexRune/10-4 19.3 21.6 +11.92%
BenchmarkIndexRune/32-4 33.6 35.2 +4.76%
This only occurs on small strings. The performance hit for larger strings
is neglible and not shown.
Fixes #17611
Change-Id: I1d863a741213d46c40b2e1724c41245df52502a5
Reviewed-on: https://go-review.googlesource.com/32123
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Diffstat (limited to 'src/bytes/bytes_test.go')
| -rw-r--r-- | src/bytes/bytes_test.go | 50 |
1 files changed, 33 insertions, 17 deletions
diff --git a/src/bytes/bytes_test.go b/src/bytes/bytes_test.go index 91f87bbc1c..146dc42b0d 100644 --- a/src/bytes/bytes_test.go +++ b/src/bytes/bytes_test.go @@ -185,15 +185,6 @@ var lastIndexAnyTests = []BinOpTest{ {dots + dots + dots, " ", -1}, } -var indexRuneTests = []BinOpTest{ - {"", "a", -1}, - {"", "☺", -1}, - {"foo", "☹", -1}, - {"foo", "o", 1}, - {"foo☺bar", "☺", 3}, - {"foo☺☻☹bar", "☹", 9}, -} - // Execute f on each test case. funcName should be the name of f; it's used // in failure reports. func runIndexTests(t *testing.T, f func(s, sep []byte) int, funcName string, testCases []BinOpTest) { @@ -348,17 +339,42 @@ func TestIndexByteSmall(t *testing.T) { } func TestIndexRune(t *testing.T) { - for _, tt := range indexRuneTests { - a := []byte(tt.a) - r, _ := utf8.DecodeRuneInString(tt.b) - pos := IndexRune(a, r) - if pos != tt.i { - t.Errorf(`IndexRune(%q, '%c') = %v`, tt.a, r, pos) + tests := []struct { + in string + rune rune + want int + }{ + {"", 'a', -1}, + {"", '☺', -1}, + {"foo", '☹', -1}, + {"foo", 'o', 1}, + {"foo☺bar", '☺', 3}, + {"foo☺☻☹bar", '☹', 9}, + {"a A x", 'A', 2}, + {"some_text=some_value", '=', 9}, + {"☺a", 'a', 3}, + {"a☻☺b", '☺', 4}, + + // RuneError should match any invalid UTF-8 byte sequence. + {"�", '�', 0}, + {"\xff", '�', 0}, + {"☻x�", '�', len("☻x")}, + {"☻x\xe2\x98", '�', len("☻x")}, + {"☻x\xe2\x98�", '�', len("☻x")}, + {"☻x\xe2\x98x", '�', len("☻x")}, + + // Invalid rune values should never match. + {"a☺b☻c☹d\xe2\x98�\xff�\xed\xa0\x80", -1, -1}, + {"a☺b☻c☹d\xe2\x98�\xff�\xed\xa0\x80", 0xD800, -1}, // Surrogate pair + {"a☺b☻c☹d\xe2\x98�\xff�\xed\xa0\x80", utf8.MaxRune + 1, -1}, + } + for _, tt := range tests { + if got := IndexRune([]byte(tt.in), tt.rune); got != tt.want { + t.Errorf("IndexRune(%q, %d) = %v; want %v", tt.in, tt.rune, got, tt.want) } } haystack := []byte("test世界") - allocs := testing.AllocsPerRun(1000, func() { if i := IndexRune(haystack, 's'); i != 2 { t.Fatalf("'s' at %d; want 2", i) @@ -368,7 +384,7 @@ func TestIndexRune(t *testing.T) { } }) if allocs != 0 { - t.Errorf(`expected no allocations, got %f`, allocs) + t.Errorf("expected no allocations, got %f", allocs) } } |
