diff options
| author | Martin Möhrmann <martisch@uos.de> | 2016-09-02 17:04:41 +0200 |
|---|---|---|
| committer | Brad Fitzpatrick <bradfitz@golang.org> | 2016-10-17 11:25:22 +0000 |
| commit | d2951740303587fc0c5d14cb5461e39b099e6695 (patch) | |
| tree | 97625f704787b103946792ad7475235744826c91 /src/unicode/utf8 | |
| parent | fe4307f0607dff7742d047b04df06e721aea7906 (diff) | |
| download | go-d2951740303587fc0c5d14cb5461e39b099e6695.tar.xz | |
runtime: speed up non-ASCII rune decoding
Copies utf8 constants and EncodeRune implementation from unicode/utf8.
Adds a new decoderune implementation that is used by the compiler
in code generated for ranging over strings. It does not handle
ASCII runes since these are handled directly before calls to decoderune.
The DecodeRuneInString implementation from unicode/utf8 is not used
since it uses a lookup table that would increase the use of cpu caches.
Adds more tests that check decoding of valid and invalid utf8 sequences.
name old time/op new time/op delta
RuneIterate/range2/ASCII-4 7.45ns ± 2% 7.45ns ± 1% ~ (p=0.634 n=16+16)
RuneIterate/range2/Japanese-4 53.5ns ± 1% 49.2ns ± 2% -8.03% (p=0.000 n=20+20)
RuneIterate/range2/MixedLength-4 46.3ns ± 1% 41.0ns ± 2% -11.57% (p=0.000 n=20+20)
new:
"".decoderune t=1 size=423 args=0x28 locals=0x0
old:
"".charntorune t=1 size=666 args=0x28 locals=0x0
Change-Id: I1df1fdb385bb9ea5e5e71b8818ea2bf5ce62de52
Reviewed-on: https://go-review.googlesource.com/28490
Run-TryBot: Martin Möhrmann <martisch@uos.de>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Diffstat (limited to 'src/unicode/utf8')
| -rw-r--r-- | src/unicode/utf8/utf8_test.go | 91 |
1 files changed, 91 insertions, 0 deletions
diff --git a/src/unicode/utf8/utf8_test.go b/src/unicode/utf8/utf8_test.go index 51571b61eb..dc9c4251bd 100644 --- a/src/unicode/utf8/utf8_test.go +++ b/src/unicode/utf8/utf8_test.go @@ -54,14 +54,18 @@ var utf8map = []Utf8Map{ {0x00ff, "\xc3\xbf"}, {0x0100, "\xc4\x80"}, {0x07ff, "\xdf\xbf"}, + {0x0400, "\xd0\x80"}, {0x0800, "\xe0\xa0\x80"}, {0x0801, "\xe0\xa0\x81"}, + {0x1000, "\xe1\x80\x80"}, + {0xd000, "\xed\x80\x80"}, {0xd7ff, "\xed\x9f\xbf"}, // last code point before surrogate half. {0xe000, "\xee\x80\x80"}, // first code point after surrogate half. {0xfffe, "\xef\xbf\xbe"}, {0xffff, "\xef\xbf\xbf"}, {0x10000, "\xf0\x90\x80\x80"}, {0x10001, "\xf0\x90\x80\x81"}, + {0x40000, "\xf1\x80\x80\x80"}, {0x10fffe, "\xf4\x8f\xbf\xbe"}, {0x10ffff, "\xf4\x8f\xbf\xbf"}, {0xFFFD, "\xef\xbf\xbd"}, @@ -228,6 +232,93 @@ func TestIntConversion(t *testing.T) { } } +var invalidSequenceTests = []string{ + "\xed\xa0\x80\x80", // surrogate min + "\xed\xbf\xbf\x80", // surrogate max + + // xx + "\x91\x80\x80\x80", + + // s1 + "\xC2\x7F\x80\x80", + "\xC2\xC0\x80\x80", + "\xDF\x7F\x80\x80", + "\xDF\xC0\x80\x80", + + // s2 + "\xE0\x9F\xBF\x80", + "\xE0\xA0\x7F\x80", + "\xE0\xBF\xC0\x80", + "\xE0\xC0\x80\x80", + + // s3 + "\xE1\x7F\xBF\x80", + "\xE1\x80\x7F\x80", + "\xE1\xBF\xC0\x80", + "\xE1\xC0\x80\x80", + + //s4 + "\xED\x7F\xBF\x80", + "\xED\x80\x7F\x80", + "\xED\x9F\xC0\x80", + "\xED\xA0\x80\x80", + + // s5 + "\xF0\x8F\xBF\xBF", + "\xF0\x90\x7F\xBF", + "\xF0\x90\x80\x7F", + "\xF0\xBF\xBF\xC0", + "\xF0\xBF\xC0\x80", + "\xF0\xC0\x80\x80", + + // s6 + "\xF1\x7F\xBF\xBF", + "\xF1\x80\x7F\xBF", + "\xF1\x80\x80\x7F", + "\xF1\xBF\xBF\xC0", + "\xF1\xBF\xC0\x80", + "\xF1\xC0\x80\x80", + + // s7 + "\xF4\x7F\xBF\xBF", + "\xF4\x80\x7F\xBF", + "\xF4\x80\x80\x7F", + "\xF4\x8F\xBF\xC0", + "\xF4\x8F\xC0\x80", + "\xF4\x90\x80\x80", +} + +func runtimeDecodeRune(s string) rune { + for _, r := range s { + return r + } + return -1 +} + +func TestDecodeInvalidSequence(t *testing.T) { + for _, s := range invalidSequenceTests { + r1, _ := DecodeRune([]byte(s)) + if want := RuneError; r1 != want { + t.Errorf("DecodeRune(%#x) = %#04x, want %#04x", s, r1, want) + return + } + r2, _ := DecodeRuneInString(s) + if want := RuneError; r2 != want { + t.Errorf("DecodeRuneInString(%q) = %#04x, want %#04x", s, r2, want) + return + } + if r1 != r2 { + t.Errorf("DecodeRune(%#x) = %#04x mismatch with DecodeRuneInString(%q) = %#04x", s, r1, s, r2) + return + } + r3 := runtimeDecodeRune(s) + if r2 != r3 { + t.Errorf("DecodeRuneInString(%q) = %#04x mismatch with runtime.decoderune(%q) = %#04x", s, r2, s, r3) + return + } + } +} + func testSequence(t *testing.T, s string) { type info struct { index int |
