diff options
| author | Josselin Costanzi <josselin@costanzi.fr> | 2017-03-19 12:18:08 +0100 |
|---|---|---|
| committer | Keith Randall <khr@golang.org> | 2017-03-21 20:25:17 +0000 |
| commit | 01cd22c68792b659ca0912c104b14c86044110cb (patch) | |
| tree | fc8ebc67f4e8a65b19d7c9f66ac66e265010176b /src/bytes/bytes_test.go | |
| parent | 0ebaca6ba27534add5930a95acffa9acff182e2b (diff) | |
| download | go-01cd22c68792b659ca0912c104b14c86044110cb.tar.xz | |
bytes: add optimized countByte for amd64
Use SSE/AVX2 when counting a single byte.
Inspired from runtime indexbyte implementation.
Benchmark against previous implementation, where
1 byte in every 8 is the one we are looking for:
* On a machine without AVX2
name old time/op new time/op delta
CountSingle/10-4 61.8ns ±10% 15.6ns ±11% -74.83% (p=0.000 n=10+10)
CountSingle/32-4 100ns ± 4% 17ns ±10% -82.54% (p=0.000 n=10+9)
CountSingle/4K-4 9.66µs ± 3% 0.37µs ± 6% -96.21% (p=0.000 n=10+10)
CountSingle/4M-4 11.0ms ± 6% 0.4ms ± 4% -96.04% (p=0.000 n=10+10)
CountSingle/64M-4 194ms ± 8% 8ms ± 2% -95.64% (p=0.000 n=10+10)
name old speed new speed delta
CountSingle/10-4 162MB/s ±10% 645MB/s ±10% +297.00% (p=0.000 n=10+10)
CountSingle/32-4 321MB/s ± 5% 1844MB/s ± 9% +474.79% (p=0.000 n=10+9)
CountSingle/4K-4 424MB/s ± 3% 11169MB/s ± 6% +2533.10% (p=0.000 n=10+10)
CountSingle/4M-4 381MB/s ± 7% 9609MB/s ± 4% +2421.88% (p=0.000 n=10+10)
CountSingle/64M-4 346MB/s ± 7% 7924MB/s ± 2% +2188.78% (p=0.000 n=10+10)
* On a machine with AVX2
name old time/op new time/op delta
CountSingle/10-8 37.1ns ± 3% 8.2ns ± 1% -77.80% (p=0.000 n=10+10)
CountSingle/32-8 66.1ns ± 3% 9.8ns ± 2% -85.23% (p=0.000 n=10+10)
CountSingle/4K-8 7.36µs ± 3% 0.11µs ± 1% -98.54% (p=0.000 n=10+10)
CountSingle/4M-8 7.46ms ± 2% 0.15ms ± 2% -97.95% (p=0.000 n=10+9)
CountSingle/64M-8 124ms ± 2% 6ms ± 4% -95.09% (p=0.000 n=10+10)
name old speed new speed delta
CountSingle/10-8 269MB/s ± 3% 1213MB/s ± 1% +350.32% (p=0.000 n=10+10)
CountSingle/32-8 484MB/s ± 4% 3277MB/s ± 2% +576.66% (p=0.000 n=10+10)
CountSingle/4K-8 556MB/s ± 3% 37933MB/s ± 1% +6718.36% (p=0.000 n=10+10)
CountSingle/4M-8 562MB/s ± 2% 27444MB/s ± 3% +4783.43% (p=0.000 n=10+9)
CountSingle/64M-8 543MB/s ± 2% 11054MB/s ± 3% +1935.81% (p=0.000 n=10+10)
Fixes #19411
Change-Id: Ieaf20b1fabccabe767c55c66e242e86f3617f883
Reviewed-on: https://go-review.googlesource.com/38258
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
Diffstat (limited to 'src/bytes/bytes_test.go')
| -rw-r--r-- | src/bytes/bytes_test.go | 93 |
1 files changed, 93 insertions, 0 deletions
diff --git a/src/bytes/bytes_test.go b/src/bytes/bytes_test.go index dd8bdf2b04..ca0cdbb7c9 100644 --- a/src/bytes/bytes_test.go +++ b/src/bytes/bytes_test.go @@ -396,6 +396,79 @@ func TestIndexRune(t *testing.T) { } } +// test count of a single byte across page offsets +func TestCountByte(t *testing.T) { + b := make([]byte, 5015) // bigger than a page + windows := []int{1, 2, 3, 4, 15, 16, 17, 31, 32, 33, 63, 64, 65, 128} + testCountWindow := func(i, window int) { + for j := 0; j < window; j++ { + b[i+j] = byte(100) + p := Count(b[i:i+window], []byte{100}) + if p != j+1 { + t.Errorf("TestCountByte.Count(%q, 100) = %d", b[i:i+window], p) + } + pGeneric := CountGeneric(b[i:i+window], []byte{100}) + if pGeneric != j+1 { + t.Errorf("TestCountByte.CountGeneric(%q, 100) = %d", b[i:i+window], p) + } + } + } + + maxWnd := windows[len(windows)-1] + + for i := 0; i <= 2*maxWnd; i++ { + for _, window := range windows { + if window > len(b[i:]) { + window = len(b[i:]) + } + testCountWindow(i, window) + for j := 0; j < window; j++ { + b[i+j] = byte(0) + } + } + } + for i := 4096 - (maxWnd + 1); i < len(b); i++ { + for _, window := range windows { + if window > len(b[i:]) { + window = len(b[i:]) + } + testCountWindow(i, window) + for j := 0; j < window; j++ { + b[i+j] = byte(0) + } + } + } +} + +// Make sure we don't count bytes outside our window +func TestCountByteNoMatch(t *testing.T) { + b := make([]byte, 5015) + windows := []int{1, 2, 3, 4, 15, 16, 17, 31, 32, 33, 63, 64, 65, 128} + for i := 0; i <= len(b); i++ { + for _, window := range windows { + if window > len(b[i:]) { + window = len(b[i:]) + } + // Fill the window with non-match + for j := 0; j < window; j++ { + b[i+j] = byte(100) + } + // Try to find something that doesn't exist + p := Count(b[i:i+window], []byte{0}) + if p != 0 { + t.Errorf("TestCountByteNoMatch(%q, 0) = %d", b[i:i+window], p) + } + pGeneric := CountGeneric(b[i:i+window], []byte{0}) + if pGeneric != 0 { + t.Errorf("TestCountByteNoMatch.CountGeneric(%q, 100) = %d", b[i:i+window], p) + } + for j := 0; j < window; j++ { + b[i+j] = byte(0) + } + } + } +} + var bmbuf []byte func valName(x int) string { @@ -589,6 +662,26 @@ func BenchmarkCountEasy(b *testing.B) { }) } +func BenchmarkCountSingle(b *testing.B) { + benchBytes(b, indexSizes, func(b *testing.B, n int) { + buf := bmbuf[0:n] + step := 8 + for i := 0; i < len(buf); i += step { + buf[i] = 1 + } + expect := (len(buf) + (step - 1)) / step + for i := 0; i < b.N; i++ { + j := Count(buf, []byte{1}) + if j != expect { + b.Fatal("bad count", j, expect) + } + } + for i := 0; i < len(buf); i++ { + buf[i] = 0 + } + }) +} + type ExplodeTest struct { s string n int |
