From 9c2be4c22d78cebc52458ba7298a470a3be0cdce Mon Sep 17 00:00:00 2001 From: Iskander Sharipov Date: Thu, 6 Sep 2018 13:28:17 +0300 Subject: bytes: remove bootstrap array from Buffer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rationale: small buffer optimization does not work and it has made things slower since 2014. Until we can make it work, we should prefer simpler code that also turns out to be more efficient. With this change, it's possible to use NewBuffer(make([]byte, 0, bootstrapSize)) to get the desired stack-allocated initial buffer since escape analysis can prove the created slice to be non-escaping. New implementation key points: - Zero value bytes.Buffer performs better than before - You can have a truly stack-allocated buffer, and it's not even limited to 64 bytes - The unsafe.Sizeof(bytes.Buffer{}) is reduced significantly - Empty writes don't cause allocations Buffer benchmarks from bytes package: name old time/op new time/op delta ReadString-8 9.20µs ± 1% 9.22µs ± 1% ~ (p=0.148 n=10+10) WriteByte-8 28.1µs ± 0% 26.2µs ± 0% -6.78% (p=0.000 n=10+10) WriteRune-8 64.9µs ± 0% 65.0µs ± 0% +0.16% (p=0.000 n=10+10) BufferNotEmptyWriteRead-8 469µs ± 0% 461µs ± 0% -1.76% (p=0.000 n=9+10) BufferFullSmallReads-8 108µs ± 0% 108µs ± 0% -0.21% (p=0.000 n=10+10) name old speed new speed delta ReadString-8 3.56GB/s ± 1% 3.55GB/s ± 1% ~ (p=0.165 n=10+10) WriteByte-8 146MB/s ± 0% 156MB/s ± 0% +7.26% (p=0.000 n=9+10) WriteRune-8 189MB/s ± 0% 189MB/s ± 0% -0.16% (p=0.000 n=10+10) name old alloc/op new alloc/op delta ReadString-8 32.8kB ± 0% 32.8kB ± 0% ~ (all equal) WriteByte-8 0.00B 0.00B ~ (all equal) WriteRune-8 0.00B 0.00B ~ (all equal) BufferNotEmptyWriteRead-8 4.72kB ± 0% 4.67kB ± 0% -1.02% (p=0.000 n=10+10) BufferFullSmallReads-8 3.44kB ± 0% 3.33kB ± 0% -3.26% (p=0.000 n=10+10) name old allocs/op new allocs/op delta ReadString-8 1.00 ± 0% 1.00 ± 0% ~ (all equal) WriteByte-8 0.00 0.00 ~ (all equal) WriteRune-8 0.00 0.00 ~ (all equal) BufferNotEmptyWriteRead-8 3.00 ± 0% 3.00 ± 0% ~ (all equal) BufferFullSmallReads-8 3.00 ± 0% 2.00 ± 0% -33.33% (p=0.000 n=10+10) The most notable thing in go1 benchmarks is reduced allocs in HTTPClientServer (-1 alloc): HTTPClientServer-8 64.0 ± 0% 63.0 ± 0% -1.56% (p=0.000 n=10+10) For more explanations and benchmarks see the referenced issue. Updates #7921 Change-Id: Ica0bf85e1b70fb4f5dc4f6a61045e2cf4ef72aa3 Reviewed-on: https://go-review.googlesource.com/133715 Reviewed-by: Martin Möhrmann Reviewed-by: Robert Griesemer Reviewed-by: Brad Fitzpatrick --- src/bytes/buffer.go | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'src/bytes') diff --git a/src/bytes/buffer.go b/src/bytes/buffer.go index 14c5bc38d6..087cc0e427 100644 --- a/src/bytes/buffer.go +++ b/src/bytes/buffer.go @@ -12,13 +12,15 @@ import ( "unicode/utf8" ) +// smallBufferSize is an initial allocation minimal capacity. +const smallBufferSize = 64 + // A Buffer is a variable-sized buffer of bytes with Read and Write methods. // The zero value for Buffer is an empty buffer ready to use. type Buffer struct { - buf []byte // contents are the bytes buf[off : len(buf)] - off int // read at &buf[off], write at &buf[len(buf)] - bootstrap [64]byte // memory to hold first slice; helps small buffers avoid allocation. - lastRead readOp // last read operation, so that Unread* can work correctly. + buf []byte // contents are the bytes buf[off : len(buf)] + off int // read at &buf[off], write at &buf[len(buf)] + lastRead readOp // last read operation, so that Unread* can work correctly. // FIXME: it would be advisable to align Buffer to cachelines to avoid false // sharing. @@ -125,9 +127,8 @@ func (b *Buffer) grow(n int) int { if i, ok := b.tryGrowByReslice(n); ok { return i } - // Check if we can make use of bootstrap array. - if b.buf == nil && n <= len(b.bootstrap) { - b.buf = b.bootstrap[:n] + if b.buf == nil && n <= smallBufferSize { + b.buf = make([]byte, n, smallBufferSize) return 0 } c := cap(b.buf) -- cgit v1.3 From 691f5c34ad7b7d676644dd749bc4ddb5d20b90d0 Mon Sep 17 00:00:00 2001 From: erifan01 Date: Thu, 19 Jul 2018 09:55:17 +0000 Subject: strings, bytes: optimize function Index This change compares the first two characters instead of the first one, and if they match, the entire string is compared. Comparing the first two characters helps to filter out the case where the first character matches but the subsequent characters do not match, thereby improving the substring search speed in this case. Benchmarks with no effect or minimal impact (less than 5%) is not listed, the following are improved benchmarks: On arm64: strings: IndexPeriodic/IndexPeriodic16-8 172890.00ns +- 2% 124156.20ns +- 0% -28.19% (p=0.008 n=5+5) IndexPeriodic/IndexPeriodic32-8 78092.80ns +- 0% 65138.60ns +- 0% -16.59% (p=0.008 n=5+5) IndexPeriodic/IndexPeriodic64-8 42322.20ns +- 0% 34661.60ns +- 0% -18.10% (p=0.008 n=5+5) bytes: IndexPeriodic/IndexPeriodic16-8 183468.20ns +- 6% 123759.00ns +- 0% -32.54% (p=0.008 n=5+5) IndexPeriodic/IndexPeriodic32-8 84776.40ns +- 0% 63907.80ns +- 0% -24.62% (p=0.008 n=5+5) IndexPeriodic/IndexPeriodic64-8 45835.60ns +- 0% 34194.20ns +- 0% -25.40% (p=0.008 n=5+5) On amd64: strings: IndexPeriodic/IndexPeriodic8-16 219499.00ns +- 0% 178123.40ns +- 0% -18.85% (p=0.008 n=5+5) IndexPeriodic/IndexPeriodic16-16 109760.20ns +- 0% 88957.80ns +- 0% -18.95% (p=0.008 n=5+5) IndexPeriodic/IndexPeriodic32-16 54943.00ns +- 0% 44573.80ns +- 0% -18.87% (p=0.008 n=5+5) IndexPeriodic/IndexPeriodic64-16 29804.80ns +- 0% 24417.80ns +- 0% -18.07% (p=0.008 n=5+5) bytes: IndexPeriodic/IndexPeriodic8-16 226592.60ns +- 0% 181183.20ns +- 0% -20.04% (p=0.008 n=5+5) IndexPeriodic/IndexPeriodic16-16 111432.60ns +- 0% 90634.60ns +- 0% -18.66% (p=0.008 n=5+5) IndexPeriodic/IndexPeriodic32-16 55640.60ns +- 0% 45433.00ns +- 0% -18.35% (p=0.008 n=5+5) IndexPeriodic/IndexPeriodic64-16 30833.00ns +- 0% 24784.20ns +- 0% -19.62% (p=0.008 n=5+5) Change-Id: I2d9e7e138d29e960d20a203eb74dc2ec976a9d71 Reviewed-on: https://go-review.googlesource.com/131177 Run-TryBot: Ian Lance Taylor TryBot-Result: Gobot Gobot Reviewed-by: Ian Lance Taylor --- src/bytes/bytes.go | 28 +++++++++++++++------------- src/strings/strings.go | 28 +++++++++++++++------------- 2 files changed, 30 insertions(+), 26 deletions(-) (limited to 'src/bytes') diff --git a/src/bytes/bytes.go b/src/bytes/bytes.go index 77a7ce98e0..876fa3c1ed 100644 --- a/src/bytes/bytes.go +++ b/src/bytes/bytes.go @@ -849,21 +849,22 @@ func Index(s, sep []byte) int { if len(s) <= bytealg.MaxBruteForce { return bytealg.Index(s, sep) } - c := sep[0] + c0 := sep[0] + c1 := sep[1] i := 0 - t := s[:len(s)-n+1] + t := len(s) - n + 1 fails := 0 - for i < len(t) { - if t[i] != c { + for i < t { + if s[i] != c0 { // IndexByte is faster than bytealg.Index, so use it as long as // we're not getting lots of false positives. - o := IndexByte(t[i:], c) + o := IndexByte(s[i:t], c0) if o < 0 { return -1 } i += o } - if Equal(s[i:i+n], sep) { + if s[i+1] == c1 && Equal(s[i:i+n], sep) { return i } fails++ @@ -879,24 +880,25 @@ func Index(s, sep []byte) int { } return -1 } - c := sep[0] + c0 := sep[0] + c1 := sep[1] i := 0 fails := 0 - t := s[:len(s)-n+1] - for i < len(t) { - if t[i] != c { - o := IndexByte(t[i:], c) + t := len(s) - n + 1 + for i < t { + if s[i] != c0 { + o := IndexByte(s[i:t], c0) if o < 0 { break } i += o } - if Equal(s[i:i+n], sep) { + if s[i+1] == c1 && Equal(s[i:i+n], sep) { return i } i++ fails++ - if fails >= 4+i>>4 && i < len(t) { + if fails >= 4+i>>4 && i < t { // Give up on IndexByte, it isn't skipping ahead // far enough to be better than Rabin-Karp. // Experiments (using IndexPeriodic) suggest diff --git a/src/strings/strings.go b/src/strings/strings.go index df95715ec8..26aceda212 100644 --- a/src/strings/strings.go +++ b/src/strings/strings.go @@ -947,21 +947,22 @@ func Index(s, substr string) int { if len(s) <= bytealg.MaxBruteForce { return bytealg.IndexString(s, substr) } - c := substr[0] + c0 := substr[0] + c1 := substr[1] i := 0 - t := s[:len(s)-n+1] + t := len(s) - n + 1 fails := 0 - for i < len(t) { - if t[i] != c { + for i < t { + if s[i] != c0 { // IndexByte is faster than bytealg.IndexString, so use it as long as // we're not getting lots of false positives. - o := IndexByte(t[i:], c) + o := IndexByte(s[i:t], c0) if o < 0 { return -1 } i += o } - if s[i:i+n] == substr { + if s[i+1] == c1 && s[i:i+n] == substr { return i } fails++ @@ -977,24 +978,25 @@ func Index(s, substr string) int { } return -1 } - c := substr[0] + c0 := substr[0] + c1 := substr[1] i := 0 - t := s[:len(s)-n+1] + t := len(s) - n + 1 fails := 0 - for i < len(t) { - if t[i] != c { - o := IndexByte(t[i:], c) + for i < t { + if s[i] != c0 { + o := IndexByte(s[i:t], c0) if o < 0 { return -1 } i += o } - if s[i:i+n] == substr { + if s[i+1] == c1 && s[i:i+n] == substr { return i } i++ fails++ - if fails >= 4+i>>4 && i < len(t) { + if fails >= 4+i>>4 && i < t { // See comment in ../bytes/bytes_generic.go. j := indexRabinKarp(s[i:], substr) if j < 0 { -- cgit v1.3 From ebdc0b8d68e04ad383088c8b3ab963de4a9b5c5d Mon Sep 17 00:00:00 2001 From: Brad Fitzpatrick Date: Wed, 26 Sep 2018 20:19:11 +0000 Subject: bytes, strings: add ReplaceAll Credit to Harald Nordgren for the proposal in https://golang.org/cl/137456 and #27864. Fixes #27864 Change-Id: I80546683b0623124fe4627a71af88add2f6c1c27 Reviewed-on: https://go-review.googlesource.com/137855 Reviewed-by: Ian Lance Taylor --- src/bytes/bytes.go | 9 +++++++++ src/bytes/bytes_test.go | 6 ++++++ src/strings/strings.go | 9 +++++++++ src/strings/strings_test.go | 6 ++++++ 4 files changed, 30 insertions(+) (limited to 'src/bytes') diff --git a/src/bytes/bytes.go b/src/bytes/bytes.go index 876fa3c1ed..6492db088a 100644 --- a/src/bytes/bytes.go +++ b/src/bytes/bytes.go @@ -774,6 +774,15 @@ func Replace(s, old, new []byte, n int) []byte { return t[0:w] } +// ReplaceAll returns a copy of the slice s with all +// non-overlapping instances of old replaced by new. +// If old is empty, it matches at the beginning of the slice +// and after each UTF-8 sequence, yielding up to k+1 replacements +// for a k-rune slice. +func ReplaceAll(s, old, new []byte) []byte { + return Replace(s, old, new, -1) +} + // EqualFold reports whether s and t, interpreted as UTF-8 strings, // are equal under Unicode case-folding. func EqualFold(s, t []byte) bool { diff --git a/src/bytes/bytes_test.go b/src/bytes/bytes_test.go index 55a22bae22..f4c0ffd2a9 100644 --- a/src/bytes/bytes_test.go +++ b/src/bytes/bytes_test.go @@ -1362,6 +1362,12 @@ func TestReplace(t *testing.T) { if cap(in) == cap(out) && &in[:1][0] == &out[:1][0] { t.Errorf("Replace(%q, %q, %q, %d) didn't copy", tt.in, tt.old, tt.new, tt.n) } + if tt.n == -1 { + out := ReplaceAll(in, []byte(tt.old), []byte(tt.new)) + if s := string(out); s != tt.out { + t.Errorf("ReplaceAll(%q, %q, %q) = %q, want %q", tt.in, tt.old, tt.new, s, tt.out) + } + } } } diff --git a/src/strings/strings.go b/src/strings/strings.go index b033c38e91..00200e4e24 100644 --- a/src/strings/strings.go +++ b/src/strings/strings.go @@ -874,6 +874,15 @@ func Replace(s, old, new string, n int) string { return string(t[0:w]) } +// ReplaceAll returns a copy of the string s with all +// non-overlapping instances of old replaced by new. +// If old is empty, it matches at the beginning of the string +// and after each UTF-8 sequence, yielding up to k+1 replacements +// for a k-rune string. +func ReplaceAll(s, old, new string) string { + return Replace(s, old, new, -1) +} + // EqualFold reports whether s and t, interpreted as UTF-8 strings, // are equal under Unicode case-folding. func EqualFold(s, t string) bool { diff --git a/src/strings/strings_test.go b/src/strings/strings_test.go index 20bc484f39..bb6a5b931b 100644 --- a/src/strings/strings_test.go +++ b/src/strings/strings_test.go @@ -1243,6 +1243,12 @@ func TestReplace(t *testing.T) { if s := Replace(tt.in, tt.old, tt.new, tt.n); s != tt.out { t.Errorf("Replace(%q, %q, %q, %d) = %q, want %q", tt.in, tt.old, tt.new, tt.n, s, tt.out) } + if tt.n == -1 { + s := ReplaceAll(tt.in, tt.old, tt.new) + if s != tt.out { + t.Errorf("ReplaceAll(%q, %q, %q) = %q, want %q", tt.in, tt.old, tt.new, s, tt.out) + } + } } } -- cgit v1.3