From 691f5c34ad7b7d676644dd749bc4ddb5d20b90d0 Mon Sep 17 00:00:00 2001 From: erifan01 Date: Thu, 19 Jul 2018 09:55:17 +0000 Subject: strings, bytes: optimize function Index This change compares the first two characters instead of the first one, and if they match, the entire string is compared. Comparing the first two characters helps to filter out the case where the first character matches but the subsequent characters do not match, thereby improving the substring search speed in this case. Benchmarks with no effect or minimal impact (less than 5%) is not listed, the following are improved benchmarks: On arm64: strings: IndexPeriodic/IndexPeriodic16-8 172890.00ns +- 2% 124156.20ns +- 0% -28.19% (p=0.008 n=5+5) IndexPeriodic/IndexPeriodic32-8 78092.80ns +- 0% 65138.60ns +- 0% -16.59% (p=0.008 n=5+5) IndexPeriodic/IndexPeriodic64-8 42322.20ns +- 0% 34661.60ns +- 0% -18.10% (p=0.008 n=5+5) bytes: IndexPeriodic/IndexPeriodic16-8 183468.20ns +- 6% 123759.00ns +- 0% -32.54% (p=0.008 n=5+5) IndexPeriodic/IndexPeriodic32-8 84776.40ns +- 0% 63907.80ns +- 0% -24.62% (p=0.008 n=5+5) IndexPeriodic/IndexPeriodic64-8 45835.60ns +- 0% 34194.20ns +- 0% -25.40% (p=0.008 n=5+5) On amd64: strings: IndexPeriodic/IndexPeriodic8-16 219499.00ns +- 0% 178123.40ns +- 0% -18.85% (p=0.008 n=5+5) IndexPeriodic/IndexPeriodic16-16 109760.20ns +- 0% 88957.80ns +- 0% -18.95% (p=0.008 n=5+5) IndexPeriodic/IndexPeriodic32-16 54943.00ns +- 0% 44573.80ns +- 0% -18.87% (p=0.008 n=5+5) IndexPeriodic/IndexPeriodic64-16 29804.80ns +- 0% 24417.80ns +- 0% -18.07% (p=0.008 n=5+5) bytes: IndexPeriodic/IndexPeriodic8-16 226592.60ns +- 0% 181183.20ns +- 0% -20.04% (p=0.008 n=5+5) IndexPeriodic/IndexPeriodic16-16 111432.60ns +- 0% 90634.60ns +- 0% -18.66% (p=0.008 n=5+5) IndexPeriodic/IndexPeriodic32-16 55640.60ns +- 0% 45433.00ns +- 0% -18.35% (p=0.008 n=5+5) IndexPeriodic/IndexPeriodic64-16 30833.00ns +- 0% 24784.20ns +- 0% -19.62% (p=0.008 n=5+5) Change-Id: I2d9e7e138d29e960d20a203eb74dc2ec976a9d71 Reviewed-on: https://go-review.googlesource.com/131177 Run-TryBot: Ian Lance Taylor TryBot-Result: Gobot Gobot Reviewed-by: Ian Lance Taylor --- src/bytes/bytes.go | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) (limited to 'src/bytes/bytes.go') diff --git a/src/bytes/bytes.go b/src/bytes/bytes.go index 77a7ce98e0..876fa3c1ed 100644 --- a/src/bytes/bytes.go +++ b/src/bytes/bytes.go @@ -849,21 +849,22 @@ func Index(s, sep []byte) int { if len(s) <= bytealg.MaxBruteForce { return bytealg.Index(s, sep) } - c := sep[0] + c0 := sep[0] + c1 := sep[1] i := 0 - t := s[:len(s)-n+1] + t := len(s) - n + 1 fails := 0 - for i < len(t) { - if t[i] != c { + for i < t { + if s[i] != c0 { // IndexByte is faster than bytealg.Index, so use it as long as // we're not getting lots of false positives. - o := IndexByte(t[i:], c) + o := IndexByte(s[i:t], c0) if o < 0 { return -1 } i += o } - if Equal(s[i:i+n], sep) { + if s[i+1] == c1 && Equal(s[i:i+n], sep) { return i } fails++ @@ -879,24 +880,25 @@ func Index(s, sep []byte) int { } return -1 } - c := sep[0] + c0 := sep[0] + c1 := sep[1] i := 0 fails := 0 - t := s[:len(s)-n+1] - for i < len(t) { - if t[i] != c { - o := IndexByte(t[i:], c) + t := len(s) - n + 1 + for i < t { + if s[i] != c0 { + o := IndexByte(s[i:t], c0) if o < 0 { break } i += o } - if Equal(s[i:i+n], sep) { + if s[i+1] == c1 && Equal(s[i:i+n], sep) { return i } i++ fails++ - if fails >= 4+i>>4 && i < len(t) { + if fails >= 4+i>>4 && i < t { // Give up on IndexByte, it isn't skipping ahead // far enough to be better than Rabin-Karp. // Experiments (using IndexPeriodic) suggest -- cgit v1.3 From ebdc0b8d68e04ad383088c8b3ab963de4a9b5c5d Mon Sep 17 00:00:00 2001 From: Brad Fitzpatrick Date: Wed, 26 Sep 2018 20:19:11 +0000 Subject: bytes, strings: add ReplaceAll Credit to Harald Nordgren for the proposal in https://golang.org/cl/137456 and #27864. Fixes #27864 Change-Id: I80546683b0623124fe4627a71af88add2f6c1c27 Reviewed-on: https://go-review.googlesource.com/137855 Reviewed-by: Ian Lance Taylor --- src/bytes/bytes.go | 9 +++++++++ src/bytes/bytes_test.go | 6 ++++++ src/strings/strings.go | 9 +++++++++ src/strings/strings_test.go | 6 ++++++ 4 files changed, 30 insertions(+) (limited to 'src/bytes/bytes.go') diff --git a/src/bytes/bytes.go b/src/bytes/bytes.go index 876fa3c1ed..6492db088a 100644 --- a/src/bytes/bytes.go +++ b/src/bytes/bytes.go @@ -774,6 +774,15 @@ func Replace(s, old, new []byte, n int) []byte { return t[0:w] } +// ReplaceAll returns a copy of the slice s with all +// non-overlapping instances of old replaced by new. +// If old is empty, it matches at the beginning of the slice +// and after each UTF-8 sequence, yielding up to k+1 replacements +// for a k-rune slice. +func ReplaceAll(s, old, new []byte) []byte { + return Replace(s, old, new, -1) +} + // EqualFold reports whether s and t, interpreted as UTF-8 strings, // are equal under Unicode case-folding. func EqualFold(s, t []byte) bool { diff --git a/src/bytes/bytes_test.go b/src/bytes/bytes_test.go index 55a22bae22..f4c0ffd2a9 100644 --- a/src/bytes/bytes_test.go +++ b/src/bytes/bytes_test.go @@ -1362,6 +1362,12 @@ func TestReplace(t *testing.T) { if cap(in) == cap(out) && &in[:1][0] == &out[:1][0] { t.Errorf("Replace(%q, %q, %q, %d) didn't copy", tt.in, tt.old, tt.new, tt.n) } + if tt.n == -1 { + out := ReplaceAll(in, []byte(tt.old), []byte(tt.new)) + if s := string(out); s != tt.out { + t.Errorf("ReplaceAll(%q, %q, %q) = %q, want %q", tt.in, tt.old, tt.new, s, tt.out) + } + } } } diff --git a/src/strings/strings.go b/src/strings/strings.go index b033c38e91..00200e4e24 100644 --- a/src/strings/strings.go +++ b/src/strings/strings.go @@ -874,6 +874,15 @@ func Replace(s, old, new string, n int) string { return string(t[0:w]) } +// ReplaceAll returns a copy of the string s with all +// non-overlapping instances of old replaced by new. +// If old is empty, it matches at the beginning of the string +// and after each UTF-8 sequence, yielding up to k+1 replacements +// for a k-rune string. +func ReplaceAll(s, old, new string) string { + return Replace(s, old, new, -1) +} + // EqualFold reports whether s and t, interpreted as UTF-8 strings, // are equal under Unicode case-folding. func EqualFold(s, t string) bool { diff --git a/src/strings/strings_test.go b/src/strings/strings_test.go index 20bc484f39..bb6a5b931b 100644 --- a/src/strings/strings_test.go +++ b/src/strings/strings_test.go @@ -1243,6 +1243,12 @@ func TestReplace(t *testing.T) { if s := Replace(tt.in, tt.old, tt.new, tt.n); s != tt.out { t.Errorf("Replace(%q, %q, %q, %d) = %q, want %q", tt.in, tt.old, tt.new, tt.n, s, tt.out) } + if tt.n == -1 { + s := ReplaceAll(tt.in, tt.old, tt.new) + if s != tt.out { + t.Errorf("ReplaceAll(%q, %q, %q) = %q, want %q", tt.in, tt.old, tt.new, s, tt.out) + } + } } } -- cgit v1.3