diff options
Diffstat (limited to 'src/strings')
| -rw-r--r-- | src/strings/strings.go | 42 | ||||
| -rw-r--r-- | src/strings/strings_test.go | 24 |
2 files changed, 58 insertions, 8 deletions
diff --git a/src/strings/strings.go b/src/strings/strings.go index fee161e4cc..646161fdda 100644 --- a/src/strings/strings.go +++ b/src/strings/strings.go @@ -520,8 +520,8 @@ func Map(mapping func(rune) rune, s string) string { // Repeat returns a new string consisting of count copies of the string s. // -// It panics if count is negative or if -// the result of (len(s) * count) overflows. +// It panics if count is negative or if the result of (len(s) * count) +// overflows. func Repeat(s string, count int) string { switch count { case 0: @@ -533,24 +533,50 @@ func Repeat(s string, count int) string { // Since we cannot return an error on overflow, // we should panic if the repeat will generate // an overflow. - // See Issue golang.org/issue/16237 + // See golang.org/issue/16237. if count < 0 { panic("strings: negative Repeat count") } else if len(s)*count/count != len(s) { panic("strings: Repeat count causes overflow") } + if len(s) == 0 { + return "" + } + n := len(s) * count + + // Past a certain chunk size it is counterproductive to use + // larger chunks as the source of the write, as when the source + // is too large we are basically just thrashing the CPU D-cache. + // So if the result length is larger than an empirically-found + // limit (8KB), we stop growing the source string once the limit + // is reached and keep reusing the same source string - that + // should therefore be always resident in the L1 cache - until we + // have completed the construction of the result. + // This yields significant speedups (up to +100%) in cases where + // the result length is large (roughly, over L2 cache size). + const chunkLimit = 8 * 1024 + chunkMax := n + if n > chunkLimit { + chunkMax = chunkLimit / len(s) * len(s) + if chunkMax == 0 { + chunkMax = len(s) + } + } + var b Builder b.Grow(n) b.WriteString(s) for b.Len() < n { - if b.Len() <= n/2 { - b.WriteString(b.String()) - } else { - b.WriteString(b.String()[:n-b.Len()]) - break + chunk := n - b.Len() + if chunk > b.Len() { + chunk = b.Len() + } + if chunk > chunkMax { + chunk = chunkMax } + b.WriteString(b.String()[:chunk]) } return b.String() } diff --git a/src/strings/strings_test.go b/src/strings/strings_test.go index 210bd9e44b..27489c2d16 100644 --- a/src/strings/strings_test.go +++ b/src/strings/strings_test.go @@ -1105,6 +1105,8 @@ func TestCaseConsistency(t *testing.T) { */ } +var longString = "a" + string(make([]byte, 1<<16)) + "z" + var RepeatTests = []struct { in, out string count int @@ -1116,6 +1118,9 @@ var RepeatTests = []struct { {"-", "-", 1}, {"-", "----------", 10}, {"abc ", "abc abc abc ", 3}, + // Tests for results over the chunkLimit + {string(rune(0)), string(make([]byte, 1<<16)), 1 << 16}, + {longString, longString + longString, 2}, } func TestRepeat(t *testing.T) { @@ -1885,6 +1890,25 @@ func BenchmarkRepeat(b *testing.B) { } } +func BenchmarkRepeatLarge(b *testing.B) { + s := Repeat("@", 8*1024) + for j := 8; j <= 30; j++ { + for _, k := range []int{1, 16, 4097} { + s := s[:k] + n := (1 << j) / k + if n == 0 { + continue + } + b.Run(fmt.Sprintf("%d/%d", 1<<j, k), func(b *testing.B) { + for i := 0; i < b.N; i++ { + Repeat(s, n) + } + b.SetBytes(int64(n * len(s))) + }) + } + } +} + func BenchmarkIndexAnyASCII(b *testing.B) { x := Repeat("#", 2048) // Never matches set cs := "0123456789abcdefghijklmnopqrstuvwxyz0123456789abcdefghijklmnopqrstuvwxyz" |
