2 files changed, 58 insertions, 8 deletions
diff --git a/src/strings/strings.go b/src/strings/strings.go
index fee161e4cc..646161fdda 100644
--- a/src/strings/strings.go
+++ b/src/strings/strings.go
@@ -520,8 +520,8 @@ func Map(mapping func(rune) rune, s string) string {
 
 // Repeat returns a new string consisting of count copies of the string s.
 //
-// It panics if count is negative or if
-// the result of (len(s) * count) overflows.
+// It panics if count is negative or if the result of (len(s) * count)
+// overflows.
 func Repeat(s string, count int) string {
 	switch count {
 	case 0:
@@ -533,24 +533,50 @@ func Repeat(s string, count int) string {
 	// Since we cannot return an error on overflow,
 	// we should panic if the repeat will generate
 	// an overflow.
-	// See Issue golang.org/issue/16237
+	// See golang.org/issue/16237.
 	if count < 0 {
 		panic("strings: negative Repeat count")
 	} else if len(s)*count/count != len(s) {
 		panic("strings: Repeat count causes overflow")
 	}
 
+	if len(s) == 0 {
+		return ""
+	}
+
 	n := len(s) * count
+
+	// Past a certain chunk size it is counterproductive to use
+	// larger chunks as the source of the write: when the source
+	// is too large, we are basically just thrashing the CPU D-cache.
+	// So if the result length is larger than an empirically found
+	// limit (8KB), we stop growing the source string once the limit
+	// is reached and keep reusing the same source string, which
+	// should therefore always be resident in the L1 cache, until we
+	// have completed the construction of the result.
+	// This yields significant speedups (up to +100%) in cases where
+	// the result length is large (roughly, over the L2 cache size).
+	const chunkLimit = 8 * 1024
+	chunkMax := n
+	if n > chunkLimit {
+		chunkMax = chunkLimit / len(s) * len(s)
+		if chunkMax == 0 {
+			chunkMax = len(s)
+		}
+	}
+
 	var b Builder
 	b.Grow(n)
 	b.WriteString(s)
 	for b.Len() < n {
-		if b.Len() <= n/2 {
-			b.WriteString(b.String())
-		} else {
-			b.WriteString(b.String()[:n-b.Len()])
-			break
+		chunk := n - b.Len()
+		if chunk > b.Len() {
+			chunk = b.Len()
+		}
+		if chunk > chunkMax {
+			chunk = chunkMax
 		}
+		b.WriteString(b.String()[:chunk])
 	}
 	return b.String()
 }
diff --git a/src/strings/strings_test.go b/src/strings/strings_test.go
index 210bd9e44b..27489c2d16 100644
--- a/src/strings/strings_test.go
+++ b/src/strings/strings_test.go
@@ -1105,6 +1105,8 @@ func TestCaseConsistency(t *testing.T) {
 	*/
 }
 
+var longString = "a" + string(make([]byte, 1<<16)) + "z" // 'a', then 1<<16 zero bytes, then 'z'
+
 var RepeatTests = []struct {
 	in, out string
 	count   int
@@ -1116,6 +1118,9 @@ var RepeatTests = []struct {
 	{"-", "-", 1},
 	{"-", "----------", 10},
 	{"abc ", "abc abc abc ", 3},
+	// Tests for results over Repeat's chunkLimit (8KB), with sources both shorter and longer than the limit.
+	{string(rune(0)), string(make([]byte, 1<<16)), 1 << 16},
+	{longString, longString + longString, 2},
 }
 
 func TestRepeat(t *testing.T) {
@@ -1885,6 +1890,25 @@ func BenchmarkRepeat(b *testing.B) {
 	}
 }
 
+func BenchmarkRepeatLarge(b *testing.B) {
+	s := Repeat("@", 8*1024) // 8 KiB of '@'; the k-byte sources used below are prefixes of it
+	for j := 8; j <= 30; j++ {
+		for _, k := range []int{1, 16, 4097} { // k is the length in bytes of the string being repeated
+			s := s[:k]
+			n := (1 << j) / k // copies of s that fit in 1<<j bytes; 0 (skipped below) when k > 1<<j
+			if n == 0 {
+				continue
+			}
+			b.Run(fmt.Sprintf("%d/%d", 1<<j, k), func(b *testing.B) { // sub-benchmark name is "<1<<j>/<k>"
+				for i := 0; i < b.N; i++ {
+					Repeat(s, n)
+				}
+				b.SetBytes(int64(n * len(s))) // length of the string built by each Repeat call (n*k bytes)
+			})
+		}
+	}
+}
+
 func BenchmarkIndexAnyASCII(b *testing.B) {
 	x := Repeat("#", 2048) // Never matches set
 	cs := "0123456789abcdefghijklmnopqrstuvwxyz0123456789abcdefghijklmnopqrstuvwxyz"