From 4e5ed83e8d2fbbbc8f6524f40ab3b6733dc57a38 Mon Sep 17 00:00:00 2001 From: Keith Randall Date: Fri, 3 Jul 2020 11:28:50 -0700 Subject: runtime: use bit-parallel operations to compute heap bit summaries MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The new implementation is much faster in all cases. name old time/op new time/op delta PallocBitsSummarize/Unpacked00-16 142ns ± 1% 7ns ± 2% -94.75% (p=0.000 n=10+9) PallocBitsSummarize/UnpackedFFFFFFFFFFFFFFFF-16 172ns ± 0% 24ns ± 0% -86.02% (p=0.000 n=9+9) PallocBitsSummarize/UnpackedAA-16 145ns ± 0% 32ns ± 0% -78.16% (p=0.000 n=8+10) PallocBitsSummarize/UnpackedAAAAAAAAAAAAAAAA-16 172ns ± 0% 33ns ± 0% -80.95% (p=0.000 n=9+9) PallocBitsSummarize/Unpacked80000000AAAAAAAA-16 162ns ± 1% 60ns ± 0% -62.69% (p=0.000 n=10+9) PallocBitsSummarize/UnpackedAAAAAAAA00000001-16 163ns ± 0% 68ns ± 1% -58.47% (p=0.000 n=8+10) PallocBitsSummarize/UnpackedBBBBBBBBBBBBBBBB-16 172ns ± 0% 35ns ± 0% -79.70% (p=0.000 n=9+9) PallocBitsSummarize/Unpacked80000000BBBBBBBB-16 161ns ± 0% 63ns ± 0% -60.61% (p=0.000 n=8+10) PallocBitsSummarize/UnpackedBBBBBBBB00000001-16 163ns ± 0% 60ns ± 0% -63.14% (p=0.000 n=9+10) PallocBitsSummarize/UnpackedCCCCCCCCCCCCCCCC-16 172ns ± 0% 39ns ± 0% -77.41% (p=0.000 n=7+10) PallocBitsSummarize/Unpacked4444444444444444-16 172ns ± 0% 39ns ± 0% -77.42% (p=0.000 n=7+10) PallocBitsSummarize/Unpacked4040404040404040-16 173ns ± 2% 51ns ± 1% -70.55% (p=0.000 n=10+10) PallocBitsSummarize/Unpacked4000400040004000-16 160ns ± 1% 53ns ± 0% -66.78% (p=0.000 n=10+10) PallocBitsSummarize/Unpacked1000404044CCAAFF-16 169ns ± 1% 59ns ± 1% -65.28% (p=0.000 n=10+10) Change-Id: I94daa645b76a9cf9c93edeb2058d7132216fcb72 Reviewed-on: https://go-review.googlesource.com/c/go/+/240900 Run-TryBot: Keith Randall TryBot-Result: Gobot Gobot Reviewed-by: Michael Knyszek --- src/runtime/mpallocbits_test.go | 36 ++++++++++++++++++++++++------------ 1 file changed, 24 insertions(+), 12 deletions(-) (limited to 'src/runtime/mpallocbits_test.go') diff --git a/src/runtime/mpallocbits_test.go b/src/runtime/mpallocbits_test.go index 71a29f3b3a..42268a1698 100644 --- a/src/runtime/mpallocbits_test.go +++ b/src/runtime/mpallocbits_test.go @@ -101,7 +101,7 @@ func invertPallocBits(b *PallocBits) { // Ensures two packed summaries are identical, and reports a detailed description // of the difference if they're not. -func checkPallocSum(t *testing.T, got, want PallocSum) { +func checkPallocSum(t testing.TB, got, want PallocSum) { if got.Start() != want.Start() { t.Errorf("inconsistent start: got %d, want %d", got.Start(), want.Start()) } @@ -297,17 +297,29 @@ func TestPallocBitsSummarize(t *testing.T) { // Benchmarks how quickly we can summarize a PallocBits. func BenchmarkPallocBitsSummarize(b *testing.B) { - buf0 := new(PallocBits) - buf1 := new(PallocBits) - for i := 0; i < len(buf1); i++ { - buf1[i] = ^uint64(0) - } - bufa := new(PallocBits) - for i := 0; i < len(bufa); i++ { - bufa[i] = 0xaa - } - for _, buf := range []*PallocBits{buf0, buf1, bufa} { - b.Run(fmt.Sprintf("Unpacked%02X", buf[0]), func(b *testing.B) { + patterns := []uint64{ + 0, + ^uint64(0), + 0xaa, + 0xaaaaaaaaaaaaaaaa, + 0x80000000aaaaaaaa, + 0xaaaaaaaa00000001, + 0xbbbbbbbbbbbbbbbb, + 0x80000000bbbbbbbb, + 0xbbbbbbbb00000001, + 0xcccccccccccccccc, + 0x4444444444444444, + 0x4040404040404040, + 0x4000400040004000, + 0x1000404044ccaaff, + } + for _, p := range patterns { + buf := new(PallocBits) + for i := 0; i < len(buf); i++ { + buf[i] = p + } + b.Run(fmt.Sprintf("Unpacked%02X", p), func(b *testing.B) { + checkPallocSum(b, buf.Summarize(), SummarizeSlow(buf)) for i := 0; i < b.N; i++ { buf.Summarize() } -- cgit v1.3