diff options
| author | Keith Randall <khr@golang.org> | 2020-07-06 20:46:31 -0700 |
|---|---|---|
| committer | Keith Randall <khr@golang.org> | 2020-08-17 22:00:17 +0000 |
| commit | 8b8f926fc3f0e8f002d0a8e97aab9500e4db83a7 (patch) | |
| tree | d6b48dfe11fcc7a5ff93cf99fc625c79d5aec234 /src/runtime/mpallocbits_test.go | |
| parent | 4e5ed83e8d2fbbbc8f6524f40ab3b6733dc57a38 (diff) | |
| download | go-8b8f926fc3f0e8f002d0a8e97aab9500e4db83a7.tar.xz | |
runtime: bit parallel implementation of findBitRange64
Use a bit-parallel implementation of findBitRange64.
It uses a repeated shift-'N-and technique to erase all the
free marks that are too small for the allocation.
Also some small improvements to find1.
name old time/op new time/op delta
FindBitRange64/Pattern00Size2-16 4.19ns ± 0% 2.26ns ± 0% -46.04% (p=0.000 n=10+8)
FindBitRange64/Pattern00Size8-16 4.19ns ± 0% 2.12ns ± 0% -49.35% (p=0.000 n=9+10)
FindBitRange64/Pattern00Size32-16 4.20ns ± 0% 2.12ns ± 0% -49.49% (p=0.000 n=10+8)
FindBitRange64/PatternFFFFFFFFFFFFFFFFSize2-16 2.13ns ± 0% 2.27ns ± 0% +6.28% (p=0.000 n=10+10)
FindBitRange64/PatternFFFFFFFFFFFFFFFFSize8-16 2.13ns ± 0% 4.46ns ± 0% +109.39% (p=0.000 n=10+9)
FindBitRange64/PatternFFFFFFFFFFFFFFFFSize32-16 2.13ns ± 1% 5.58ns ± 0% +162.37% (p=0.000 n=10+9)
FindBitRange64/PatternAASize2-16 22.2ns ± 0% 2.3ns ± 0% -89.82% (p=0.000 n=9+8)
FindBitRange64/PatternAASize8-16 22.2ns ± 0% 2.1ns ± 1% -90.41% (p=0.000 n=9+10)
FindBitRange64/PatternAASize32-16 22.2ns ± 0% 2.1ns ± 1% -90.43% (p=0.000 n=10+10)
FindBitRange64/PatternAAAAAAAAAAAAAAAASize2-16 156ns ± 1% 2ns ± 0% -98.54% (p=0.000 n=10+10)
FindBitRange64/PatternAAAAAAAAAAAAAAAASize8-16 155ns ± 1% 2ns ± 0% -98.63% (p=0.000 n=10+8)
FindBitRange64/PatternAAAAAAAAAAAAAAAASize32-16 155ns ± 0% 2ns ± 1% -98.63% (p=0.000 n=8+10)
FindBitRange64/Pattern80000000AAAAAAAASize2-16 81.2ns ± 0% 2.3ns ± 1% -97.21% (p=0.000 n=10+10)
FindBitRange64/Pattern80000000AAAAAAAASize8-16 81.1ns ± 0% 2.1ns ± 0% -97.39% (p=0.000 n=10+9)
FindBitRange64/Pattern80000000AAAAAAAASize32-16 81.1ns ± 0% 2.1ns ± 0% -97.38% (p=0.000 n=10+10)
FindBitRange64/PatternAAAAAAAA00000001Size2-16 76.8ns ± 1% 2.3ns ± 0% -97.05% (p=0.000 n=10+10)
FindBitRange64/PatternAAAAAAAA00000001Size8-16 76.6ns ± 0% 2.1ns ± 0% -97.23% (p=0.000 n=8+10)
FindBitRange64/PatternAAAAAAAA00000001Size32-16 76.7ns ± 0% 2.1ns ± 0% -97.23% (p=0.000 n=9+9)
FindBitRange64/PatternBBBBBBBBBBBBBBBBSize2-16 2.13ns ± 0% 2.27ns ± 0% +6.57% (p=0.000 n=8+8)
FindBitRange64/PatternBBBBBBBBBBBBBBBBSize8-16 76.7ns ± 0% 2.9ns ± 0% -96.20% (p=0.000 n=9+10)
FindBitRange64/PatternBBBBBBBBBBBBBBBBSize32-16 76.7ns ± 0% 2.9ns ± 0% -96.20% (p=0.000 n=10+10)
FindBitRange64/Pattern80000000BBBBBBBBSize2-16 2.12ns ± 0% 2.27ns ± 1% +6.74% (p=0.000 n=10+10)
FindBitRange64/Pattern80000000BBBBBBBBSize8-16 44.8ns ± 0% 2.9ns ± 0% -93.49% (p=0.000 n=9+10)
FindBitRange64/Pattern80000000BBBBBBBBSize32-16 44.9ns ± 0% 2.9ns ± 0% -93.49% (p=0.000 n=10+8)
FindBitRange64/PatternBBBBBBBB00000001Size2-16 4.20ns ± 1% 2.27ns ± 1% -46.02% (p=0.000 n=10+10)
FindBitRange64/PatternBBBBBBBB00000001Size8-16 44.9ns ± 0% 2.9ns ± 1% -93.51% (p=0.000 n=10+9)
FindBitRange64/PatternBBBBBBBB00000001Size32-16 44.9ns ± 0% 2.9ns ± 0% -93.51% (p=0.000 n=10+9)
FindBitRange64/PatternCCCCCCCCCCCCCCCCSize2-16 4.19ns ± 0% 2.26ns ± 0% -46.10% (p=0.000 n=10+10)
FindBitRange64/PatternCCCCCCCCCCCCCCCCSize8-16 76.5ns ± 0% 2.9ns ± 0% -96.19% (p=0.000 n=8+7)
FindBitRange64/PatternCCCCCCCCCCCCCCCCSize32-16 76.5ns ± 0% 2.9ns ± 0% -96.19% (p=0.000 n=10+8)
FindBitRange64/Pattern4444444444444444Size2-16 76.4ns ± 0% 2.3ns ± 0% -97.04% (p=0.000 n=8+10)
FindBitRange64/Pattern4444444444444444Size8-16 76.5ns ± 0% 2.1ns ± 0% -97.23% (p=0.000 n=9+10)
FindBitRange64/Pattern4444444444444444Size32-16 76.5ns ± 0% 2.1ns ± 0% -97.23% (p=0.000 n=8+10)
FindBitRange64/Pattern4040404040404040Size2-16 40.3ns ± 0% 2.3ns ± 0% -94.38% (p=0.000 n=7+10)
FindBitRange64/Pattern4040404040404040Size8-16 40.2ns ± 0% 2.1ns ± 0% -94.75% (p=0.000 n=10+10)
FindBitRange64/Pattern4040404040404040Size32-16 40.2ns ± 0% 2.1ns ± 0% -94.76% (p=0.000 n=10+6)
FindBitRange64/Pattern4000400040004000Size2-16 22.2ns ± 0% 2.2ns ± 0% -89.86% (p=0.001 n=8+9)
FindBitRange64/Pattern4000400040004000Size8-16 22.2ns ± 0% 2.1ns ± 0% -90.52% (p=0.000 n=8+10)
FindBitRange64/Pattern4000400040004000Size32-16 22.2ns ± 1% 2.1ns ± 0% -90.50% (p=0.000 n=10+10)
The cases that slow down aren't really that slow, and those inputs
never actually occur (there's a short circuit before the call to
findBitRange64 for that case).
Change-Id: I50fae62915098032d8ce7fa57ef29eee9deb01ba
Reviewed-on: https://go-review.googlesource.com/c/go/+/241279
Reviewed-by: Michael Knyszek <mknyszek@google.com>
Diffstat (limited to 'src/runtime/mpallocbits_test.go')
| -rw-r--r-- | src/runtime/mpallocbits_test.go | 33 |
1 files changed, 31 insertions, 2 deletions
diff --git a/src/runtime/mpallocbits_test.go b/src/runtime/mpallocbits_test.go index 42268a1698..5095e24220 100644 --- a/src/runtime/mpallocbits_test.go +++ b/src/runtime/mpallocbits_test.go @@ -504,10 +504,9 @@ func TestFindBitRange64(t *testing.T) { t.Errorf("case (%016x, %d): got %d, want %d", x, n, i, result) } } - for i := uint(0); i <= 64; i++ { + for i := uint(1); i <= 64; i++ { check(^uint64(0), i, 0) } - check(0, 0, 0) for i := uint(1); i <= 64; i++ { check(0, i, ^uint(0)) } @@ -520,3 +519,33 @@ func TestFindBitRange64(t *testing.T) { check(0xffff03ff0107ffff, 16, 0) check(0x0fff03ff01079fff, 16, ^uint(0)) } + +func BenchmarkFindBitRange64(b *testing.B) { + patterns := []uint64{ + 0, + ^uint64(0), + 0xaa, + 0xaaaaaaaaaaaaaaaa, + 0x80000000aaaaaaaa, + 0xaaaaaaaa00000001, + 0xbbbbbbbbbbbbbbbb, + 0x80000000bbbbbbbb, + 0xbbbbbbbb00000001, + 0xcccccccccccccccc, + 0x4444444444444444, + 0x4040404040404040, + 0x4000400040004000, + } + sizes := []uint{ + 2, 8, 32, + } + for _, pattern := range patterns { + for _, size := range sizes { + b.Run(fmt.Sprintf("Pattern%02XSize%d", pattern, size), func(b *testing.B) { + for i := 0; i < b.N; i++ { + FindBitRange64(pattern, size) + } + }) + } + } +} |
