author    Gavin Lam <gavin.oss@tutamail.com>  2026-01-23 03:44:50 +0000
committer Gopher Robot <gobot@golang.org>     2026-01-23 14:41:52 -0800
commit    27fcec4d8f3041e845762303d85d868cc3e351e8 (patch)
tree      d1e01667e89ba83ddf89296a22921b2e39f76b04
parent    478d86446e88dc9e0b46e08914cb564d7c705d1e (diff)
runtime: speed up cheaprand and cheaprand64
The current cheaprand performs 128-bit multiplication on 64-bit numbers and truncates the result to 32 bits, which is inefficient. A 32-bit-specific implementation is more performant because it performs 64-bit multiplication on 32-bit numbers instead.

The current cheaprand64 involves two cheaprand calls. Implementing it as 64-bit wyrand is significantly faster. Since cheaprand64 discards one bit, I have preserved this behavior. The underlying uint64 function is made available as cheaprandu64.

                │     old     │                 new                 │
                │   sec/op    │    sec/op     vs base               │
Cheaprand-8       1.358n ± 0%    1.218n ± 0%  -10.31% (n=100)
Cheaprand64-8     2.424n ± 0%    1.391n ± 0%  -42.62% (n=100)
Blocksampled-8    8.347n ± 0%    2.022n ± 0%  -75.78% (n=100)

Fixes #77149

Change-Id: Ib0b5da4a642cd34d0401b03c1d343041f8230d11
GitHub-Last-Rev: 549d8d407e2bbcaecdee0b52cbf3a513dda637fb
GitHub-Pull-Request: golang/go#77150
Reviewed-on: https://go-review.googlesource.com/c/go/+/735480
Auto-Submit: Keith Randall <khr@golang.org>
Reviewed-by: Carlos Amedee <carlos@golang.org>
Reviewed-by: Keith Randall <khr@google.com>
Reviewed-by: Keith Randall <khr@golang.org>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
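For illustration, a standalone sketch of the change described above, outside the runtime: the old path keeps 64-bit state and computes a full 128-bit product via bits.Mul64 only to keep 32 bits of it, while the new path keeps 32-bit state and computes a 64-bit product via bits.Mul32. The helper names here are illustrative, not part of the patch; the constants are taken from the diff below.

	package main

	import (
		"fmt"
		"math/bits"
	)

	// wyrand32Old mirrors the previous cheaprand: 64-bit state,
	// a 128-bit product via bits.Mul64, truncated to 32 bits.
	func wyrand32Old(state *uint64) uint32 {
		*state += 0xa0761d6478bd642f
		hi, lo := bits.Mul64(*state, *state^0xe7037ed1a0b428db)
		return uint32(hi ^ lo)
	}

	// wyrand32New mirrors the 32-bit-specific variant: 32-bit state
	// and a 64-bit product via bits.Mul32, which is a single native
	// multiply on 64-bit targets.
	func wyrand32New(state *uint32) uint32 {
		*state += 0x53c5ca59
		hi, lo := bits.Mul32(*state, *state^0x74743c1b)
		return hi ^ lo
	}

	func main() {
		var s64 uint64 = 1
		var s32 uint32 = 1
		fmt.Println(wyrand32Old(&s64), wyrand32New(&s32))
	}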
-rw-r--r--src/runtime/export_test.go4
-rw-r--r--src/runtime/lock_spinbit.go12
-rw-r--r--src/runtime/mprof.go4
-rw-r--r--src/runtime/mprof_test.go16
-rw-r--r--src/runtime/rand.go47
-rw-r--r--src/runtime/rand_test.go12
-rw-r--r--src/runtime/runtime2.go5
7 files changed, 75 insertions, 25 deletions
diff --git a/src/runtime/export_test.go b/src/runtime/export_test.go
index 4f6ef9a3f2..fb0bfd3904 100644
--- a/src/runtime/export_test.go
+++ b/src/runtime/export_test.go
@@ -273,6 +273,10 @@ func CountPagesInUse() (pagesInUse, counted uintptr) {
return
}
+func Blocksampled(cycles, rate int64) bool { return blocksampled(cycles, rate) }
+
+func Cheaprand() uint32 { return cheaprand() }
+func Cheaprand64() int64 { return cheaprand64() }
func Fastrand() uint32 { return uint32(rand()) }
func Fastrand64() uint64 { return rand() }
func Fastrandn(n uint32) uint32 { return randn(n) }
diff --git a/src/runtime/lock_spinbit.go b/src/runtime/lock_spinbit.go
index 039ea6f565..590be8a0c5 100644
--- a/src/runtime/lock_spinbit.go
+++ b/src/runtime/lock_spinbit.go
@@ -327,16 +327,8 @@ func unlock2(l *mutex) {
// mutexSampleContention returns whether the current mutex operation should
// report any contention it discovers.
func mutexSampleContention() bool {
- if rate := int64(atomic.Load64(&mutexprofilerate)); rate <= 0 {
- return false
- } else {
- // TODO: have SetMutexProfileFraction do the clamping
- rate32 := uint32(rate)
- if int64(rate32) != rate {
- rate32 = ^uint32(0)
- }
- return cheaprandn(rate32) == 0
- }
+ rate := atomic.Load64(&mutexprofilerate)
+ return rate > 0 && cheaprandu64()%rate == 0
}
// unlock2Wake updates the list of Ms waiting on l, waking an M if necessary.
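As a sanity check on the new sampling expression, a minimal standalone sketch (sampled is a hypothetical stand-in for mutexSampleContention, with math/rand/v2 in place of the runtime generator): each event is reported with probability roughly 1/rate, and for rates far below 2^64 the modulo bias is negligible.

	package main

	import (
		"fmt"
		"math/rand/v2"
	)

	// sampled mirrors the new logic: report roughly one in every
	// rate events; a rate of zero disables sampling entirely.
	func sampled(rate uint64) bool {
		return rate > 0 && rand.Uint64()%rate == 0
	}

	func main() {
		const rate, trials = 100, 1_000_000
		hits := 0
		for range trials {
			if sampled(rate) {
				hits++
			}
		}
		fmt.Println(hits) // expect roughly trials/rate, i.e. about 10000
	}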
diff --git a/src/runtime/mprof.go b/src/runtime/mprof.go
index 0452b1b605..a4cfef72fa 100644
--- a/src/runtime/mprof.go
+++ b/src/runtime/mprof.go
@@ -696,8 +696,8 @@ func (prof *mLockProfile) recordUnlock(cycles int64) {
if cycles == 0 {
return
}
- prevScore := uint64(cheaprand64()) % uint64(prev)
- thisScore := uint64(cheaprand64()) % uint64(cycles)
+ prevScore := cheaprandu64() % uint64(prev)
+ thisScore := cheaprandu64() % uint64(cycles)
if prevScore > thisScore {
prof.cyclesLost += cycles
return
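The surrounding logic in recordUnlock is a weighted coin flip: each candidate event draws a score uniform in [0, weight), and the event with the higher score is kept, so heavier (longer) contention events survive more often. A standalone sketch with a hypothetical keepNew helper, again using math/rand/v2 in place of the runtime generator:

	package main

	import (
		"fmt"
		"math/rand/v2"
	)

	// keepNew draws one score uniform in [0, prev) and one uniform
	// in [0, cycles), keeping the new event when its score is at
	// least as large, so heavier events win more often.
	func keepNew(prev, cycles int64) bool {
		prevScore := rand.Uint64() % uint64(prev)
		thisScore := rand.Uint64() % uint64(cycles)
		return thisScore >= prevScore
	}

	func main() {
		wins := 0
		const trials = 1_000_000
		for range trials {
			if keepNew(100, 300) { // new event is 3x heavier
				wins++
			}
		}
		fmt.Println(wins) // the heavier event wins well over half the time
	}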
diff --git a/src/runtime/mprof_test.go b/src/runtime/mprof_test.go
new file mode 100644
index 0000000000..d819c11d1d
--- /dev/null
+++ b/src/runtime/mprof_test.go
@@ -0,0 +1,16 @@
+// Copyright 2026 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime_test
+
+import (
+ . "runtime"
+ "testing"
+)
+
+func BenchmarkBlocksampled(b *testing.B) {
+ for b.Loop() {
+ Blocksampled(42, 1337)
+ }
+}
diff --git a/src/runtime/rand.go b/src/runtime/rand.go
index a30845b585..353e0e0f42 100644
--- a/src/runtime/rand.go
+++ b/src/runtime/rand.go
@@ -192,7 +192,8 @@ func mrandinit(mp *m) {
}
bootstrapRandReseed() // erase key we just extracted
mp.chacha8.Init64(seed)
- mp.cheaprand = rand()
+ mp.cheaprand = uint32(rand())
+ mp.cheaprand64 = rand()
}
// randn is like rand() % n but faster.
@@ -227,14 +228,12 @@ func randn(n uint32) uint32 {
func cheaprand() uint32 {
mp := getg().m
// Implement wyrand: https://github.com/wangyi-fudan/wyhash
- // Only the platform that bits.Mul64 can be lowered
- // by the compiler should be in this list.
- if goarch.IsAmd64|goarch.IsArm64|goarch.IsPpc64|
- goarch.IsPpc64le|goarch.IsMips64|goarch.IsMips64le|
- goarch.IsS390x|goarch.IsRiscv64|goarch.IsLoong64 == 1 {
- mp.cheaprand += 0xa0761d6478bd642f
- hi, lo := bits.Mul64(mp.cheaprand, mp.cheaprand^0xe7037ed1a0b428db)
- return uint32(hi ^ lo)
+ // Only platforms with a native 64-bit multiply
+ // (bits.UintSize == 64) should take this path.
+ if bits.UintSize == 64 {
+ mp.cheaprand += 0x53c5ca59
+ hi, lo := bits.Mul32(mp.cheaprand, mp.cheaprand^0x74743c1b)
+ return hi ^ lo
}
// Implement xorshift64+: 2 32-bit xorshift sequences added together.
@@ -242,7 +241,7 @@ func cheaprand() uint32 {
// Xorshift paper: https://www.jstatsoft.org/article/view/v008i14/xorshift.pdf
// This generator passes the SmallCrush suite, part of TestU01 framework:
// http://simul.iro.umontreal.ca/testu01/tu01.html
- t := (*[2]uint32)(unsafe.Pointer(&mp.cheaprand))
+ t := (*[2]uint32)(unsafe.Pointer(&mp.cheaprand64))
s1, s0 := t[0], t[1]
s1 ^= s1 << 17
s1 = s1 ^ s0 ^ s1>>7 ^ s0>>16
@@ -269,7 +268,33 @@ func cheaprand() uint32 {
//go:linkname cheaprand64
//go:nosplit
func cheaprand64() int64 {
- return int64(cheaprand())<<31 ^ int64(cheaprand())
+ return int64(cheaprandu64() & ^(uint64(1) << 63))
+}
+
+// cheaprandu64 is a non-cryptographic-quality 64-bit random generator
+// suitable for calling at very high frequency (such as during sampling decisions).
+// It is "cheap" in the sense of both expense and quality.
+//
+// cheaprandu64 must not be exported to other packages:
+// the rule is that other packages using runtime-provided
+// randomness must always use rand.
+//
+//go:nosplit
+func cheaprandu64() uint64 {
+ // Implement wyrand: https://github.com/wangyi-fudan/wyhash
+ // Only platforms where the compiler can lower bits.Mul64
+ // to a native instruction should be in this list.
+ if goarch.IsAmd64|goarch.IsArm64|goarch.IsPpc64|
+ goarch.IsPpc64le|goarch.IsMips64|goarch.IsMips64le|
+ goarch.IsS390x|goarch.IsRiscv64|goarch.IsLoong64 == 1 {
+ mp := getg().m
+ mp.cheaprand64 += 0xa0761d6478bd642f
+ hi, lo := bits.Mul64(mp.cheaprand64, mp.cheaprand64^0xe7037ed1a0b428db)
+ return hi ^ lo
+ }
+
+ return uint64(cheaprand())<<32 | uint64(cheaprand())
}
// cheaprandn is like cheaprand() % n but faster.
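The masking in the new cheaprand64 clears bit 63 of the uint64 draw, so the int64 result is always non-negative and carries 63 bits of randomness, matching the range of the old int64(cheaprand())<<31 ^ int64(cheaprand()) construction. A minimal check:

	package main

	import "fmt"

	func main() {
		// Worst case: every bit set. Clearing bit 63 keeps the
		// int64 conversion non-negative and caps it at 2^63-1.
		u := uint64(0xffffffffffffffff)
		v := int64(u & ^(uint64(1) << 63))
		fmt.Println(v >= 0, v == 1<<63-1) // true true
	}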
diff --git a/src/runtime/rand_test.go b/src/runtime/rand_test.go
index baecb6984d..79b888fcfa 100644
--- a/src/runtime/rand_test.go
+++ b/src/runtime/rand_test.go
@@ -22,6 +22,18 @@ func TestReadRandom(t *testing.T) {
}
}
+func BenchmarkCheaprand(b *testing.B) {
+ for b.Loop() {
+ Cheaprand()
+ }
+}
+
+func BenchmarkCheaprand64(b *testing.B) {
+ for b.Loop() {
+ Cheaprand64()
+ }
+}
+
func BenchmarkFastrand(b *testing.B) {
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go
index be33932b24..bf47955d0d 100644
--- a/src/runtime/runtime2.go
+++ b/src/runtime/runtime2.go
@@ -715,8 +715,9 @@ type m struct {
mOS
-	chacha8   chacha8rand.State
-	cheaprand uint64
+	chacha8     chacha8rand.State
+	cheaprand   uint32
+	cheaprand64 uint64
// Up to 10 locks held by this m, maintained by the lock ranking code.
locksHeldLen int