aboutsummaryrefslogtreecommitdiff
path: root/src/runtime/histogram.go
diff options
context:
space:
mode:
authorMichael Anthony Knyszek <mknyszek@google.com>2022-08-30 03:13:36 +0000
committerGopher Robot <gobot@golang.org>2022-09-16 16:32:01 +0000
commit87eda2a782db9b7ad2ec1fd335ed6c7472aa66bc (patch)
treeddce2915a770b3fa37fac8410352924a47055cfb /src/runtime/histogram.go
parent1fc83690e68de1ce252975c5fd3a232629d6a3d6 (diff)
downloadgo-87eda2a782db9b7ad2ec1fd335ed6c7472aa66bc.tar.xz
runtime: shrink time histogram buckets
There are lots of useless buckets with too much precision. Introduce a minimum level of precision with a minimum bucket bit. This cuts down on the size of a time histogram dramatically (~3x). Also, pick a smaller sub bucket count; we don't need 6% precision. Also, rename super-buckets to buckets to more closely line up with HDR histogram literature. Change-Id: I199449650e4b34f2a6dca3cf1d8edb071c6655c0 Reviewed-on: https://go-review.googlesource.com/c/go/+/427615 Run-TryBot: Michael Knyszek <mknyszek@google.com> Auto-Submit: Michael Knyszek <mknyszek@google.com> TryBot-Result: Gopher Robot <gobot@golang.org> Reviewed-by: Michael Pratt <mpratt@google.com>
Diffstat (limited to 'src/runtime/histogram.go')
-rw-r--r--src/runtime/histogram.go169
1 files changed, 95 insertions, 74 deletions
diff --git a/src/runtime/histogram.go b/src/runtime/histogram.go
index d2e6367c84..43dfe61901 100644
--- a/src/runtime/histogram.go
+++ b/src/runtime/histogram.go
@@ -12,63 +12,77 @@ import (
const (
// For the time histogram type, we use an HDR histogram.
- // Values are placed in super-buckets based solely on the most
- // significant set bit. Thus, super-buckets are power-of-2 sized.
+ // Values are placed in buckets based solely on the most
+ // significant set bit. Thus, buckets are power-of-2 sized.
// Values are then placed into sub-buckets based on the value of
// the next timeHistSubBucketBits most significant bits. Thus,
- // sub-buckets are linear within a super-bucket.
+ // sub-buckets are linear within a bucket.
//
// Therefore, the number of sub-buckets (timeHistNumSubBuckets)
// defines the error. This error may be computed as
// 1/timeHistNumSubBuckets*100%. For example, for 16 sub-buckets
- // per super-bucket the error is approximately 6%.
+ // per bucket the error is approximately 6%.
//
- // The number of super-buckets (timeHistNumSuperBuckets), on the
- // other hand, defines the range. To reserve room for sub-buckets,
- // bit timeHistSubBucketBits is the first bit considered for
- // super-buckets, so super-bucket indices are adjusted accordingly.
+ // The number of buckets (timeHistNumBuckets), on the
+ // other hand, defines the range. To avoid producing a large number
+ // of buckets that are close together, especially for small numbers
+ // (e.g. 1, 2, 3, 4, 5 ns) that aren't very useful, timeHistNumBuckets
+ // is defined in terms of the least significant bit (timeHistMinBucketBits)
+ // that needs to be set before we start bucketing and the most
+ // significant bit (timeHistMaxBucketBits) that we bucket before we just
+ // dump it into a catch-all bucket.
//
- // As an example, consider 45 super-buckets with 16 sub-buckets.
+ // As an example, consider the configuration:
//
- // 00110
- // ^----
- // │ ^
- // │ └---- Lowest 4 bits -> sub-bucket 6
- // └------- Bit 4 unset -> super-bucket 0
+ // timeHistMinBucketBits = 9
+ // timeHistMaxBucketBits = 48
+ // timeHistSubBucketBits = 2
//
- // 10110
- // ^----
- // │ ^
- // │ └---- Next 4 bits -> sub-bucket 6
- // └------- Bit 4 set -> super-bucket 1
- // 100010
- // ^----^
- // │ ^ └-- Lower bits ignored
- // │ └---- Next 4 bits -> sub-bucket 1
- // └------- Bit 5 set -> super-bucket 2
+ // Then:
//
- // Following this pattern, super-bucket 44 will have the bit 47 set. We don't
- // have any buckets for higher values, so the highest sub-bucket will
- // contain values of 2^48-1 nanoseconds or approx. 3 days. This range is
- // more than enough to handle durations produced by the runtime.
- timeHistSubBucketBits = 4
- timeHistNumSubBuckets = 1 << timeHistSubBucketBits
- timeHistNumSuperBuckets = 45
- timeHistTotalBuckets = timeHistNumSuperBuckets*timeHistNumSubBuckets + 1
+ // 011000001
+ // ^--
+ // │ ^
+ // │ └---- Next 2 bits -> sub-bucket 3
+ // └------- Bit 9 unset -> bucket 0
+ //
+ // 110000001
+ // ^--
+ // │ ^
+ // │ └---- Next 2 bits -> sub-bucket 2
+ // └------- Bit 9 set -> bucket 1
+ //
+ // 1000000010
+ // ^-- ^
+ // │ ^ └-- Lower bits ignored
+ // │ └---- Next 2 bits -> sub-bucket 0
+ // └------- Bit 10 set -> bucket 2
+ //
+ // Following this pattern, bucket 38 will have the bit 46 set. We don't
+ // have any buckets for higher values, so we spill the rest into an overflow
+ // bucket containing values of 2^47-1 nanoseconds or approx. 1 day or more.
+ // This range is more than enough to handle durations produced by the runtime.
+ timeHistMinBucketBits = 9
+ timeHistMaxBucketBits = 48 // Note that this is exclusive; 1 higher than the actual range.
+ timeHistSubBucketBits = 2
+ timeHistNumSubBuckets = 1 << timeHistSubBucketBits
+ timeHistNumBuckets = timeHistMaxBucketBits - timeHistMinBucketBits + 1
+ // Two extra buckets, one for underflow, one for overflow.
+ timeHistTotalBuckets = timeHistNumBuckets*timeHistNumSubBuckets + 2
)
// timeHistogram represents a distribution of durations in
// nanoseconds.
//
// The accuracy and range of the histogram is defined by the
-// timeHistSubBucketBits and timeHistNumSuperBuckets constants.
+// timeHistSubBucketBits and timeHistNumBuckets constants.
//
// It is an HDR histogram with exponentially-distributed
// buckets and linearly distributed sub-buckets.
//
// The histogram is safe for concurrent reads and writes.
type timeHistogram struct {
- counts [timeHistNumSuperBuckets * timeHistNumSubBuckets]atomic.Uint64
+ counts [timeHistNumBuckets * timeHistNumSubBuckets]atomic.Uint64
// underflow counts all the times we got a negative duration
// sample. Because of how time works on some platforms, it's
@@ -76,6 +90,10 @@ type timeHistogram struct {
// but we record them anyway because it's better to have some
// signal that it's happening than just missing samples.
underflow atomic.Uint64
+
+ // overflow counts all the times we got a duration that exceeded
+ // the range counts represents.
+ overflow atomic.Uint64
}
// record adds the given duration to the distribution.
@@ -85,36 +103,35 @@ type timeHistogram struct {
//
//go:nosplit
func (h *timeHistogram) record(duration int64) {
+ // If the duration is negative, capture that in underflow.
if duration < 0 {
h.underflow.Add(1)
return
}
- // The index of the exponential bucket is just the index
- // of the highest set bit adjusted for how many bits we
- // use for the subbucket. Note that it's timeHistSubBucketsBits-1
- // because we use the 0th bucket to hold values < timeHistNumSubBuckets.
- var superBucket, subBucket uint
- if duration >= timeHistNumSubBuckets {
- // At this point, we know the duration value will always be
- // at least timeHistSubBucketsBits long.
- superBucket = uint(sys.Len64(uint64(duration))) - timeHistSubBucketBits
- if superBucket*timeHistNumSubBuckets >= uint(len(h.counts)) {
- // The bucket index we got is larger than what we support, so
- // include this count in the highest bucket, which extends to
- // infinity.
- superBucket = timeHistNumSuperBuckets - 1
- subBucket = timeHistNumSubBuckets - 1
- } else {
- // The linear subbucket index is just the timeHistSubBucketsBits
- // bits after the top bit. To extract that value, shift down
- // the duration such that we leave the top bit and the next bits
- // intact, then extract the index.
- subBucket = uint((duration >> (superBucket - 1)) % timeHistNumSubBuckets)
- }
+ // bucketBit is the target bit for the bucket which is usually the
+ // highest 1 bit, but if we're less than the minimum, is the highest
+ // 1 bit of the minimum (which will be zero in the duration).
+ //
+ // bucket is the bucket index, which is the bucketBit minus the
+ // highest bit of the minimum, plus one to leave room for the catch-all
+ // bucket for samples lower than the minimum.
+ var bucketBit, bucket uint
+ if l := sys.Len64(uint64(duration)); l < timeHistMinBucketBits {
+ bucketBit = timeHistMinBucketBits
+ bucket = 0 // bucketBit - timeHistMinBucketBits
} else {
- subBucket = uint(duration)
+ bucketBit = uint(l)
+ bucket = bucketBit - timeHistMinBucketBits + 1
+ }
+ // If the bucket we computed is greater than the number of buckets,
+ // count that in overflow.
+ if bucket >= timeHistNumBuckets {
+ h.overflow.Add(1)
+ return
}
- h.counts[superBucket*timeHistNumSubBuckets+subBucket].Add(1)
+ // The sub-bucket index is just next timeHistSubBucketBits after the bucketBit.
+ subBucket := uint(duration>>(bucketBit-1-timeHistSubBucketBits)) % timeHistNumSubBuckets
+ h.counts[bucket*timeHistNumSubBuckets+subBucket].Add(1)
}
const (
@@ -137,33 +154,37 @@ func float64NegInf() float64 {
// not nanoseconds like the timeHistogram represents durations.
func timeHistogramMetricsBuckets() []float64 {
b := make([]float64, timeHistTotalBuckets+1)
+ // Underflow bucket.
b[0] = float64NegInf()
- // Super-bucket 0 has no bits above timeHistSubBucketBits
- // set, so just iterate over each bucket and assign the
- // incrementing bucket.
- for i := 0; i < timeHistNumSubBuckets; i++ {
- bucketNanos := uint64(i)
- b[i+1] = float64(bucketNanos) / 1e9
+
+ for j := 0; j < timeHistNumSubBuckets; j++ {
+ // No bucket bit for the first few buckets. Just sub-bucket bits after the
+ // min bucket bit.
+ bucketNanos := uint64(j) << (timeHistMinBucketBits - 1 - timeHistSubBucketBits)
+ // Convert nanoseconds to seconds via a division.
+ // These values will all be exactly representable by a float64.
+ b[j+1] = float64(bucketNanos) / 1e9
}
- // Generate the rest of the super-buckets. It's easier to reason
- // about if we cut out the 0'th bucket, so subtract one since
- // we just handled that bucket.
- for i := 0; i < timeHistNumSuperBuckets-1; i++ {
+ // Generate the rest of the buckets. It's easier to reason
+ // about if we cut out the 0'th bucket.
+ for i := timeHistMinBucketBits; i < timeHistMaxBucketBits; i++ {
for j := 0; j < timeHistNumSubBuckets; j++ {
- // Set the super-bucket bit.
- bucketNanos := uint64(1) << (i + timeHistSubBucketBits)
+ // Set the bucket bit.
+ bucketNanos := uint64(1) << (i - 1)
// Set the sub-bucket bits.
- bucketNanos |= uint64(j) << i
- // The index for this bucket is going to be the (i+1)'th super bucket
- // (note that we're starting from zero, but handled the first super-bucket
+ bucketNanos |= uint64(j) << (i - 1 - timeHistSubBucketBits)
+ // The index for this bucket is going to be the (i+1)'th bucket
+ // (note that we're starting from zero, but handled the first bucket
// earlier, so we need to compensate), and the j'th sub bucket.
// Add 1 because we left space for -Inf.
- bucketIndex := (i+1)*timeHistNumSubBuckets + j + 1
+ bucketIndex := (i-timeHistMinBucketBits+1)*timeHistNumSubBuckets + j + 1
// Convert nanoseconds to seconds via a division.
// These values will all be exactly representable by a float64.
b[bucketIndex] = float64(bucketNanos) / 1e9
}
}
+ // Overflow bucket.
+ b[len(b)-2] = float64(uint64(1)<<(timeHistMaxBucketBits-1)) / 1e9
b[len(b)-1] = float64Inf()
return b
}