aboutsummaryrefslogtreecommitdiff
path: root/src/internal/cpu
diff options
context:
space:
mode:
author: Klaus Post <klauspost@gmail.com> 2025-07-23 11:23:52 +0000
committer: Gopher Robot <gobot@golang.org> 2025-07-24 10:19:17 -0700
commit: 18dbe5b941e03a61cebbb441a9e4dfef43adf425 (patch)
tree: 60a83bb586be044fdeabc6099650d0ca9785289b /src/internal/cpu
parent: c641900f72a595ff2e826367b64e3e418c265409 (diff)
download: go-18dbe5b941e03a61cebbb441a9e4dfef43adf425.tar.xz
hash/crc32: add AVX512 IEEE CRC32 calculation
Benchmark:

goos: windows
goarch: amd64
pkg: hash/crc32
cpu: AMD Ryzen 9 9950X 16-Core Processor

benchmark                                       old MB/s   new MB/s   speedup
BenchmarkCRC32/poly=IEEE/size=15/align=0-32     1081.48    1089.42    1.01x
BenchmarkCRC32/poly=IEEE/size=15/align=1-32     1085.87    1082.61    1.00x
BenchmarkCRC32/poly=IEEE/size=40/align=0-32     2756.33    2752.37    1.00x
BenchmarkCRC32/poly=IEEE/size=40/align=1-32     2758.27    2756.99    1.00x
BenchmarkCRC32/poly=IEEE/size=512/align=0-32    18133.44   18076.52   1.00x
BenchmarkCRC32/poly=IEEE/size=512/align=1-32    18151.05   18055.41   0.99x
BenchmarkCRC32/poly=IEEE/size=1kB/align=0-32    19902.93   48581.07   2.44x
BenchmarkCRC32/poly=IEEE/size=1kB/align=1-32    19966.99   48393.25   2.42x
BenchmarkCRC32/poly=IEEE/size=4kB/align=0-32    21690.33   51679.25   2.38x
BenchmarkCRC32/poly=IEEE/size=4kB/align=1-32    21655.30   51731.22   2.39x
BenchmarkCRC32/poly=IEEE/size=32kB/align=0-32   22046.57   46406.90   2.10x
BenchmarkCRC32/poly=IEEE/size=32kB/align=1-32   21986.22   46250.66   2.10x

AVX512 is enabled above 1KB input size. This rather high limit is because AVX512 may be slower to ramp up than the regular SSE4 implementation for smaller inputs. This is not reflected in the benchmarks, since consecutive calls mean the CPU is "hot".

The 'HasAVX512VPCLMULQDQ' name mirrors the one in golang.org/x/sys/cpu.

Change-Id: Id23685d8e3cc412b6d397a7d70056844bdb79271
Change-Id: Id23685d8e3cc412b6d397a7d70056844bdb79271
GitHub-Last-Rev: 6639f07b9febc7c96a7f3b402a2fd60f7be5e154
GitHub-Pull-Request: golang/go#74701
Reviewed-on: https://go-review.googlesource.com/c/go/+/689435
Reviewed-by: Keith Randall <khr@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Michael Knyszek <mknyszek@google.com>
Auto-Submit: Keith Randall <khr@golang.org>
Auto-Submit: Michael Knyszek <mknyszek@google.com>
Reviewed-by: Keith Randall <khr@golang.org>
Diffstat (limited to 'src/internal/cpu')
-rw-r--r-- src/internal/cpu/cpu.go 47
-rw-r--r-- src/internal/cpu/cpu_x86.go 8
2 files changed, 31 insertions, 24 deletions
diff --git a/src/internal/cpu/cpu.go b/src/internal/cpu/cpu.go
index 760dc0b469..6017b1acc9 100644
--- a/src/internal/cpu/cpu.go
+++ b/src/internal/cpu/cpu.go
@@ -26,29 +26,30 @@ var CacheLineSize uintptr = CacheLinePadSize
// in addition to the cpuid feature bit being set.
// The struct is padded to avoid false sharing.
var X86 struct {
- _ CacheLinePad
- HasAES bool
- HasADX bool
- HasAVX bool
- HasAVX2 bool
- HasAVX512F bool
- HasAVX512BW bool
- HasAVX512VL bool
- HasBMI1 bool
- HasBMI2 bool
- HasERMS bool
- HasFSRM bool
- HasFMA bool
- HasOSXSAVE bool
- HasPCLMULQDQ bool
- HasPOPCNT bool
- HasRDTSCP bool
- HasSHA bool
- HasSSE3 bool
- HasSSSE3 bool
- HasSSE41 bool
- HasSSE42 bool
- _ CacheLinePad
+ _ CacheLinePad
+ HasAES bool
+ HasADX bool
+ HasAVX bool
+ HasAVX2 bool
+ HasAVX512F bool
+ HasAVX512BW bool
+ HasAVX512VL bool
+ HasBMI1 bool
+ HasBMI2 bool
+ HasERMS bool
+ HasFSRM bool
+ HasFMA bool
+ HasOSXSAVE bool
+ HasPCLMULQDQ bool
+ HasPOPCNT bool
+ HasRDTSCP bool
+ HasSHA bool
+ HasSSE3 bool
+ HasSSSE3 bool
+ HasSSE41 bool
+ HasSSE42 bool
+ HasAVX512VPCLMULQDQ bool
+ _ CacheLinePad
}
// The booleans in ARM contain the correspondingly named cpu feature bit.
diff --git a/src/internal/cpu/cpu_x86.go b/src/internal/cpu/cpu_x86.go
index ee812076e9..69b9542ae2 100644
--- a/src/internal/cpu/cpu_x86.go
+++ b/src/internal/cpu/cpu_x86.go
@@ -40,6 +40,10 @@ const (
cpuid_SHA = 1 << 29
cpuid_AVX512BW = 1 << 30
cpuid_AVX512VL = 1 << 31
+
+ // ecx bits
+ cpuid_AVX512VPCLMULQDQ = 1 << 10
+
// edx bits
cpuid_FSRM = 1 << 4
// edx bits for CPUID 0x80000001
@@ -57,6 +61,7 @@ func doinit() {
{Name: "pclmulqdq", Feature: &X86.HasPCLMULQDQ},
{Name: "rdtscp", Feature: &X86.HasRDTSCP},
{Name: "sha", Feature: &X86.HasSHA},
+ {Name: "vpclmulqdq", Feature: &X86.HasAVX512VPCLMULQDQ},
}
level := getGOAMD64level()
if level < 2 {
@@ -139,7 +144,7 @@ func doinit() {
return
}
- _, ebx7, _, edx7 := cpuid(7, 0)
+ _, ebx7, ecx7, edx7 := cpuid(7, 0)
X86.HasBMI1 = isSet(ebx7, cpuid_BMI1)
X86.HasAVX2 = isSet(ebx7, cpuid_AVX2) && osSupportsAVX
X86.HasBMI2 = isSet(ebx7, cpuid_BMI2)
@@ -151,6 +156,7 @@ func doinit() {
if X86.HasAVX512F {
X86.HasAVX512BW = isSet(ebx7, cpuid_AVX512BW)
X86.HasAVX512VL = isSet(ebx7, cpuid_AVX512VL)
+ X86.HasAVX512VPCLMULQDQ = isSet(ecx7, cpuid_AVX512VPCLMULQDQ)
}
X86.HasFSRM = isSet(edx7, cpuid_FSRM)