diff options
| author | Klaus Post <klauspost@gmail.com> | 2025-07-23 11:23:52 +0000 |
|---|---|---|
| committer | Gopher Robot <gobot@golang.org> | 2025-07-24 10:19:17 -0700 |
| commit | 18dbe5b941e03a61cebbb441a9e4dfef43adf425 (patch) | |
| tree | 60a83bb586be044fdeabc6099650d0ca9785289b /src/internal/cpu | |
| parent | c641900f72a595ff2e826367b64e3e418c265409 (diff) | |
| download | go-18dbe5b941e03a61cebbb441a9e4dfef43adf425.tar.xz | |
hash/crc32: add AVX512 IEEE CRC32 calculation
Benchmark:
goos: windows
goarch: amd64
pkg: hash/crc32
cpu: AMD Ryzen 9 9950X 16-Core Processor
benchmark old MB/s new MB/s speedup
BenchmarkCRC32/poly=IEEE/size=15/align=0-32 1081.48 1089.42 1.01x
BenchmarkCRC32/poly=IEEE/size=15/align=1-32 1085.87 1082.61 1.00x
BenchmarkCRC32/poly=IEEE/size=40/align=0-32 2756.33 2752.37 1.00x
BenchmarkCRC32/poly=IEEE/size=40/align=1-32 2758.27 2756.99 1.00x
BenchmarkCRC32/poly=IEEE/size=512/align=0-32 18133.44 18076.52 1.00x
BenchmarkCRC32/poly=IEEE/size=512/align=1-32 18151.05 18055.41 0.99x
BenchmarkCRC32/poly=IEEE/size=1kB/align=0-32 19902.93 48581.07 2.44x
BenchmarkCRC32/poly=IEEE/size=1kB/align=1-32 19966.99 48393.25 2.42x
BenchmarkCRC32/poly=IEEE/size=4kB/align=0-32 21690.33 51679.25 2.38x
BenchmarkCRC32/poly=IEEE/size=4kB/align=1-32 21655.30 51731.22 2.39x
BenchmarkCRC32/poly=IEEE/size=32kB/align=0-32 22046.57 46406.90 2.10x
BenchmarkCRC32/poly=IEEE/size=32kB/align=1-32 21986.22 46250.66 2.10x
AVX512 are enabled above 1KB input size.
This rather high limit is due to AVX512 may be slower to ramp up
than the regular SSE4 implementation for smaller inputs.
This is not reflected in the benchmarks,
since consecutive calls means the CPU is "hot".
The 'HasAVX512VPCLMULQDQ' name mirrors the one in golang.org/x/sys/cpu
Change-Id: Id23685d8e3cc412b6d397a7d70056844bdb79271
Change-Id: Id23685d8e3cc412b6d397a7d70056844bdb79271
GitHub-Last-Rev: 6639f07b9febc7c96a7f3b402a2fd60f7be5e154
GitHub-Pull-Request: golang/go#74701
Reviewed-on: https://go-review.googlesource.com/c/go/+/689435
Reviewed-by: Keith Randall <khr@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Michael Knyszek <mknyszek@google.com>
Auto-Submit: Keith Randall <khr@golang.org>
Auto-Submit: Michael Knyszek <mknyszek@google.com>
Reviewed-by: Keith Randall <khr@golang.org>
Diffstat (limited to 'src/internal/cpu')
| -rw-r--r-- | src/internal/cpu/cpu.go | 47 | ||||
| -rw-r--r-- | src/internal/cpu/cpu_x86.go | 8 |
2 files changed, 31 insertions, 24 deletions
diff --git a/src/internal/cpu/cpu.go b/src/internal/cpu/cpu.go index 760dc0b469..6017b1acc9 100644 --- a/src/internal/cpu/cpu.go +++ b/src/internal/cpu/cpu.go @@ -26,29 +26,30 @@ var CacheLineSize uintptr = CacheLinePadSize // in addition to the cpuid feature bit being set. // The struct is padded to avoid false sharing. var X86 struct { - _ CacheLinePad - HasAES bool - HasADX bool - HasAVX bool - HasAVX2 bool - HasAVX512F bool - HasAVX512BW bool - HasAVX512VL bool - HasBMI1 bool - HasBMI2 bool - HasERMS bool - HasFSRM bool - HasFMA bool - HasOSXSAVE bool - HasPCLMULQDQ bool - HasPOPCNT bool - HasRDTSCP bool - HasSHA bool - HasSSE3 bool - HasSSSE3 bool - HasSSE41 bool - HasSSE42 bool - _ CacheLinePad + _ CacheLinePad + HasAES bool + HasADX bool + HasAVX bool + HasAVX2 bool + HasAVX512F bool + HasAVX512BW bool + HasAVX512VL bool + HasBMI1 bool + HasBMI2 bool + HasERMS bool + HasFSRM bool + HasFMA bool + HasOSXSAVE bool + HasPCLMULQDQ bool + HasPOPCNT bool + HasRDTSCP bool + HasSHA bool + HasSSE3 bool + HasSSSE3 bool + HasSSE41 bool + HasSSE42 bool + HasAVX512VPCLMULQDQ bool + _ CacheLinePad } // The booleans in ARM contain the correspondingly named cpu feature bit. diff --git a/src/internal/cpu/cpu_x86.go b/src/internal/cpu/cpu_x86.go index ee812076e9..69b9542ae2 100644 --- a/src/internal/cpu/cpu_x86.go +++ b/src/internal/cpu/cpu_x86.go @@ -40,6 +40,10 @@ const ( cpuid_SHA = 1 << 29 cpuid_AVX512BW = 1 << 30 cpuid_AVX512VL = 1 << 31 + + // ecx bits + cpuid_AVX512VPCLMULQDQ = 1 << 10 + // edx bits cpuid_FSRM = 1 << 4 // edx bits for CPUID 0x80000001 @@ -57,6 +61,7 @@ func doinit() { {Name: "pclmulqdq", Feature: &X86.HasPCLMULQDQ}, {Name: "rdtscp", Feature: &X86.HasRDTSCP}, {Name: "sha", Feature: &X86.HasSHA}, + {Name: "vpclmulqdq", Feature: &X86.HasAVX512VPCLMULQDQ}, } level := getGOAMD64level() if level < 2 { @@ -139,7 +144,7 @@ func doinit() { return } - _, ebx7, _, edx7 := cpuid(7, 0) + _, ebx7, ecx7, edx7 := cpuid(7, 0) X86.HasBMI1 = isSet(ebx7, cpuid_BMI1) X86.HasAVX2 = isSet(ebx7, cpuid_AVX2) && osSupportsAVX X86.HasBMI2 = isSet(ebx7, cpuid_BMI2) @@ -151,6 +156,7 @@ func doinit() { if X86.HasAVX512F { X86.HasAVX512BW = isSet(ebx7, cpuid_AVX512BW) X86.HasAVX512VL = isSet(ebx7, cpuid_AVX512VL) + X86.HasAVX512VPCLMULQDQ = isSet(ecx7, cpuid_AVX512VPCLMULQDQ) } X86.HasFSRM = isSet(edx7, cpuid_FSRM) |
