From 23f4f0db682fad0c8d61a5b5cdbdbad4cf1cd41f Mon Sep 17 00:00:00 2001 From: Ruslan Andreev Date: Tue, 15 Jun 2021 14:04:30 +0000 Subject: cmd/compile: add prefetch intrinsic support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This CL provide new intrinsics to emit prefetch instructions for AMD64 and ARM64 platforms: Prefetch - prefetches data from memory address to cache; PrefetchStreamed - prefetches data from memory address, with a hint that this data is being streamed. This patch also provides prefetch calls pointed by RSC inside scanobject and greyobject of GC mark logic. Performance results provided by Michael: https://perf.golang.org/search?q=upload:20210901.9 Benchmark parameters: tree2 -heapsize=1000000000 -cpus=8 tree -n=18 parser peano Benchmarks AMD64 (Xeon - Cascade Lake): name old time/op new time/op delta Tree2-8 36.1ms ± 6% 33.4ms ± 5% -7.65% (p=0.000 n=9+9) Tree-8 326ms ± 1% 324ms ± 1% -0.44% (p=0.006 n=9+10) Parser-8 2.75s ± 1% 2.71s ± 1% -1.47% (p=0.008 n=5+5) Peano-8 63.1ms ± 1% 63.0ms ± 1% ~ (p=0.730 n=9+9) [Geo mean] 213ms 207ms -2.45% Benchmarks ARM64 (Kunpeng 920): name old time/op new time/op delta Tree2-8 50.3ms ± 8% 44.1ms ± 5% -12.24% (p=0.000 n=10+9) Tree-8 494ms ± 1% 493ms ± 1% ~ (p=0.684 n=10+10) Parser-8 3.99s ± 1% 3.93s ± 1% -1.37% (p=0.016 n=5+5) Peano-8 84.4ms ± 0% 84.1ms ± 1% ~ (p=0.068 n=8+10) [Geo mean] 302ms 291ms -3.67% Change-Id: I43e10bc2f9512dc49d7631dd8843a79036fa43d0 Reviewed-on: https://go-review.googlesource.com/c/go/+/328289 Reviewed-by: Austin Clements Reviewed-by: Cherry Mui Run-TryBot: Austin Clements TryBot-Result: Go Bot --- src/runtime/internal/sys/intrinsics_common.go | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'src/runtime/internal') diff --git a/src/runtime/internal/sys/intrinsics_common.go b/src/runtime/internal/sys/intrinsics_common.go index 818d75ecc5..48d9759ca9 100644 --- a/src/runtime/internal/sys/intrinsics_common.go +++ b/src/runtime/internal/sys/intrinsics_common.go @@ -141,3 +141,18 @@ func TrailingZeros8(x uint8) int { func Len8(x uint8) int { return int(len8tab[x]) } + +// Prefetch prefetches data from memory addr to cache +// +// AMD64: Produce PREFETCHT0 instruction +// +// ARM64: Produce PRFM instruction with PLDL1KEEP option +func Prefetch(addr uintptr) {} + +// PrefetchStreamed prefetches data from memory addr, with a hint that this data is being streamed. +// That is, it is likely to be accessed very soon, but only once. If possible, this will avoid polluting the cache. +// +// AMD64: Produce PREFETCHNTA instruction +// +// ARM64: Produce PRFM instruction with PLDL1STRM option +func PrefetchStreamed(addr uintptr) {} -- cgit v1.3-5-g9baa