From 23f4f0db682fad0c8d61a5b5cdbdbad4cf1cd41f Mon Sep 17 00:00:00 2001 From: Ruslan Andreev Date: Tue, 15 Jun 2021 14:04:30 +0000 Subject: cmd/compile: add prefetch intrinsic support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This CL provides new intrinsics to emit prefetch instructions for AMD64 and ARM64 platforms: Prefetch - prefetches data from memory address to cache; PrefetchStreamed - prefetches data from memory address, with a hint that this data is being streamed. This patch also provides prefetch calls pointed out by RSC inside scanobject and greyobject of GC mark logic. Performance results provided by Michael: https://perf.golang.org/search?q=upload:20210901.9 Benchmark parameters: tree2 -heapsize=1000000000 -cpus=8 tree -n=18 parser peano Benchmarks AMD64 (Xeon - Cascade Lake): name old time/op new time/op delta Tree2-8 36.1ms ± 6% 33.4ms ± 5% -7.65% (p=0.000 n=9+9) Tree-8 326ms ± 1% 324ms ± 1% -0.44% (p=0.006 n=9+10) Parser-8 2.75s ± 1% 2.71s ± 1% -1.47% (p=0.008 n=5+5) Peano-8 63.1ms ± 1% 63.0ms ± 1% ~ (p=0.730 n=9+9) [Geo mean] 213ms 207ms -2.45% Benchmarks ARM64 (Kunpeng 920): name old time/op new time/op delta Tree2-8 50.3ms ± 8% 44.1ms ± 5% -12.24% (p=0.000 n=10+9) Tree-8 494ms ± 1% 493ms ± 1% ~ (p=0.684 n=10+10) Parser-8 3.99s ± 1% 3.93s ± 1% -1.37% (p=0.016 n=5+5) Peano-8 84.4ms ± 0% 84.1ms ± 1% ~ (p=0.068 n=8+10) [Geo mean] 302ms 291ms -3.67% Change-Id: I43e10bc2f9512dc49d7631dd8843a79036fa43d0 Reviewed-on: https://go-review.googlesource.com/c/go/+/328289 Reviewed-by: Austin Clements Reviewed-by: Cherry Mui Run-TryBot: Austin Clements TryBot-Result: Go Bot --- src/cmd/internal/obj/arm64/obj7.go | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'src/cmd/internal/obj') diff --git a/src/cmd/internal/obj/arm64/obj7.go b/src/cmd/internal/obj/arm64/obj7.go index a043d0972c..ae8deede3a 100644 --- a/src/cmd/internal/obj/arm64/obj7.go +++ b/src/cmd/internal/obj/arm64/obj7.go @@ -51,6 +51,12 
@@ var complements = []obj.As{ ACMNW: ACMPW, } +// noZRreplace is the set of instructions for which $0 in the To operand +// should NOT be replaced with REGZERO. +var noZRreplace = map[obj.As]bool{ + APRFM: true, +} + func (c *ctxt7) stacksplit(p *obj.Prog, framesize int32) *obj.Prog { // MOV g_stackguard(g), RT1 p = obj.Appendp(p, c.newprog) @@ -226,7 +232,7 @@ func progedit(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) { p.From.Type = obj.TYPE_REG p.From.Reg = REGZERO } - if p.To.Type == obj.TYPE_CONST && p.To.Offset == 0 { + if p.To.Type == obj.TYPE_CONST && p.To.Offset == 0 && !noZRreplace[p.As] { p.To.Type = obj.TYPE_REG p.To.Reg = REGZERO } -- cgit v1.3