aboutsummaryrefslogtreecommitdiff
path: root/src/cmd/internal/obj
diff options
context:
space:
mode:
authorRuslan Andreev <kels9009@gmail.com>2021-06-15 14:04:30 +0000
committerAustin Clements <austin@google.com>2021-09-07 20:27:30 +0000
commit23f4f0db682fad0c8d61a5b5cdbdbad4cf1cd41f (patch)
tree2c3d076097f9ff7e1289bb3e0623446694e7383c /src/cmd/internal/obj
parentd92101f452e10680ad4c8af2d5ad40d940b59214 (diff)
downloadgo-23f4f0db682fad0c8d61a5b5cdbdbad4cf1cd41f.tar.xz
cmd/compile: add prefetch intrinsic support
This CL provide new intrinsics to emit prefetch instructions for AMD64 and ARM64 platforms: Prefetch - prefetches data from memory address to cache; PrefetchStreamed - prefetches data from memory address, with a hint that this data is being streamed. This patch also provides prefetch calls pointed by RSC inside scanobject and greyobject of GC mark logic. Performance results provided by Michael: https://perf.golang.org/search?q=upload:20210901.9 Benchmark parameters: tree2 -heapsize=1000000000 -cpus=8 tree -n=18 parser peano Benchmarks AMD64 (Xeon - Cascade Lake): name old time/op new time/op delta Tree2-8 36.1ms ± 6% 33.4ms ± 5% -7.65% (p=0.000 n=9+9) Tree-8 326ms ± 1% 324ms ± 1% -0.44% (p=0.006 n=9+10) Parser-8 2.75s ± 1% 2.71s ± 1% -1.47% (p=0.008 n=5+5) Peano-8 63.1ms ± 1% 63.0ms ± 1% ~ (p=0.730 n=9+9) [Geo mean] 213ms 207ms -2.45% Benchmarks ARM64 (Kunpeng 920): name old time/op new time/op delta Tree2-8 50.3ms ± 8% 44.1ms ± 5% -12.24% (p=0.000 n=10+9) Tree-8 494ms ± 1% 493ms ± 1% ~ (p=0.684 n=10+10) Parser-8 3.99s ± 1% 3.93s ± 1% -1.37% (p=0.016 n=5+5) Peano-8 84.4ms ± 0% 84.1ms ± 1% ~ (p=0.068 n=8+10) [Geo mean] 302ms 291ms -3.67% Change-Id: I43e10bc2f9512dc49d7631dd8843a79036fa43d0 Reviewed-on: https://go-review.googlesource.com/c/go/+/328289 Reviewed-by: Austin Clements <austin@google.com> Reviewed-by: Cherry Mui <cherryyz@google.com> Run-TryBot: Austin Clements <austin@google.com> TryBot-Result: Go Bot <gobot@golang.org>
Diffstat (limited to 'src/cmd/internal/obj')
-rw-r--r--src/cmd/internal/obj/arm64/obj7.go8
1 files changed, 7 insertions, 1 deletions
diff --git a/src/cmd/internal/obj/arm64/obj7.go b/src/cmd/internal/obj/arm64/obj7.go
index a043d0972c..ae8deede3a 100644
--- a/src/cmd/internal/obj/arm64/obj7.go
+++ b/src/cmd/internal/obj/arm64/obj7.go
@@ -51,6 +51,12 @@ var complements = []obj.As{
ACMNW: ACMPW,
}
+// noZRreplace is the set of instructions for which $0 in the To operand
+// should NOT be replaced with REGZERO.
+var noZRreplace = map[obj.As]bool{
+ APRFM: true,
+}
+
func (c *ctxt7) stacksplit(p *obj.Prog, framesize int32) *obj.Prog {
// MOV g_stackguard(g), RT1
p = obj.Appendp(p, c.newprog)
@@ -226,7 +232,7 @@ func progedit(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
p.From.Type = obj.TYPE_REG
p.From.Reg = REGZERO
}
- if p.To.Type == obj.TYPE_CONST && p.To.Offset == 0 {
+ if p.To.Type == obj.TYPE_CONST && p.To.Offset == 0 && !noZRreplace[p.As] {
p.To.Type = obj.TYPE_REG
p.To.Reg = REGZERO
}