aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorvpachkov <slava.pach@gmail.com>2021-11-29 15:20:37 +0300
committerKeith Randall <khr@golang.org>2022-03-31 19:46:55 +0000
commit12acf9b0f009305eefc71d4cee8808cc244e86aa (patch)
tree74bfe36e1c31cbb32a0de6bf5ad8f803c9e53316 /src
parentcc3a3519af5b8b4cf26bf27133675776fdfcaeb9 (diff)
downloadgo-12acf9b0f009305eefc71d4cee8808cc244e86aa.tar.xz
runtime: remove AVX2less code in memclrNoHeapPointers for GOAMD64 >= 3
Optimize memclr by removing simple case loop along with the runtime check since AVX2 is guaranteed to be available when compiling with GOAMD64 >= 3 name old speed new speed delta Memclr/5-12 2.70GB/s ± 1% 2.73GB/s ± 1% ~ (p=0.056 n=5+5) Memclr/16-12 7.00GB/s ± 2% 7.03GB/s ± 1% ~ (p=1.000 n=5+5) Memclr/64-12 25.5GB/s ± 1% 25.5GB/s ± 1% ~ (p=0.548 n=5+5) Memclr/256-12 53.4GB/s ± 1% 52.7GB/s ± 2% ~ (p=0.222 n=5+5) Memclr/4096-12 109GB/s ± 1% 129GB/s ± 0% +18.57% (p=0.008 n=5+5) Memclr/65536-12 75.2GB/s ± 2% 78.3GB/s ± 3% +4.14% (p=0.008 n=5+5) Memclr/1M-12 53.5GB/s ± 2% 54.1GB/s ± 2% ~ (p=0.310 n=5+5) Memclr/4M-12 53.1GB/s ± 3% 52.9GB/s ± 2% ~ (p=1.000 n=5+5) Memclr/8M-12 44.6GB/s ± 3% 45.1GB/s ± 3% ~ (p=0.310 n=5+5) Memclr/16M-12 24.8GB/s ± 2% 24.2GB/s ± 2% ~ (p=0.056 n=5+5) Memclr/64M-12 38.3GB/s ± 1% 37.8GB/s ± 1% ~ (p=0.056 n=5+5) [Geo mean] 31.0GB/s 31.5GB/s +1.78% Change-Id: I6f3014f6338cb3b5a1b94503faa205f043fe2de8 Reviewed-on: https://go-review.googlesource.com/c/go/+/367494 Trust: Cherry Mui <cherryyz@google.com> Trust: Daniel Martí <mvdan@mvdan.cc> Run-TryBot: Daniel Martí <mvdan@mvdan.cc> TryBot-Result: Gopher Robot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
Diffstat (limited to 'src')
-rw-r--r--src/cmd/dist/build.go2
-rw-r--r--src/runtime/asm_amd64.h14
-rw-r--r--src/runtime/memclr_amd64.s4
3 files changed, 20 insertions, 0 deletions
diff --git a/src/cmd/dist/build.go b/src/cmd/dist/build.go
index d224cef2a8..db2ac1f2a6 100644
--- a/src/cmd/dist/build.go
+++ b/src/cmd/dist/build.go
@@ -732,6 +732,8 @@ func runInstall(pkg string, ch chan struct{}) {
pathf("%s/src/runtime/funcdata.h", goroot), 0)
copyfile(pathf("%s/pkg/include/asm_ppc64x.h", goroot),
pathf("%s/src/runtime/asm_ppc64x.h", goroot), 0)
+ copyfile(pathf("%s/pkg/include/asm_amd64.h", goroot),
+ pathf("%s/src/runtime/asm_amd64.h", goroot), 0)
}
// Generate any missing files; regenerate existing ones.
diff --git a/src/runtime/asm_amd64.h b/src/runtime/asm_amd64.h
new file mode 100644
index 0000000000..49e0ee2323
--- /dev/null
+++ b/src/runtime/asm_amd64.h
@@ -0,0 +1,14 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Define features that are guaranteed to be supported by setting the AMD64 variable.
+// If a feature is supported, there's no need to check it at runtime every time.
+
+#ifdef GOAMD64_v3
+#define hasAVX2
+#endif
+
+#ifdef GOAMD64_v4
+#define hasAVX2
+#endif
diff --git a/src/runtime/memclr_amd64.s b/src/runtime/memclr_amd64.s
index 700bbd7b9b..26a6205e61 100644
--- a/src/runtime/memclr_amd64.s
+++ b/src/runtime/memclr_amd64.s
@@ -6,6 +6,7 @@
#include "go_asm.h"
#include "textflag.h"
+#include "asm_amd64.h"
// See memclrNoHeapPointers Go doc for important implementation constraints.
@@ -39,6 +40,8 @@ tail:
JBE _65through128
CMPQ BX, $256
JBE _129through256
+
+#ifndef hasAVX2
CMPB internal∕cpu·X86+const_offsetX86HasAVX2(SB), $1
JE loop_preheader_avx2
// TODO: for really big clears, use MOVNTDQ, even without AVX2.
@@ -65,6 +68,7 @@ loop:
CMPQ BX, $256
JAE loop
JMP tail
+#endif
loop_preheader_avx2:
VPXOR Y0, Y0, Y0