aboutsummaryrefslogtreecommitdiff
path: root/src/runtime/asm_amd64.s
diff options
context:
space:
mode:
authorCherry Mui <cherryyz@google.com>2025-12-08 12:14:24 -0500
committerCherry Mui <cherryyz@google.com>2025-12-08 14:10:09 -0800
commitf38e968abafde345fa470cb14d55b6f092af569f (patch)
tree6534497ff523ce1abc3faa77ed6819a3fc2b31ec /src/runtime/asm_amd64.s
parent144cf17d2c444a530d7c08c5870dc8e70bec2c72 (diff)
downloadgo-f38e968abafde345fa470cb14d55b6f092af569f.tar.xz
[dev.simd] cmd/compile: zero only low 128-bit of X15
Zeroing the upper part of X15 may make the CPU think it is "dirty" and slow down SSE operations. For now, do not zero the upper part, and instead construct a zero value on the fly if we need a 256- or 512-bit zero value. Maybe VZEROUPPER works better than explicitly zeroing X15, but we need to evaluate that. Long term, we probably want to move more things from SSE to AVX. This essentially undoes CL 698237 and CL 698238, except that we keep using X15 for 128-bit zeroing for SIMD. Change-Id: I1564e6332c4c57f9721397c92c7c734c5497534c Reviewed-on: https://go-review.googlesource.com/c/go/+/728240 LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com> Reviewed-by: David Chase <drchase@google.com>
Diffstat (limited to 'src/runtime/asm_amd64.s')
-rw-r--r--src/runtime/asm_amd64.s10
1 files changed, 0 insertions, 10 deletions
diff --git a/src/runtime/asm_amd64.s b/src/runtime/asm_amd64.s
index bf208a4d29..391d9bcd22 100644
--- a/src/runtime/asm_amd64.s
+++ b/src/runtime/asm_amd64.s
@@ -1093,11 +1093,6 @@ needm:
// there's no need to handle that. Clear R14 so that there's
// a bad value in there, in case needm tries to use it.
XORPS X15, X15
-#ifdef GOEXPERIMENT_simd
- CMPB internal∕cpu·X86+const_offsetX86HasAVX(SB), $1
- JNE 2(PC)
- VXORPS X15, X15, X15
-#endif
XORQ R14, R14
MOVQ $runtime·needAndBindM<ABIInternal>(SB), AX
CALL AX
@@ -1795,11 +1790,6 @@ TEXT ·sigpanic0(SB),NOSPLIT,$0-0
get_tls(R14)
MOVQ g(R14), R14
XORPS X15, X15
-#ifdef GOEXPERIMENT_simd
- CMPB internal∕cpu·X86+const_offsetX86HasAVX(SB), $1
- JNE 2(PC)
- VXORPS X15, X15, X15
-#endif
JMP ·sigpanic<ABIInternal>(SB)
// gcWriteBarrier informs the GC about heap pointer writes.