From 4c311aa38f6e354ec4d9f5882a16c36a2e4b0f36 Mon Sep 17 00:00:00 2001 From: Cherry Mui Date: Thu, 21 Aug 2025 14:37:18 -0400 Subject: [dev.simd] cmd/compile: ensure the whole X15 register is zeroed On AMD64, we reserve the X15 register as the zero register. Currently we use an SSE instruction to zero it, and we only use it in SSE contexts. When the machine supports AVX, the high bits of the register are not necessarily zeroed. Now that the compiler generates AVX code for SIMD, it would be great to have a zero register in the AVX context. This CL zeroes the whole X15 register if AVX is supported. Change-Id: I4dc803362f2e007b1614b90de435fbb7814cebc7 Reviewed-on: https://go-review.googlesource.com/c/go/+/698237 LUCI-TryBot-Result: Go LUCI Reviewed-by: Junyang Shao Reviewed-by: David Chase --- src/runtime/sys_linux_amd64.s | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'src/runtime/sys_linux_amd64.s') diff --git a/src/runtime/sys_linux_amd64.s b/src/runtime/sys_linux_amd64.s index 941f70b0e8..02505c2fb0 100644 --- a/src/runtime/sys_linux_amd64.s +++ b/src/runtime/sys_linux_amd64.s @@ -340,6 +340,9 @@ TEXT runtime·sigtramp(SB),NOSPLIT|TOPFRAME|NOFRAME,$0 get_tls(R12) MOVQ g(R12), R14 PXOR X15, X15 + CMPB internal∕cpu·X86+const_offsetX86HasAVX(SB), $1 + JNE 2(PC) + VXORPS X15, X15, X15 // Reserve space for spill slots. NOP SP // disable vet stack checking @@ -365,6 +368,9 @@ TEXT runtime·sigprofNonGoWrapper<>(SB),NOSPLIT|NOFRAME,$0 get_tls(R12) MOVQ g(R12), R14 PXOR X15, X15 + CMPB internal∕cpu·X86+const_offsetX86HasAVX(SB), $1 + JNE 2(PC) + VXORPS X15, X15, X15 // Reserve space for spill slots. NOP SP // disable vet stack checking -- cgit v1.3