diff options
Diffstat (limited to 'src/runtime')
| -rw-r--r-- | src/runtime/asm_amd64.s | 6 | ||||
| -rw-r--r-- | src/runtime/cpuflags.go | 3 | ||||
| -rw-r--r-- | src/runtime/cpuflags_amd64_test.go | 19 | ||||
| -rw-r--r-- | src/runtime/export_test.go | 2 | ||||
| -rw-r--r-- | src/runtime/mkpreempt.go | 2 | ||||
| -rw-r--r-- | src/runtime/os_darwin.go | 15 | ||||
| -rw-r--r-- | src/runtime/panic.go | 7 | ||||
| -rw-r--r-- | src/runtime/preempt_amd64.go | 16 | ||||
| -rw-r--r-- | src/runtime/preempt_amd64.s | 64 | ||||
| -rw-r--r-- | src/runtime/proc.go | 3 | ||||
| -rw-r--r-- | src/runtime/race_amd64.s | 3 | ||||
| -rw-r--r-- | src/runtime/sys_darwin_amd64.s | 3 | ||||
| -rw-r--r-- | src/runtime/sys_dragonfly_amd64.s | 3 | ||||
| -rw-r--r-- | src/runtime/sys_freebsd_amd64.s | 6 | ||||
| -rw-r--r-- | src/runtime/sys_linux_amd64.s | 6 | ||||
| -rw-r--r-- | src/runtime/sys_netbsd_amd64.s | 3 | ||||
| -rw-r--r-- | src/runtime/sys_openbsd_amd64.s | 3 | ||||
| -rw-r--r-- | src/runtime/sys_windows_amd64.s | 3 | ||||
| -rw-r--r-- | src/runtime/testdata/testprog/cpuflags_amd64.go | 18 | ||||
| -rw-r--r-- | src/runtime/testdata/testprog/cpuflags_amd64.s | 9 |
20 files changed, 173 insertions, 21 deletions
diff --git a/src/runtime/asm_amd64.s b/src/runtime/asm_amd64.s index f4244f6e06..7c746803a8 100644 --- a/src/runtime/asm_amd64.s +++ b/src/runtime/asm_amd64.s @@ -1049,6 +1049,9 @@ needm: // there's no need to handle that. Clear R14 so that there's // a bad value in there, in case needm tries to use it. XORPS X15, X15 + CMPB internal∕cpu·X86+const_offsetX86HasAVX(SB), $1 + JNE 2(PC) + VXORPS X15, X15, X15 XORQ R14, R14 MOVQ $runtime·needAndBindM<ABIInternal>(SB), AX CALL AX @@ -1746,6 +1749,9 @@ TEXT ·sigpanic0(SB),NOSPLIT,$0-0 get_tls(R14) MOVQ g(R14), R14 XORPS X15, X15 + CMPB internal∕cpu·X86+const_offsetX86HasAVX(SB), $1 + JNE 2(PC) + VXORPS X15, X15, X15 JMP ·sigpanic<ABIInternal>(SB) // gcWriteBarrier informs the GC about heap pointer writes. diff --git a/src/runtime/cpuflags.go b/src/runtime/cpuflags.go index 6452364b68..67ed081ef6 100644 --- a/src/runtime/cpuflags.go +++ b/src/runtime/cpuflags.go @@ -28,9 +28,10 @@ const ( var ( // Set in runtime.cpuinit. // TODO: deprecate these; use internal/cpu directly. + x86HasAVX bool + x86HasFMA bool x86HasPOPCNT bool x86HasSSE41 bool - x86HasFMA bool armHasVFPv4 bool diff --git a/src/runtime/cpuflags_amd64_test.go b/src/runtime/cpuflags_amd64_test.go new file mode 100644 index 0000000000..f238e7fdf2 --- /dev/null +++ b/src/runtime/cpuflags_amd64_test.go @@ -0,0 +1,19 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package runtime_test + +import ( + "runtime" + "testing" +) + +func TestHasAVX(t *testing.T) { + t.Parallel() + output := runTestProg(t, "testprog", "CheckAVX") + ok := output == "OK\n" + if *runtime.X86HasAVX != ok { + t.Fatalf("x86HasAVX: %v, CheckAVX got:\n%s", *runtime.X86HasAVX, output) + } +} diff --git a/src/runtime/export_test.go b/src/runtime/export_test.go index 2db8add7e4..6e0360aaca 100644 --- a/src/runtime/export_test.go +++ b/src/runtime/export_test.go @@ -1978,6 +1978,8 @@ func TraceStack(gp *G, tab *TraceStackTable) { traceStack(0, gp, (*traceStackTable)(tab)) } +var X86HasAVX = &x86HasAVX + var DebugDecorateMappings = &debug.decoratemappings func SetVMANameSupported() bool { return setVMANameSupported() } diff --git a/src/runtime/mkpreempt.go b/src/runtime/mkpreempt.go index dc951cd9a9..ca1c6e4a0e 100644 --- a/src/runtime/mkpreempt.go +++ b/src/runtime/mkpreempt.go @@ -402,7 +402,7 @@ func genAMD64(g *gen) { // Create layouts for X, Y, and Z registers. const ( numXRegs = 16 - numZRegs = 16 // TODO: If we start using upper registers, change to 32 + numZRegs = 32 numKRegs = 8 ) lZRegs := layout{sp: xReg} // Non-GP registers diff --git a/src/runtime/os_darwin.go b/src/runtime/os_darwin.go index c908809c81..79cd2997c7 100644 --- a/src/runtime/os_darwin.go +++ b/src/runtime/os_darwin.go @@ -162,11 +162,22 @@ func sysctlbynameInt32(name []byte) (int32, int32) { return ret, out } -//go:linkname internal_cpu_getsysctlbyname internal/cpu.getsysctlbyname -func internal_cpu_getsysctlbyname(name []byte) (int32, int32) { +func sysctlbynameBytes(name, out []byte) int32 { + nout := uintptr(len(out)) + ret := sysctlbyname(&name[0], &out[0], &nout, nil, 0) + return ret +} + +//go:linkname internal_cpu_sysctlbynameInt32 internal/cpu.sysctlbynameInt32 +func internal_cpu_sysctlbynameInt32(name []byte) (int32, int32) { return sysctlbynameInt32(name) } +//go:linkname internal_cpu_sysctlbynameBytes internal/cpu.sysctlbynameBytes +func internal_cpu_sysctlbynameBytes(name, out []byte) int32 { + return sysctlbynameBytes(name, out) +} + const ( _CTL_HW = 6 _HW_NCPU = 3 diff --git a/src/runtime/panic.go b/src/runtime/panic.go index 89b0614f67..d467e9305d 100644 --- a/src/runtime/panic.go +++ b/src/runtime/panic.go @@ -341,6 +341,13 @@ func panicmemAddr(addr uintptr) { panic(errorAddressString{msg: "invalid memory address or nil pointer dereference", addr: addr}) } +var simdImmError = error(errorString("out-of-range immediate for simd intrinsic")) + +func panicSimdImm() { + panicCheck2("simd immediate error") + panic(simdImmError) +} + // Create a new deferred function fn, which has no arguments and results. // The compiler turns a defer statement into a call to this. func deferproc(fn func()) { diff --git a/src/runtime/preempt_amd64.go b/src/runtime/preempt_amd64.go index 88c0ddd34a..78dec40e1f 100644 --- a/src/runtime/preempt_amd64.go +++ b/src/runtime/preempt_amd64.go @@ -19,6 +19,22 @@ type xRegs struct { Z13 [64]byte Z14 [64]byte Z15 [64]byte + Z16 [64]byte + Z17 [64]byte + Z18 [64]byte + Z19 [64]byte + Z20 [64]byte + Z21 [64]byte + Z22 [64]byte + Z23 [64]byte + Z24 [64]byte + Z25 [64]byte + Z26 [64]byte + Z27 [64]byte + Z28 [64]byte + Z29 [64]byte + Z30 [64]byte + Z31 [64]byte K0 uint64 K1 uint64 K2 uint64 diff --git a/src/runtime/preempt_amd64.s b/src/runtime/preempt_amd64.s index c35de7f3b7..a5b949a242 100644 --- a/src/runtime/preempt_amd64.s +++ b/src/runtime/preempt_amd64.s @@ -95,14 +95,30 @@ saveAVX512: VMOVDQU64 Z13, 832(AX) VMOVDQU64 Z14, 896(AX) VMOVDQU64 Z15, 960(AX) - KMOVQ K0, 1024(AX) - KMOVQ K1, 1032(AX) - KMOVQ K2, 1040(AX) - KMOVQ K3, 1048(AX) - KMOVQ K4, 1056(AX) - KMOVQ K5, 1064(AX) - KMOVQ K6, 1072(AX) - KMOVQ K7, 1080(AX) + VMOVDQU64 Z16, 1024(AX) + VMOVDQU64 Z17, 1088(AX) + VMOVDQU64 Z18, 1152(AX) + VMOVDQU64 Z19, 1216(AX) + VMOVDQU64 Z20, 1280(AX) + VMOVDQU64 Z21, 1344(AX) + VMOVDQU64 Z22, 1408(AX) + VMOVDQU64 Z23, 1472(AX) + VMOVDQU64 Z24, 1536(AX) + VMOVDQU64 Z25, 1600(AX) + VMOVDQU64 Z26, 1664(AX) + VMOVDQU64 Z27, 1728(AX) + VMOVDQU64 Z28, 1792(AX) + VMOVDQU64 Z29, 1856(AX) + VMOVDQU64 Z30, 1920(AX) + VMOVDQU64 Z31, 1984(AX) + KMOVQ K0, 2048(AX) + KMOVQ K1, 2056(AX) + KMOVQ K2, 2064(AX) + KMOVQ K3, 2072(AX) + KMOVQ K4, 2080(AX) + KMOVQ K5, 2088(AX) + KMOVQ K6, 2096(AX) + KMOVQ K7, 2104(AX) JMP preempt preempt: CALL ·asyncPreempt2(SB) @@ -153,14 +169,30 @@ restoreAVX2: VMOVDQU 0(AX), Y0 JMP restoreGPs restoreAVX512: - KMOVQ 1080(AX), K7 - KMOVQ 1072(AX), K6 - KMOVQ 1064(AX), K5 - KMOVQ 1056(AX), K4 - KMOVQ 1048(AX), K3 - KMOVQ 1040(AX), K2 - KMOVQ 1032(AX), K1 - KMOVQ 1024(AX), K0 + KMOVQ 2104(AX), K7 + KMOVQ 2096(AX), K6 + KMOVQ 2088(AX), K5 + KMOVQ 2080(AX), K4 + KMOVQ 2072(AX), K3 + KMOVQ 2064(AX), K2 + KMOVQ 2056(AX), K1 + KMOVQ 2048(AX), K0 + VMOVDQU64 1984(AX), Z31 + VMOVDQU64 1920(AX), Z30 + VMOVDQU64 1856(AX), Z29 + VMOVDQU64 1792(AX), Z28 + VMOVDQU64 1728(AX), Z27 + VMOVDQU64 1664(AX), Z26 + VMOVDQU64 1600(AX), Z25 + VMOVDQU64 1536(AX), Z24 + VMOVDQU64 1472(AX), Z23 + VMOVDQU64 1408(AX), Z22 + VMOVDQU64 1344(AX), Z21 + VMOVDQU64 1280(AX), Z20 + VMOVDQU64 1216(AX), Z19 + VMOVDQU64 1152(AX), Z18 + VMOVDQU64 1088(AX), Z17 + VMOVDQU64 1024(AX), Z16 VMOVDQU64 960(AX), Z15 VMOVDQU64 896(AX), Z14 VMOVDQU64 832(AX), Z13 diff --git a/src/runtime/proc.go b/src/runtime/proc.go index 62e79e74e2..58fb4bd681 100644 --- a/src/runtime/proc.go +++ b/src/runtime/proc.go @@ -763,9 +763,10 @@ func cpuinit(env string) { // to guard execution of instructions that can not be assumed to be always supported. switch GOARCH { case "386", "amd64": + x86HasAVX = cpu.X86.HasAVX + x86HasFMA = cpu.X86.HasFMA x86HasPOPCNT = cpu.X86.HasPOPCNT x86HasSSE41 = cpu.X86.HasSSE41 - x86HasFMA = cpu.X86.HasFMA case "arm": armHasVFPv4 = cpu.ARM.HasVFPv4 diff --git a/src/runtime/race_amd64.s b/src/runtime/race_amd64.s index e19118bd54..23f2e59e3d 100644 --- a/src/runtime/race_amd64.s +++ b/src/runtime/race_amd64.s @@ -456,6 +456,9 @@ call: // Back to Go world, set special registers. // The g register (R14) is preserved in C. XORPS X15, X15 + CMPB internal∕cpu·X86+const_offsetX86HasAVX(SB), $1 + JNE 2(PC) + VXORPS X15, X15, X15 RET // C->Go callback thunk that allows to call runtime·racesymbolize from C code. diff --git a/src/runtime/sys_darwin_amd64.s b/src/runtime/sys_darwin_amd64.s index 99d67a9cfd..e4e1216d56 100644 --- a/src/runtime/sys_darwin_amd64.s +++ b/src/runtime/sys_darwin_amd64.s @@ -177,6 +177,9 @@ TEXT runtime·sigtramp(SB),NOSPLIT|TOPFRAME|NOFRAME,$0 get_tls(R12) MOVQ g(R12), R14 PXOR X15, X15 + CMPB internal∕cpu·X86+const_offsetX86HasAVX(SB), $1 + JNE 2(PC) + VXORPS X15, X15, X15 // Reserve space for spill slots. NOP SP // disable vet stack checking diff --git a/src/runtime/sys_dragonfly_amd64.s b/src/runtime/sys_dragonfly_amd64.s index a223c2cf76..84bf326aad 100644 --- a/src/runtime/sys_dragonfly_amd64.s +++ b/src/runtime/sys_dragonfly_amd64.s @@ -228,6 +228,9 @@ TEXT runtime·sigtramp(SB),NOSPLIT|TOPFRAME|NOFRAME,$0 get_tls(R12) MOVQ g(R12), R14 PXOR X15, X15 + CMPB internal∕cpu·X86+const_offsetX86HasAVX(SB), $1 + JNE 2(PC) + VXORPS X15, X15, X15 // Reserve space for spill slots. NOP SP // disable vet stack checking diff --git a/src/runtime/sys_freebsd_amd64.s b/src/runtime/sys_freebsd_amd64.s index 977ea093d2..a1fa3a6fa2 100644 --- a/src/runtime/sys_freebsd_amd64.s +++ b/src/runtime/sys_freebsd_amd64.s @@ -265,6 +265,9 @@ TEXT runtime·sigtramp(SB),NOSPLIT|TOPFRAME|NOFRAME,$0 get_tls(R12) MOVQ g(R12), R14 PXOR X15, X15 + CMPB internal∕cpu·X86+const_offsetX86HasAVX(SB), $1 + JNE 2(PC) + VXORPS X15, X15, X15 // Reserve space for spill slots. NOP SP // disable vet stack checking @@ -290,6 +293,9 @@ TEXT runtime·sigprofNonGoWrapper<>(SB),NOSPLIT|NOFRAME,$0 get_tls(R12) MOVQ g(R12), R14 PXOR X15, X15 + CMPB internal∕cpu·X86+const_offsetX86HasAVX(SB), $1 + JNE 2(PC) + VXORPS X15, X15, X15 // Reserve space for spill slots. NOP SP // disable vet stack checking diff --git a/src/runtime/sys_linux_amd64.s b/src/runtime/sys_linux_amd64.s index 941f70b0e8..02505c2fb0 100644 --- a/src/runtime/sys_linux_amd64.s +++ b/src/runtime/sys_linux_amd64.s @@ -340,6 +340,9 @@ TEXT runtime·sigtramp(SB),NOSPLIT|TOPFRAME|NOFRAME,$0 get_tls(R12) MOVQ g(R12), R14 PXOR X15, X15 + CMPB internal∕cpu·X86+const_offsetX86HasAVX(SB), $1 + JNE 2(PC) + VXORPS X15, X15, X15 // Reserve space for spill slots. NOP SP // disable vet stack checking @@ -365,6 +368,9 @@ TEXT runtime·sigprofNonGoWrapper<>(SB),NOSPLIT|NOFRAME,$0 get_tls(R12) MOVQ g(R12), R14 PXOR X15, X15 + CMPB internal∕cpu·X86+const_offsetX86HasAVX(SB), $1 + JNE 2(PC) + VXORPS X15, X15, X15 // Reserve space for spill slots. NOP SP // disable vet stack checking diff --git a/src/runtime/sys_netbsd_amd64.s b/src/runtime/sys_netbsd_amd64.s index 2f1ddcdc89..edc7f3d6ee 100644 --- a/src/runtime/sys_netbsd_amd64.s +++ b/src/runtime/sys_netbsd_amd64.s @@ -310,6 +310,9 @@ TEXT runtime·sigtramp(SB),NOSPLIT|TOPFRAME|NOFRAME,$0 get_tls(R12) MOVQ g(R12), R14 PXOR X15, X15 + CMPB internal∕cpu·X86+const_offsetX86HasAVX(SB), $1 + JNE 2(PC) + VXORPS X15, X15, X15 // Reserve space for spill slots. NOP SP // disable vet stack checking diff --git a/src/runtime/sys_openbsd_amd64.s b/src/runtime/sys_openbsd_amd64.s index ff0bc2416a..734dfe6478 100644 --- a/src/runtime/sys_openbsd_amd64.s +++ b/src/runtime/sys_openbsd_amd64.s @@ -64,6 +64,9 @@ TEXT runtime·sigtramp(SB),NOSPLIT|TOPFRAME|NOFRAME,$0 get_tls(R12) MOVQ g(R12), R14 PXOR X15, X15 + CMPB internal∕cpu·X86+const_offsetX86HasAVX(SB), $1 + JNE 2(PC) + VXORPS X15, X15, X15 // Reserve space for spill slots. NOP SP // disable vet stack checking diff --git a/src/runtime/sys_windows_amd64.s b/src/runtime/sys_windows_amd64.s index e438599910..b0b4d3cce6 100644 --- a/src/runtime/sys_windows_amd64.s +++ b/src/runtime/sys_windows_amd64.s @@ -32,6 +32,9 @@ TEXT sigtramp<>(SB),NOSPLIT,$0-0 // R14 is cleared in case there's a non-zero value in there // if called from a non-go thread. XORPS X15, X15 + CMPB internal∕cpu·X86+const_offsetX86HasAVX(SB), $1 + JNE 2(PC) + VXORPS X15, X15, X15 XORQ R14, R14 get_tls(AX) diff --git a/src/runtime/testdata/testprog/cpuflags_amd64.go b/src/runtime/testdata/testprog/cpuflags_amd64.go new file mode 100644 index 0000000000..d53eacbe99 --- /dev/null +++ b/src/runtime/testdata/testprog/cpuflags_amd64.go @@ -0,0 +1,18 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package main + +import "fmt" + +func init() { + register("CheckAVX", CheckAVX) +} + +func CheckAVX() { + checkAVX() + fmt.Println("OK") +} + +func checkAVX() diff --git a/src/runtime/testdata/testprog/cpuflags_amd64.s b/src/runtime/testdata/testprog/cpuflags_amd64.s new file mode 100644 index 0000000000..1610c5729a --- /dev/null +++ b/src/runtime/testdata/testprog/cpuflags_amd64.s @@ -0,0 +1,9 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" + +TEXT ·checkAVX(SB), NOSPLIT|NOFRAME, $0-0 + VXORPS X1, X2, X3 + RET |
