about summary refs log tree commit diff
path: root/src/runtime
diff options
context:
space:
mode:
author    Junyang Shao <shaojunyang@google.com>  2025-07-08 19:24:30 +0000
committer Junyang Shao <shaojunyang@google.com>  2025-07-09 11:06:24 -0700
commit    574854fd863377a9467625c45ec842fd7d5fc341 (patch)
tree      6245b8d99183955d7371ea1f906461e3f12b4d4b /src/runtime
parent    5429328b0cc6a6749c37a7a91ecee8b8eb644c2a (diff)
download  go-574854fd863377a9467625c45ec842fd7d5fc341.tar.xz
[dev.simd] runtime: save Z16-Z31 registers in async preempt
The register allocation will use the upper register soon, this CL is to enable that.

Change-Id: I4d7285e08b95f4e6ebee72594dfbe8d1199f09ed
Reviewed-on: https://go-review.googlesource.com/c/go/+/686498
TryBot-Bypass: David Chase <drchase@google.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Commit-Queue: David Chase <drchase@google.com>
Diffstat (limited to 'src/runtime')
-rw-r--r--  src/runtime/mkpreempt.go      |  2
-rw-r--r--  src/runtime/preempt_amd64.go  | 16
-rw-r--r--  src/runtime/preempt_amd64.s   | 64
3 files changed, 65 insertions, 17 deletions
diff --git a/src/runtime/mkpreempt.go b/src/runtime/mkpreempt.go
index 2bd2ef07fa..7786f342b5 100644
--- a/src/runtime/mkpreempt.go
+++ b/src/runtime/mkpreempt.go
@@ -300,7 +300,7 @@ func genAMD64(g *gen) {
// Create layouts for X, Y, and Z registers.
const (
numXRegs = 16
- numZRegs = 16 // TODO: If we start using upper registers, change to 32
+ numZRegs = 32
numKRegs = 8
)
lZRegs := layout{sp: xReg} // Non-GP registers
diff --git a/src/runtime/preempt_amd64.go b/src/runtime/preempt_amd64.go
index 88c0ddd34a..78dec40e1f 100644
--- a/src/runtime/preempt_amd64.go
+++ b/src/runtime/preempt_amd64.go
@@ -19,6 +19,22 @@ type xRegs struct {
Z13 [64]byte
Z14 [64]byte
Z15 [64]byte
+ Z16 [64]byte
+ Z17 [64]byte
+ Z18 [64]byte
+ Z19 [64]byte
+ Z20 [64]byte
+ Z21 [64]byte
+ Z22 [64]byte
+ Z23 [64]byte
+ Z24 [64]byte
+ Z25 [64]byte
+ Z26 [64]byte
+ Z27 [64]byte
+ Z28 [64]byte
+ Z29 [64]byte
+ Z30 [64]byte
+ Z31 [64]byte
K0 uint64
K1 uint64
K2 uint64
diff --git a/src/runtime/preempt_amd64.s b/src/runtime/preempt_amd64.s
index c35de7f3b7..a5b949a242 100644
--- a/src/runtime/preempt_amd64.s
+++ b/src/runtime/preempt_amd64.s
@@ -95,14 +95,30 @@ saveAVX512:
VMOVDQU64 Z13, 832(AX)
VMOVDQU64 Z14, 896(AX)
VMOVDQU64 Z15, 960(AX)
- KMOVQ K0, 1024(AX)
- KMOVQ K1, 1032(AX)
- KMOVQ K2, 1040(AX)
- KMOVQ K3, 1048(AX)
- KMOVQ K4, 1056(AX)
- KMOVQ K5, 1064(AX)
- KMOVQ K6, 1072(AX)
- KMOVQ K7, 1080(AX)
+ VMOVDQU64 Z16, 1024(AX)
+ VMOVDQU64 Z17, 1088(AX)
+ VMOVDQU64 Z18, 1152(AX)
+ VMOVDQU64 Z19, 1216(AX)
+ VMOVDQU64 Z20, 1280(AX)
+ VMOVDQU64 Z21, 1344(AX)
+ VMOVDQU64 Z22, 1408(AX)
+ VMOVDQU64 Z23, 1472(AX)
+ VMOVDQU64 Z24, 1536(AX)
+ VMOVDQU64 Z25, 1600(AX)
+ VMOVDQU64 Z26, 1664(AX)
+ VMOVDQU64 Z27, 1728(AX)
+ VMOVDQU64 Z28, 1792(AX)
+ VMOVDQU64 Z29, 1856(AX)
+ VMOVDQU64 Z30, 1920(AX)
+ VMOVDQU64 Z31, 1984(AX)
+ KMOVQ K0, 2048(AX)
+ KMOVQ K1, 2056(AX)
+ KMOVQ K2, 2064(AX)
+ KMOVQ K3, 2072(AX)
+ KMOVQ K4, 2080(AX)
+ KMOVQ K5, 2088(AX)
+ KMOVQ K6, 2096(AX)
+ KMOVQ K7, 2104(AX)
JMP preempt
preempt:
CALL ·asyncPreempt2(SB)
@@ -153,14 +169,30 @@ restoreAVX2:
VMOVDQU 0(AX), Y0
JMP restoreGPs
restoreAVX512:
- KMOVQ 1080(AX), K7
- KMOVQ 1072(AX), K6
- KMOVQ 1064(AX), K5
- KMOVQ 1056(AX), K4
- KMOVQ 1048(AX), K3
- KMOVQ 1040(AX), K2
- KMOVQ 1032(AX), K1
- KMOVQ 1024(AX), K0
+ KMOVQ 2104(AX), K7
+ KMOVQ 2096(AX), K6
+ KMOVQ 2088(AX), K5
+ KMOVQ 2080(AX), K4
+ KMOVQ 2072(AX), K3
+ KMOVQ 2064(AX), K2
+ KMOVQ 2056(AX), K1
+ KMOVQ 2048(AX), K0
+ VMOVDQU64 1984(AX), Z31
+ VMOVDQU64 1920(AX), Z30
+ VMOVDQU64 1856(AX), Z29
+ VMOVDQU64 1792(AX), Z28
+ VMOVDQU64 1728(AX), Z27
+ VMOVDQU64 1664(AX), Z26
+ VMOVDQU64 1600(AX), Z25
+ VMOVDQU64 1536(AX), Z24
+ VMOVDQU64 1472(AX), Z23
+ VMOVDQU64 1408(AX), Z22
+ VMOVDQU64 1344(AX), Z21
+ VMOVDQU64 1280(AX), Z20
+ VMOVDQU64 1216(AX), Z19
+ VMOVDQU64 1152(AX), Z18
+ VMOVDQU64 1088(AX), Z17
+ VMOVDQU64 1024(AX), Z16
VMOVDQU64 960(AX), Z15
VMOVDQU64 896(AX), Z14
VMOVDQU64 832(AX), Z13