diff options
| author | Cherry Zhang <cherryyz@google.com> | 2021-01-29 13:46:34 -0500 |
|---|---|---|
| committer | Cherry Zhang <cherryyz@google.com> | 2021-02-03 22:44:53 +0000 |
| commit | 401d7e5a242f1007c2637a25111a6fa985728c08 (patch) | |
| tree | b2fdc485e38b26ed8e83386ba8467f363cf53ae6 /src/runtime | |
| parent | bfc7418e6d3a505fe348718fd113473c9d92b135 (diff) | |
| download | go-401d7e5a242f1007c2637a25111a6fa985728c08.tar.xz | |
[dev.regabi] cmd/compile: reserve X15 as zero register on AMD64
In ABIInternal, reserve X15 as constant zero, and use it to zero
memory. (Maybe there can be more use of it?)
The register is zeroed when transition to ABIInternal from ABI0.
Caveat: using X15 generates longer instructions than using X0.
Maybe we want to use X0?
Change-Id: I12d5ee92a01fc0b59dad4e5ab023ac71bc2a8b7d
Reviewed-on: https://go-review.googlesource.com/c/go/+/288093
Trust: Cherry Zhang <cherryyz@google.com>
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: David Chase <drchase@google.com>
Diffstat (limited to 'src/runtime')
| -rw-r--r-- | src/runtime/duff_amd64.s | 128 | ||||
| -rw-r--r-- | src/runtime/mkduff.go | 14 |
2 files changed, 71 insertions, 71 deletions
diff --git a/src/runtime/duff_amd64.s b/src/runtime/duff_amd64.s index 2ff5bf6dbc..df010f5853 100644 --- a/src/runtime/duff_amd64.s +++ b/src/runtime/duff_amd64.s @@ -5,100 +5,100 @@ #include "textflag.h" TEXT runtime·duffzero<ABIInternal>(SB), NOSPLIT, $0-0 - MOVUPS X0,(DI) - MOVUPS X0,16(DI) - MOVUPS X0,32(DI) - MOVUPS X0,48(DI) + MOVUPS X15,(DI) + MOVUPS X15,16(DI) + MOVUPS X15,32(DI) + MOVUPS X15,48(DI) LEAQ 64(DI),DI - MOVUPS X0,(DI) - MOVUPS X0,16(DI) - MOVUPS X0,32(DI) - MOVUPS X0,48(DI) + MOVUPS X15,(DI) + MOVUPS X15,16(DI) + MOVUPS X15,32(DI) + MOVUPS X15,48(DI) LEAQ 64(DI),DI - MOVUPS X0,(DI) - MOVUPS X0,16(DI) - MOVUPS X0,32(DI) - MOVUPS X0,48(DI) + MOVUPS X15,(DI) + MOVUPS X15,16(DI) + MOVUPS X15,32(DI) + MOVUPS X15,48(DI) LEAQ 64(DI),DI - MOVUPS X0,(DI) - MOVUPS X0,16(DI) - MOVUPS X0,32(DI) - MOVUPS X0,48(DI) + MOVUPS X15,(DI) + MOVUPS X15,16(DI) + MOVUPS X15,32(DI) + MOVUPS X15,48(DI) LEAQ 64(DI),DI - MOVUPS X0,(DI) - MOVUPS X0,16(DI) - MOVUPS X0,32(DI) - MOVUPS X0,48(DI) + MOVUPS X15,(DI) + MOVUPS X15,16(DI) + MOVUPS X15,32(DI) + MOVUPS X15,48(DI) LEAQ 64(DI),DI - MOVUPS X0,(DI) - MOVUPS X0,16(DI) - MOVUPS X0,32(DI) - MOVUPS X0,48(DI) + MOVUPS X15,(DI) + MOVUPS X15,16(DI) + MOVUPS X15,32(DI) + MOVUPS X15,48(DI) LEAQ 64(DI),DI - MOVUPS X0,(DI) - MOVUPS X0,16(DI) - MOVUPS X0,32(DI) - MOVUPS X0,48(DI) + MOVUPS X15,(DI) + MOVUPS X15,16(DI) + MOVUPS X15,32(DI) + MOVUPS X15,48(DI) LEAQ 64(DI),DI - MOVUPS X0,(DI) - MOVUPS X0,16(DI) - MOVUPS X0,32(DI) - MOVUPS X0,48(DI) + MOVUPS X15,(DI) + MOVUPS X15,16(DI) + MOVUPS X15,32(DI) + MOVUPS X15,48(DI) LEAQ 64(DI),DI - MOVUPS X0,(DI) - MOVUPS X0,16(DI) - MOVUPS X0,32(DI) - MOVUPS X0,48(DI) + MOVUPS X15,(DI) + MOVUPS X15,16(DI) + MOVUPS X15,32(DI) + MOVUPS X15,48(DI) LEAQ 64(DI),DI - MOVUPS X0,(DI) - MOVUPS X0,16(DI) - MOVUPS X0,32(DI) - MOVUPS X0,48(DI) + MOVUPS X15,(DI) + MOVUPS X15,16(DI) + MOVUPS X15,32(DI) + MOVUPS X15,48(DI) LEAQ 64(DI),DI - MOVUPS X0,(DI) - MOVUPS X0,16(DI) - MOVUPS X0,32(DI) - MOVUPS X0,48(DI) + MOVUPS X15,(DI) + MOVUPS X15,16(DI) + MOVUPS X15,32(DI) + MOVUPS X15,48(DI) LEAQ 64(DI),DI - MOVUPS X0,(DI) - MOVUPS X0,16(DI) - MOVUPS X0,32(DI) - MOVUPS X0,48(DI) + MOVUPS X15,(DI) + MOVUPS X15,16(DI) + MOVUPS X15,32(DI) + MOVUPS X15,48(DI) LEAQ 64(DI),DI - MOVUPS X0,(DI) - MOVUPS X0,16(DI) - MOVUPS X0,32(DI) - MOVUPS X0,48(DI) + MOVUPS X15,(DI) + MOVUPS X15,16(DI) + MOVUPS X15,32(DI) + MOVUPS X15,48(DI) LEAQ 64(DI),DI - MOVUPS X0,(DI) - MOVUPS X0,16(DI) - MOVUPS X0,32(DI) - MOVUPS X0,48(DI) + MOVUPS X15,(DI) + MOVUPS X15,16(DI) + MOVUPS X15,32(DI) + MOVUPS X15,48(DI) LEAQ 64(DI),DI - MOVUPS X0,(DI) - MOVUPS X0,16(DI) - MOVUPS X0,32(DI) - MOVUPS X0,48(DI) + MOVUPS X15,(DI) + MOVUPS X15,16(DI) + MOVUPS X15,32(DI) + MOVUPS X15,48(DI) LEAQ 64(DI),DI - MOVUPS X0,(DI) - MOVUPS X0,16(DI) - MOVUPS X0,32(DI) - MOVUPS X0,48(DI) + MOVUPS X15,(DI) + MOVUPS X15,16(DI) + MOVUPS X15,32(DI) + MOVUPS X15,48(DI) LEAQ 64(DI),DI RET diff --git a/src/runtime/mkduff.go b/src/runtime/mkduff.go index 94ae75fbfe..ef297f073e 100644 --- a/src/runtime/mkduff.go +++ b/src/runtime/mkduff.go @@ -62,15 +62,15 @@ func gen(arch string, tags, zero, copy func(io.Writer)) { func notags(w io.Writer) { fmt.Fprintln(w) } func zeroAMD64(w io.Writer) { - // X0: zero + // X15: zero // DI: ptr to memory to be zeroed // DI is updated as a side effect. - fmt.Fprintln(w, "TEXT runtime·duffzero(SB), NOSPLIT, $0-0") + fmt.Fprintln(w, "TEXT runtime·duffzero<ABIInternal>(SB), NOSPLIT, $0-0") for i := 0; i < 16; i++ { - fmt.Fprintln(w, "\tMOVUPS\tX0,(DI)") - fmt.Fprintln(w, "\tMOVUPS\tX0,16(DI)") - fmt.Fprintln(w, "\tMOVUPS\tX0,32(DI)") - fmt.Fprintln(w, "\tMOVUPS\tX0,48(DI)") + fmt.Fprintln(w, "\tMOVUPS\tX15,(DI)") + fmt.Fprintln(w, "\tMOVUPS\tX15,16(DI)") + fmt.Fprintln(w, "\tMOVUPS\tX15,32(DI)") + fmt.Fprintln(w, "\tMOVUPS\tX15,48(DI)") fmt.Fprintln(w, "\tLEAQ\t64(DI),DI") // We use lea instead of add, to avoid clobbering flags fmt.Fprintln(w) } @@ -84,7 +84,7 @@ func copyAMD64(w io.Writer) { // // This is equivalent to a sequence of MOVSQ but // for some reason that is 3.5x slower than this code. - fmt.Fprintln(w, "TEXT runtime·duffcopy(SB), NOSPLIT, $0-0") + fmt.Fprintln(w, "TEXT runtime·duffcopy<ABIInternal>(SB), NOSPLIT, $0-0") for i := 0; i < 64; i++ { fmt.Fprintln(w, "\tMOVUPS\t(SI), X0") fmt.Fprintln(w, "\tADDQ\t$16, SI") |
