diff options
| author | Meng Zhuo <mengzhuo1203@gmail.com> | 2018-03-06 02:47:09 +0000 |
|---|---|---|
| committer | Brad Fitzpatrick <bradfitz@golang.org> | 2018-03-06 04:14:59 +0000 |
| commit | 8916773a3d6faabed5d500f8bf0d89b7048aecde (patch) | |
| tree | 86645b95e3274cc6a29f46be0c3e5490877c5d53 /src/runtime | |
| parent | baf3eb1625d64d42d7eacd7d29ad6d1e682553a0 (diff) | |
| download | go-8916773a3d6faabed5d500f8bf0d89b7048aecde.tar.xz | |
runtime, cmd/compile: use ldp for DUFFCOPY on ARM64
name old time/op new time/op delta
CopyFat8 2.15ns ± 1% 2.19ns ± 6% ~ (p=0.171 n=8+9)
CopyFat12 2.15ns ± 0% 2.17ns ± 2% ~ (p=0.137 n=8+10)
CopyFat16 2.17ns ± 3% 2.15ns ± 0% ~ (p=0.211 n=10+10)
CopyFat24 2.16ns ± 1% 2.15ns ± 0% ~ (p=0.087 n=10+10)
CopyFat32 11.5ns ± 0% 12.8ns ± 2% +10.87% (p=0.000 n=8+10)
CopyFat64 20.2ns ± 2% 12.9ns ± 0% -36.11% (p=0.000 n=10+10)
CopyFat128 37.2ns ± 0% 21.5ns ± 0% -42.20% (p=0.000 n=10+10)
CopyFat256 71.6ns ± 0% 38.7ns ± 0% -45.95% (p=0.000 n=10+10)
CopyFat512 140ns ± 0% 73ns ± 0% -47.86% (p=0.000 n=10+9)
CopyFat520 142ns ± 0% 74ns ± 0% -47.54% (p=0.000 n=10+10)
CopyFat1024 277ns ± 0% 141ns ± 0% -49.10% (p=0.000 n=10+10)
Change-Id: If54bc571add5db674d5e081579c87e80153d0a5a
Reviewed-on: https://go-review.googlesource.com/97395
Reviewed-by: Cherry Zhang <cherryyz@google.com>
Diffstat (limited to 'src/runtime')
| -rw-r--r-- | src/runtime/duff_arm64.s | 450 |
| -rw-r--r-- | src/runtime/memmove_test.go | 7 |
| -rw-r--r-- | src/runtime/mkduff.go | 11 |
3 files changed, 142 insertions, 326 deletions
diff --git a/src/runtime/duff_arm64.s b/src/runtime/duff_arm64.s index 54e6b9967e..3739c3945a 100644 --- a/src/runtime/duff_arm64.s +++ b/src/runtime/duff_arm64.s @@ -71,389 +71,197 @@ TEXT runtime·duffzero(SB), NOSPLIT|NOFRAME, $0-0 STP (ZR, ZR), (R16) RET -TEXT runtime·duffcopy(SB), NOSPLIT, $0-0 - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) +TEXT runtime·duffcopy(SB), NOSPLIT|NOFRAME, $0-0 + LDP.P 16(R16), (R26, R27) + STP.P (R26, R27), 16(R17) - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) + LDP.P 16(R16), (R26, R27) + STP.P (R26, R27), 16(R17) - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) + LDP.P 16(R16), (R26, R27) + STP.P (R26, R27), 16(R17) - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) + LDP.P 16(R16), (R26, R27) + STP.P (R26, R27), 16(R17) - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) + LDP.P 16(R16), (R26, R27) + STP.P (R26, R27), 16(R17) - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) + LDP.P 16(R16), (R26, R27) + STP.P (R26, R27), 16(R17) - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) + LDP.P 16(R16), (R26, R27) + STP.P (R26, R27), 16(R17) - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) + LDP.P 16(R16), (R26, R27) + STP.P (R26, R27), 16(R17) - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) + LDP.P 16(R16), (R26, R27) + STP.P (R26, R27), 16(R17) - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) + LDP.P 16(R16), (R26, R27) + STP.P (R26, R27), 16(R17) - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) + LDP.P 16(R16), (R26, R27) + STP.P (R26, R27), 16(R17) - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) + LDP.P 16(R16), (R26, R27) + STP.P (R26, R27), 16(R17) - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) + LDP.P 16(R16), (R26, R27) + STP.P (R26, R27), 16(R17) - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) + LDP.P 16(R16), (R26, R27) + STP.P (R26, R27), 16(R17) - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) + LDP.P 16(R16), (R26, R27) + STP.P (R26, R27), 16(R17) - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) + LDP.P 16(R16), (R26, R27) + STP.P (R26, R27), 16(R17) - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) + LDP.P 16(R16), (R26, R27) + STP.P 
(R26, R27), 16(R17) - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) + LDP.P 16(R16), (R26, R27) + STP.P (R26, R27), 16(R17) - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) + LDP.P 16(R16), (R26, R27) + STP.P (R26, R27), 16(R17) - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) + LDP.P 16(R16), (R26, R27) + STP.P (R26, R27), 16(R17) - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) + LDP.P 16(R16), (R26, R27) + STP.P (R26, R27), 16(R17) - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) + LDP.P 16(R16), (R26, R27) + STP.P (R26, R27), 16(R17) - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) + LDP.P 16(R16), (R26, R27) + STP.P (R26, R27), 16(R17) - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) + LDP.P 16(R16), (R26, R27) + STP.P (R26, R27), 16(R17) - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) + LDP.P 16(R16), (R26, R27) + STP.P (R26, R27), 16(R17) - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) + LDP.P 16(R16), (R26, R27) + STP.P (R26, R27), 16(R17) - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) + LDP.P 16(R16), (R26, R27) + STP.P (R26, R27), 16(R17) - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) + LDP.P 16(R16), (R26, R27) + STP.P (R26, R27), 16(R17) - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) + LDP.P 16(R16), (R26, R27) + STP.P (R26, R27), 16(R17) - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) + LDP.P 16(R16), (R26, R27) + STP.P (R26, R27), 16(R17) - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) + LDP.P 16(R16), (R26, R27) + STP.P (R26, R27), 16(R17) - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) + LDP.P 16(R16), (R26, R27) + STP.P (R26, R27), 16(R17) - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) + LDP.P 16(R16), (R26, R27) + STP.P (R26, R27), 16(R17) - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) + LDP.P 16(R16), (R26, R27) + STP.P (R26, R27), 16(R17) - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) + LDP.P 16(R16), (R26, R27) + STP.P (R26, R27), 16(R17) - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) + LDP.P 16(R16), (R26, R27) + STP.P (R26, R27), 16(R17) - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) + LDP.P 16(R16), (R26, R27) + STP.P (R26, R27), 16(R17) - MOVD.P 8(R16), 
R27 - MOVD.P R27, 8(R17) + LDP.P 16(R16), (R26, R27) + STP.P (R26, R27), 16(R17) - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) + LDP.P 16(R16), (R26, R27) + STP.P (R26, R27), 16(R17) - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) + LDP.P 16(R16), (R26, R27) + STP.P (R26, R27), 16(R17) - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) + LDP.P 16(R16), (R26, R27) + STP.P (R26, R27), 16(R17) - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) + LDP.P 16(R16), (R26, R27) + STP.P (R26, R27), 16(R17) - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) + LDP.P 16(R16), (R26, R27) + STP.P (R26, R27), 16(R17) - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) + LDP.P 16(R16), (R26, R27) + STP.P (R26, R27), 16(R17) - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) + LDP.P 16(R16), (R26, R27) + STP.P (R26, R27), 16(R17) - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) + LDP.P 16(R16), (R26, R27) + STP.P (R26, R27), 16(R17) - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) + LDP.P 16(R16), (R26, R27) + STP.P (R26, R27), 16(R17) - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) + LDP.P 16(R16), (R26, R27) + STP.P (R26, R27), 16(R17) - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) + LDP.P 16(R16), (R26, R27) + STP.P (R26, R27), 16(R17) - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) + LDP.P 16(R16), (R26, R27) + STP.P (R26, R27), 16(R17) - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) + LDP.P 16(R16), (R26, R27) + STP.P (R26, R27), 16(R17) - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) + LDP.P 16(R16), (R26, R27) + STP.P (R26, R27), 16(R17) - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) + LDP.P 16(R16), (R26, R27) + STP.P (R26, R27), 16(R17) - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) + LDP.P 16(R16), (R26, R27) + STP.P (R26, R27), 16(R17) - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) + LDP.P 16(R16), (R26, R27) + STP.P (R26, R27), 16(R17) - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) + LDP.P 16(R16), (R26, R27) + STP.P (R26, R27), 16(R17) - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) + LDP.P 16(R16), (R26, R27) + STP.P (R26, R27), 16(R17) - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) + LDP.P 
16(R16), (R26, R27) + STP.P (R26, R27), 16(R17) - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) + LDP.P 16(R16), (R26, R27) + STP.P (R26, R27), 16(R17) - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) + LDP.P 16(R16), (R26, R27) + STP.P (R26, R27), 16(R17) - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) + LDP.P 16(R16), (R26, R27) + STP.P (R26, R27), 16(R17) - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) + LDP.P 16(R16), (R26, R27) + STP.P (R26, R27), 16(R17) - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) + LDP.P 16(R16), (R26, R27) + STP.P (R26, R27), 16(R17) - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) - - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) - - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) - - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) - - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) - - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) - - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) - - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) - - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) - - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) - - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) - - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) - - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) - - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) - - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) - - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) - - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) - - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) - - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) - - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) - - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) - - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) - - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) - - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) - - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) - - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) - - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) - - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) - - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) - - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) - - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) - - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) - - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) - - MOVD.P 
8(R16), R27 - MOVD.P R27, 8(R17) - - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) - - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) - - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) - - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) - - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) - - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) - - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) - - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) - - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) - - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) - - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) - - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) - - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) - - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) - - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) - - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) - - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) - - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) - - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) - - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) - - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) - - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) - - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) - - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) - - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) - - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) - - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) - - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) - - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) - - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) - - MOVD.P 8(R16), R27 - MOVD.P R27, 8(R17) + LDP.P 16(R16), (R26, R27) + STP.P (R26, R27), 16(R17) RET diff --git a/src/runtime/memmove_test.go b/src/runtime/memmove_test.go index 62de604e69..b490cd815f 100644 --- a/src/runtime/memmove_test.go +++ b/src/runtime/memmove_test.go @@ -450,6 +450,13 @@ func BenchmarkCopyFat512(b *testing.B) { _ = y } } +func BenchmarkCopyFat520(b *testing.B) { + var x [520 / 4]uint32 + for i := 0; i < b.N; i++ { + y := x + _ = y + } +} func BenchmarkCopyFat1024(b *testing.B) { var x [1024 / 4]uint32 for i := 0; i < b.N; i++ { diff --git a/src/runtime/mkduff.go b/src/runtime/mkduff.go index 
fcc7f83197..b7c7e2689c 100644 --- a/src/runtime/mkduff.go +++ b/src/runtime/mkduff.go @@ -164,12 +164,13 @@ func zeroARM64(w io.Writer) { func copyARM64(w io.Writer) { // R16 (aka REGRT1): ptr to source memory // R17 (aka REGRT2): ptr to destination memory - // R27 (aka REGTMP): scratch space + // R26, R27 (aka REGTMP): scratch space // R16 and R17 are updated as a side effect - fmt.Fprintln(w, "TEXT runtime·duffcopy(SB), NOSPLIT, $0-0") - for i := 0; i < 128; i++ { - fmt.Fprintln(w, "\tMOVD.P\t8(R16), R27") - fmt.Fprintln(w, "\tMOVD.P\tR27, 8(R17)") + fmt.Fprintln(w, "TEXT runtime·duffcopy(SB), NOSPLIT|NOFRAME, $0-0") + + for i := 0; i < 64; i++ { + fmt.Fprintln(w, "\tLDP.P\t16(R16), (R26, R27)") + fmt.Fprintln(w, "\tSTP.P\t(R26, R27), 16(R17)") fmt.Fprintln(w) } fmt.Fprintln(w, "\tRET") |
