From 5cf281a9b791f0f10efd1574934cbb19ea1b33da Mon Sep 17 00:00:00 2001
From: Ilya Tocar
Date: Tue, 22 Sep 2015 14:32:05 +0300
Subject: runtime: optimize duffcopy on amd64
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Use movups to copy 16 bytes at a time.
Results (haswell):

name            old time/op  new time/op  delta
CopyFat8-48     0.62ns ± 3%  0.63ns ± 3%     ~     (p=0.535 n=20+20)
CopyFat12-48    0.92ns ± 2%  0.93ns ± 3%     ~     (p=0.594 n=17+18)
CopyFat16-48    1.23ns ± 2%  1.23ns ± 2%     ~     (p=0.839 n=20+19)
CopyFat24-48    1.85ns ± 2%  1.84ns ± 0%   -0.48%  (p=0.014 n=19+20)
CopyFat32-48    2.45ns ± 0%  2.45ns ± 1%     ~     (p=1.000 n=16+16)
CopyFat64-48    3.30ns ± 2%  2.14ns ± 1%  -35.00%  (p=0.000 n=20+18)
CopyFat128-48   6.05ns ± 0%  3.98ns ± 0%  -34.22%  (p=0.000 n=18+17)
CopyFat256-48   11.9ns ± 3%   7.7ns ± 0%  -35.87%  (p=0.000 n=20+17)
CopyFat512-48   23.0ns ± 2%  15.1ns ± 2%  -34.52%  (p=0.000 n=20+18)
CopyFat1024-48  44.8ns ± 1%  29.8ns ± 2%  -33.48%  (p=0.000 n=17+19)

Change-Id: I8a78773c656d400726a020894461e00c59f896bf
Reviewed-on: https://go-review.googlesource.com/14836
Run-TryBot: Brad Fitzpatrick
TryBot-Result: Gobot Gobot
Reviewed-by: Keith Randall
---
 src/runtime/mkduff.go | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'src/runtime/mkduff.go')

diff --git a/src/runtime/mkduff.go b/src/runtime/mkduff.go
index 41caa72d6d..918766650f 100644
--- a/src/runtime/mkduff.go
+++ b/src/runtime/mkduff.go
@@ -84,11 +84,11 @@ func copyAMD64(w io.Writer) {
 	// for some reason that is 3.5x slower than this code.
 	// The STOSQ in duffzero seem fine, though.
 	fmt.Fprintln(w, "TEXT runtime·duffcopy(SB), NOSPLIT, $0-0")
-	for i := 0; i < 128; i++ {
-		fmt.Fprintln(w, "\tMOVQ\t(SI), CX")
-		fmt.Fprintln(w, "\tADDQ\t$8, SI")
-		fmt.Fprintln(w, "\tMOVQ\tCX, (DI)")
-		fmt.Fprintln(w, "\tADDQ\t$8, DI")
+	for i := 0; i < 64; i++ {
+		fmt.Fprintln(w, "\tMOVUPS\t(SI), X0")
+		fmt.Fprintln(w, "\tADDQ\t$16, SI")
+		fmt.Fprintln(w, "\tMOVUPS\tX0, (DI)")
+		fmt.Fprintln(w, "\tADDQ\t$16, DI")
 		fmt.Fprintln(w)
 	}
 	fmt.Fprintln(w, "\tRET")
-- 
cgit v1.3