diff options
| author | Michael Munday <munday@ca.ibm.com> | 2017-03-13 14:39:17 -0400 |
|---|---|---|
| committer | Lynn Boger <laboger@linux.vnet.ibm.com> | 2017-03-20 20:01:29 +0000 |
| commit | 17570a9afb5dc2d7d11eb3e132917e8d153a1ec9 (patch) | |
| tree | 424875a1d4d0d6dc96f592695683e6c0a3999dae /src/cmd/internal/obj/ppc64 | |
| parent | 01ac5b8dcfe5342af3770b0834220b87ea328fad (diff) | |
| download | go-17570a9afb5dc2d7d11eb3e132917e8d153a1ec9.tar.xz | |
cmd/compile: emit fused multiply-{add,subtract} on ppc64x
A follow-on to CL 36963, adding support for ppc64x.
Performance changes (as posted on the issue):
poly1305:
benchmark old ns/op new ns/op delta
Benchmark64-16 172 151 -12.21%
Benchmark1K-16 1828 1523 -16.68%
Benchmark64Unaligned-16 172 151 -12.21%
Benchmark1KUnaligned-16 1827 1523 -16.64%
math:
BenchmarkAcos-16 43.9 39.9 -9.11%
BenchmarkAcosh-16 57.0 45.8 -19.65%
BenchmarkAsin-16 35.8 33.0 -7.82%
BenchmarkAsinh-16 68.6 60.8 -11.37%
BenchmarkAtan-16 19.8 16.2 -18.18%
BenchmarkAtanh-16 65.5 57.5 -12.21%
BenchmarkAtan2-16 45.4 34.2 -24.67%
BenchmarkGamma-16 37.6 26.0 -30.85%
BenchmarkLgamma-16 40.0 28.2 -29.50%
BenchmarkLog1p-16 35.1 29.1 -17.09%
BenchmarkSin-16 22.7 18.4 -18.94%
BenchmarkSincos-16 31.7 23.7 -25.24%
BenchmarkSinh-16 146 131 -10.27%
BenchmarkY0-16 130 107 -17.69%
BenchmarkY1-16 127 107 -15.75%
BenchmarkYn-16 278 235 -15.47%
Updates #17895.
Change-Id: I1c16199715d20c9c4bd97c4a950bcfa69eb688c1
Reviewed-on: https://go-review.googlesource.com/38095
Reviewed-by: Carlos Eduardo Seo <cseo@linux.vnet.ibm.com>
Reviewed-by: Lynn Boger <laboger@linux.vnet.ibm.com>
Diffstat (limited to 'src/cmd/internal/obj/ppc64')
| -rw-r--r-- | src/cmd/internal/obj/ppc64/asm9.go | 2 |
1 file changed, 1 insertion, 1 deletion
diff --git a/src/cmd/internal/obj/ppc64/asm9.go b/src/cmd/internal/obj/ppc64/asm9.go
index 90b796dd67..0fdce94b7c 100644
--- a/src/cmd/internal/obj/ppc64/asm9.go
+++ b/src/cmd/internal/obj/ppc64/asm9.go
@@ -2765,7 +2765,7 @@ func asmout(ctxt *obj.Link, p *obj.Prog, o *Optab, out []uint32) {
 		}
 		o1 = AOP_RRR(oprrr(ctxt, p.As), uint32(p.To.Reg), 0, uint32(r))
 
-	case 34: /* FMADDx fra,frb,frc,frd (d=a*b+c); FSELx a<0? (d=b): (d=c) */
+	case 34: /* FMADDx fra,frb,frc,frt (t=a*c±b) */
 		o1 = AOP_RRR(oprrr(ctxt, p.As), uint32(p.To.Reg), uint32(p.From.Reg), uint32(p.Reg)) | (uint32(p.From3.Reg)&31)<<6
 
 	case 35: /* mov r,lext/lauto/loreg ==> cau $(v>>16),sb,r'; store o(r') */
