diff options
| author | Carlos Eduardo Seo <cseo@linux.vnet.ibm.com> | 2017-08-07 15:44:38 -0300 |
|---|---|---|
| committer | Lynn Boger <laboger@linux.vnet.ibm.com> | 2017-11-06 21:56:18 +0000 |
| commit | be943df58860e7dec008ebb8d68428d54e311b94 (patch) | |
| tree | 69a3bce9cb554f691b8bfa73d8c079444f3c877f /src/cmd/internal/obj/ppc64 | |
| parent | 4fcc835971ad63cf913ebe074ef6191e35a44ab9 (diff) | |
| download | go-be943df58860e7dec008ebb8d68428d54e311b94.tar.xz | |
runtime: improve IndexByte for ppc64x
This change adds a better implementation of IndexByte in asm that uses the
vector registers/instructions on ppc64x.
benchmark old ns/op new ns/op delta
BenchmarkIndexByte/10-8 9.70 9.37 -3.40%
BenchmarkIndexByte/32-8 10.9 10.9 +0.00%
BenchmarkIndexByte/4K-8 254 92.8 -63.46%
BenchmarkIndexByte/4M-8 249246 118435 -52.48%
BenchmarkIndexByte/64M-8 10737987 7383096 -31.24%
benchmark old MB/s new MB/s speedup
BenchmarkIndexByte/10-8 1030.63 1067.24 1.04x
BenchmarkIndexByte/32-8 2922.69 2928.53 1.00x
BenchmarkIndexByte/4K-8 16065.95 44156.45 2.75x
BenchmarkIndexByte/4M-8 16827.96 35414.21 2.10x
BenchmarkIndexByte/64M-8 6249.67 9089.53 1.45x
Change-Id: I81dbdd620f7bb4e395ce4d1f2a14e8e91e39f9a1
Reviewed-on: https://go-review.googlesource.com/71710
Run-TryBot: Lynn Boger <laboger@linux.vnet.ibm.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Lynn Boger <laboger@linux.vnet.ibm.com>
Diffstat (limited to 'src/cmd/internal/obj/ppc64')
| -rw-r--r-- | src/cmd/internal/obj/ppc64/a.out.go | 2 | ||||
| -rw-r--r-- | src/cmd/internal/obj/ppc64/anames.go | 2 | ||||
| -rw-r--r-- | src/cmd/internal/obj/ppc64/asm9.go | 11 |
3 files changed, 15 insertions, 0 deletions
```diff
diff --git a/src/cmd/internal/obj/ppc64/a.out.go b/src/cmd/internal/obj/ppc64/a.out.go
index 6b5a1b4351..e684281774 100644
--- a/src/cmd/internal/obj/ppc64/a.out.go
+++ b/src/cmd/internal/obj/ppc64/a.out.go
@@ -859,6 +859,8 @@ const (
 	AVCMPNEZB
 	AVCMPNEZBCC
 	AVPERM
+	AVBPERMQ
+	AVBPERMD
 	AVSEL
 	AVSPLT
 	AVSPLTB
diff --git a/src/cmd/internal/obj/ppc64/anames.go b/src/cmd/internal/obj/ppc64/anames.go
index 142b53eadd..b7ca133057 100644
--- a/src/cmd/internal/obj/ppc64/anames.go
+++ b/src/cmd/internal/obj/ppc64/anames.go
@@ -474,6 +474,8 @@ var Anames = []string{
 	"VCMPNEZB",
 	"VCMPNEZBCC",
 	"VPERM",
+	"VBPERMQ",
+	"VBPERMD",
 	"VSEL",
 	"VSPLT",
 	"VSPLTB",
diff --git a/src/cmd/internal/obj/ppc64/asm9.go b/src/cmd/internal/obj/ppc64/asm9.go
index 2b8efe846d..1f488d5e4d 100644
--- a/src/cmd/internal/obj/ppc64/asm9.go
+++ b/src/cmd/internal/obj/ppc64/asm9.go
@@ -421,6 +421,9 @@ var optab = []Optab{
 	/* Vector permute */
 	{AVPERM, C_VREG, C_VREG, C_VREG, C_VREG, 83, 4, 0}, /* vector permute, va-form */
 
+	/* Vector bit permute */
+	{AVBPERMQ, C_VREG, C_VREG, C_NONE, C_VREG, 82, 4, 0}, /* vector bit permute, vx-form */
+
 	/* Vector select */
 	{AVSEL, C_VREG, C_VREG, C_VREG, C_VREG, 83, 4, 0}, /* vector select, va-form */
 
@@ -1378,6 +1381,9 @@ func buildop(ctxt *obj.Link) {
 		case AVPERM: /* vperm */
 			opset(AVPERM, r0)
 
+		case AVBPERMQ: /* vbpermq, vbpermd */
+			opset(AVBPERMD, r0)
+
 		case AVSEL: /* vsel */
 			opset(AVSEL, r0)
 
@@ -4165,6 +4171,11 @@ func (c *ctxt9) oprrr(a obj.As) uint32 {
 	case AVSRAD:
 		return OPVX(4, 964, 0, 0) /* vsrad - v2.07 */
 
+	case AVBPERMQ:
+		return OPVC(4, 1356, 0, 0) /* vbpermq - v2.07 */
+	case AVBPERMD:
+		return OPVC(4, 1484, 0, 0) /* vbpermd - v3.00 */
+
 	case AVCLZB:
 		return OPVX(4, 1794, 0, 0) /* vclzb - v2.07 */
 	case AVCLZH:
```
