aboutsummaryrefslogtreecommitdiff
path: root/src/cmd/internal/obj/ppc64
diff options
context:
space:
mode:
author: Carlos Eduardo Seo <cseo@linux.vnet.ibm.com> 2017-08-07 15:44:38 -0300
committer: Lynn Boger <laboger@linux.vnet.ibm.com> 2017-11-06 21:56:18 +0000
commit: be943df58860e7dec008ebb8d68428d54e311b94 (patch)
tree: 69a3bce9cb554f691b8bfa73d8c079444f3c877f /src/cmd/internal/obj/ppc64
parent: 4fcc835971ad63cf913ebe074ef6191e35a44ab9 (diff)
download: go-be943df58860e7dec008ebb8d68428d54e311b94.tar.xz
runtime: improve IndexByte for ppc64x
This change adds a better implementation of IndexByte in asm that uses the
vector registers/instructions on ppc64x.

benchmark                      old ns/op     new ns/op     delta
BenchmarkIndexByte/10-8        9.70          9.37          -3.40%
BenchmarkIndexByte/32-8        10.9          10.9          +0.00%
BenchmarkIndexByte/4K-8        254           92.8          -63.46%
BenchmarkIndexByte/4M-8        249246        118435        -52.48%
BenchmarkIndexByte/64M-8       10737987      7383096       -31.24%

benchmark                      old MB/s      new MB/s      speedup
BenchmarkIndexByte/10-8        1030.63       1067.24       1.04x
BenchmarkIndexByte/32-8        2922.69       2928.53       1.00x
BenchmarkIndexByte/4K-8        16065.95      44156.45      2.75x
BenchmarkIndexByte/4M-8        16827.96      35414.21      2.10x
BenchmarkIndexByte/64M-8       6249.67       9089.53       1.45x

Change-Id: I81dbdd620f7bb4e395ce4d1f2a14e8e91e39f9a1
Reviewed-on: https://go-review.googlesource.com/71710
Run-TryBot: Lynn Boger <laboger@linux.vnet.ibm.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Lynn Boger <laboger@linux.vnet.ibm.com>
Diffstat (limited to 'src/cmd/internal/obj/ppc64')
-rw-r--r--  src/cmd/internal/obj/ppc64/a.out.go   2
-rw-r--r--  src/cmd/internal/obj/ppc64/anames.go  2
-rw-r--r--  src/cmd/internal/obj/ppc64/asm9.go    11
3 files changed, 15 insertions, 0 deletions
diff --git a/src/cmd/internal/obj/ppc64/a.out.go b/src/cmd/internal/obj/ppc64/a.out.go
index 6b5a1b4351..e684281774 100644
--- a/src/cmd/internal/obj/ppc64/a.out.go
+++ b/src/cmd/internal/obj/ppc64/a.out.go
@@ -859,6 +859,8 @@ const (
AVCMPNEZB
AVCMPNEZBCC
AVPERM
+ AVBPERMQ
+ AVBPERMD
AVSEL
AVSPLT
AVSPLTB
diff --git a/src/cmd/internal/obj/ppc64/anames.go b/src/cmd/internal/obj/ppc64/anames.go
index 142b53eadd..b7ca133057 100644
--- a/src/cmd/internal/obj/ppc64/anames.go
+++ b/src/cmd/internal/obj/ppc64/anames.go
@@ -474,6 +474,8 @@ var Anames = []string{
"VCMPNEZB",
"VCMPNEZBCC",
"VPERM",
+ "VBPERMQ",
+ "VBPERMD",
"VSEL",
"VSPLT",
"VSPLTB",
diff --git a/src/cmd/internal/obj/ppc64/asm9.go b/src/cmd/internal/obj/ppc64/asm9.go
index 2b8efe846d..1f488d5e4d 100644
--- a/src/cmd/internal/obj/ppc64/asm9.go
+++ b/src/cmd/internal/obj/ppc64/asm9.go
@@ -421,6 +421,9 @@ var optab = []Optab{
/* Vector permute */
{AVPERM, C_VREG, C_VREG, C_VREG, C_VREG, 83, 4, 0}, /* vector permute, va-form */
+ /* Vector bit permute */
+ {AVBPERMQ, C_VREG, C_VREG, C_NONE, C_VREG, 82, 4, 0}, /* vector bit permute, vx-form */
+
/* Vector select */
{AVSEL, C_VREG, C_VREG, C_VREG, C_VREG, 83, 4, 0}, /* vector select, va-form */
@@ -1378,6 +1381,9 @@ func buildop(ctxt *obj.Link) {
case AVPERM: /* vperm */
opset(AVPERM, r0)
+ case AVBPERMQ: /* vbpermq, vbpermd */
+ opset(AVBPERMD, r0)
+
case AVSEL: /* vsel */
opset(AVSEL, r0)
@@ -4165,6 +4171,11 @@ func (c *ctxt9) oprrr(a obj.As) uint32 {
case AVSRAD:
return OPVX(4, 964, 0, 0) /* vsrad - v2.07 */
+ case AVBPERMQ:
+ return OPVC(4, 1356, 0, 0) /* vbpermq - v2.07 */
+ case AVBPERMD:
+ return OPVC(4, 1484, 0, 0) /* vbpermd - v3.00 */
+
case AVCLZB:
return OPVX(4, 1794, 0, 0) /* vclzb - v2.07 */
case AVCLZH: