about | summary | refs | log | tree | commit | diff
path: root/src/cmd
diff options
context:
space:
mode:
author: Balaram Makam <bmakam.qdt@qualcommdatacenter.com> 2018-04-02 16:22:08 -0400
committer: Cherry Zhang <cherryyz@google.com> 2018-04-04 18:37:24 +0000
commit: d7c7d88b2c991c744f3cc1eb5865218f52ef98b0 (patch)
tree: f082928972693ea1e6ca7aa417cd6db5ae5fcd2d /src/cmd
parent: e6ab614fda5ce7533bda81f2b0de0f9fe18139bf (diff)
download: go-d7c7d88b2c991c744f3cc1eb5865218f52ef98b0.tar.xz
cmd/compile: intrinsify math/big.mulWW on ARM64

Performance numbers on amberwing:

pkg: math/big
name                 old time/op  new time/op  delta
QuoRem               3.08µs ± 0%  2.93µs ± 1%   -4.89%  (p=0.008 n=5+5)
ModSqrt225_Tonelli    721µs ± 0%   718µs ± 0%   -0.46%  (p=0.008 n=5+5)
ModSqrt224_3Mod4      218µs ± 0%   217µs ± 0%   -0.27%  (p=0.008 n=5+5)
ModSqrt5430_Tonelli   2.91s ± 0%   2.91s ± 0%     ~     (p=0.222 n=5+5)
ModSqrt5430_3Mod4     970ms ± 0%   970ms ± 0%     ~     (p=0.151 n=5+5)
Sqrt                 45.9µs ± 0%  43.8µs ± 0%   -4.63%  (p=0.008 n=5+5)
IntSqr/1             19.9ns ± 0%  17.3ns ± 0%  -13.07%  (p=0.008 n=5+5)
IntSqr/2             52.6ns ± 0%  50.8ns ± 0%   -3.35%  (p=0.008 n=5+5)
IntSqr/3             70.4ns ± 0%  69.4ns ± 0%     ~     (p=0.079 n=4+5)
IntSqr/5              103ns ± 0%    99ns ± 0%   -3.98%  (p=0.008 n=5+5)
IntSqr/8              179ns ± 0%   178ns ± 0%   -0.56%  (p=0.008 n=5+5)
IntSqr/10             272ns ± 0%   272ns ± 0%     ~     (all equal)
IntSqr/20             763ns ± 0%   787ns ± 0%   +3.15%  (p=0.016 n=5+4)
IntSqr/30            1.25µs ± 1%  1.29µs ± 1%   +3.27%  (p=0.008 n=5+5)
IntSqr/50            2.64µs ± 0%  2.71µs ± 0%   +2.61%  (p=0.008 n=5+5)
IntSqr/80            5.67µs ± 0%  5.72µs ± 0%   +0.88%  (p=0.008 n=5+5)
IntSqr/100           8.05µs ± 0%  8.09µs ± 0%   +0.45%  (p=0.008 n=5+5)
IntSqr/200           28.0µs ± 0%  28.1µs ± 0%     ~     (p=0.151 n=5+5)
IntSqr/300           59.4µs ± 0%  59.6µs ± 0%   +0.36%  (p=0.008 n=5+5)
IntSqr/500            141µs ± 0%   141µs ± 0%   +0.08%  (p=0.008 n=5+5)
IntSqr/800            280µs ± 0%   280µs ± 0%   -0.12%  (p=0.008 n=5+5)
IntSqr/1000           429µs ± 0%   428µs ± 0%   -0.27%  (p=0.008 n=5+5)

pkg: crypto-ecdsa
name      old time/op  new time/op  delta
SignP384  7.85ms ± 1%  7.61ms ± 1%  -3.12%  (p=0.008 n=5+5)

Change-Id: I1ab30856cc0e570f6312f0bd8914779b55adbc16
Reviewed-on: https://go-review.googlesource.com/104135
Reviewed-by: Cherry Zhang <cherryyz@google.com>
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Diffstat (limited to 'src/cmd')
-rw-r--r-- src/cmd/compile/internal/arm64/ssa.go | 15
-rw-r--r-- src/cmd/compile/internal/gc/ssa.go | 2
-rw-r--r-- src/cmd/compile/internal/ssa/gen/ARM64.rules | 3
-rw-r--r-- src/cmd/compile/internal/ssa/gen/ARM64Ops.go | 2
-rw-r--r-- src/cmd/compile/internal/ssa/opGen.go | 16
-rw-r--r-- src/cmd/compile/internal/ssa/rewriteARM64.go | 16
6 files changed, 52 insertions, 2 deletions
diff --git a/src/cmd/compile/internal/arm64/ssa.go b/src/cmd/compile/internal/arm64/ssa.go
index 4984f9a007..b72ead7368 100644
--- a/src/cmd/compile/internal/arm64/ssa.go
+++ b/src/cmd/compile/internal/arm64/ssa.go
@@ -413,6 +413,21 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p.Reg = v.Args[0].Reg()
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg()
+ case ssa.OpARM64LoweredMuluhilo:
+ r0 := v.Args[0].Reg()
+ r1 := v.Args[1].Reg()
+ p := s.Prog(arm64.AUMULH)
+ p.From.Type = obj.TYPE_REG
+ p.From.Reg = r1
+ p.Reg = r0
+ p.To.Type = obj.TYPE_REG
+ p.To.Reg = v.Reg0()
+ p1 := s.Prog(arm64.AMUL)
+ p1.From.Type = obj.TYPE_REG
+ p1.From.Reg = r1
+ p1.Reg = r0
+ p1.To.Type = obj.TYPE_REG
+ p1.To.Reg = v.Reg1()
case ssa.OpARM64LoweredAtomicExchange64,
ssa.OpARM64LoweredAtomicExchange32:
// LDAXR (Rarg0), Rout
diff --git a/src/cmd/compile/internal/gc/ssa.go b/src/cmd/compile/internal/gc/ssa.go
index 7a2de3c8fb..02e8a62467 100644
--- a/src/cmd/compile/internal/gc/ssa.go
+++ b/src/cmd/compile/internal/gc/ssa.go
@@ -3207,7 +3207,7 @@ func init() {
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
return s.newValue2(ssa.OpMul64uhilo, types.NewTuple(types.Types[TUINT64], types.Types[TUINT64]), args[0], args[1])
},
- sys.ArchAMD64)
+ sys.ArchAMD64, sys.ArchARM64)
add("math/big", "divWW",
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
return s.newValue3(ssa.OpDiv128u, types.NewTuple(types.Types[TUINT64], types.Types[TUINT64]), args[0], args[1], args[2])
diff --git a/src/cmd/compile/internal/ssa/gen/ARM64.rules b/src/cmd/compile/internal/ssa/gen/ARM64.rules
index 5eaf76cc8c..edeadfd1d2 100644
--- a/src/cmd/compile/internal/ssa/gen/ARM64.rules
+++ b/src/cmd/compile/internal/ssa/gen/ARM64.rules
@@ -29,6 +29,7 @@
(Hmul64u x y) -> (UMULH x y)
(Hmul32 x y) -> (SRAconst (MULL <typ.Int64> x y) [32])
(Hmul32u x y) -> (SRAconst (UMULL <typ.UInt64> x y) [32])
+(Mul64uhilo x y) -> (LoweredMuluhilo x y)
(Div64 x y) -> (DIV x y)
(Div64u x y) -> (UDIV x y)
@@ -1791,4 +1792,4 @@
(FSUBS a (FNMULS x y)) -> (FMADDS a x y)
(FSUBD a (FNMULD x y)) -> (FMADDD a x y)
(FSUBS (FNMULS x y) a) -> (FNMADDS a x y)
-(FSUBD (FNMULD x y) a) -> (FNMADDD a x y)
\ No newline at end of file
+(FSUBD (FNMULD x y) a) -> (FNMADDD a x y)
diff --git a/src/cmd/compile/internal/ssa/gen/ARM64Ops.go b/src/cmd/compile/internal/ssa/gen/ARM64Ops.go
index b311359721..ec75ca38c6 100644
--- a/src/cmd/compile/internal/ssa/gen/ARM64Ops.go
+++ b/src/cmd/compile/internal/ssa/gen/ARM64Ops.go
@@ -142,6 +142,7 @@ func init() {
gp21nog = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{gp}}
gp2flags = regInfo{inputs: []regMask{gpg, gpg}}
gp2flags1 = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{gp}}
+ gp22 = regInfo{inputs: []regMask{gpg, gpg}, outputs: []regMask{gp, gp}}
gpload = regInfo{inputs: []regMask{gpspsbg}, outputs: []regMask{gp}}
gpstore = regInfo{inputs: []regMask{gpspsbg, gpg}}
gpstore0 = regInfo{inputs: []regMask{gpspsbg}}
@@ -203,6 +204,7 @@ func init() {
{name: "EON", argLength: 2, reg: gp21, asm: "EON"}, // arg0 ^ ^arg1
{name: "ORN", argLength: 2, reg: gp21, asm: "ORN"}, // arg0 | ^arg1
+ {name: "LoweredMuluhilo", argLength: 2, reg: gp22, resultNotInArgs: true}, // arg0 * arg1, returns (hi, lo)
// unary ops
{name: "MVN", argLength: 1, reg: gp11, asm: "MVN"}, // ^arg0
{name: "NEG", argLength: 1, reg: gp11, asm: "NEG"}, // -arg0
diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go
index 32c595382a..ef3875ec02 100644
--- a/src/cmd/compile/internal/ssa/opGen.go
+++ b/src/cmd/compile/internal/ssa/opGen.go
@@ -1050,6 +1050,7 @@ const (
OpARM64BIC
OpARM64EON
OpARM64ORN
+ OpARM64LoweredMuluhilo
OpARM64MVN
OpARM64NEG
OpARM64FNEGS
@@ -13579,6 +13580,21 @@ var opcodeTable = [...]opInfo{
},
},
{
+ name: "LoweredMuluhilo",
+ argLen: 2,
+ resultNotInArgs: true,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+ {1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+ },
+ outputs: []outputInfo{
+ {0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
+ {1, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
+ },
+ },
+ },
+ {
name: "MVN",
argLen: 1,
asm: arm64.AMVN,
diff --git a/src/cmd/compile/internal/ssa/rewriteARM64.go b/src/cmd/compile/internal/ssa/rewriteARM64.go
index 9508b46072..407719e744 100644
--- a/src/cmd/compile/internal/ssa/rewriteARM64.go
+++ b/src/cmd/compile/internal/ssa/rewriteARM64.go
@@ -591,6 +591,8 @@ func rewriteValueARM64(v *Value) bool {
return rewriteValueARM64_OpMul64_0(v)
case OpMul64F:
return rewriteValueARM64_OpMul64F_0(v)
+ case OpMul64uhilo:
+ return rewriteValueARM64_OpMul64uhilo_0(v)
case OpMul8:
return rewriteValueARM64_OpMul8_0(v)
case OpNeg16:
@@ -18906,6 +18908,20 @@ func rewriteValueARM64_OpMul64F_0(v *Value) bool {
return true
}
}
+func rewriteValueARM64_OpMul64uhilo_0(v *Value) bool {
+ // match: (Mul64uhilo x y)
+ // cond:
+ // result: (LoweredMuluhilo x y)
+ for {
+ _ = v.Args[1]
+ x := v.Args[0]
+ y := v.Args[1]
+ v.reset(OpARM64LoweredMuluhilo)
+ v.AddArg(x)
+ v.AddArg(y)
+ return true
+ }
+}
func rewriteValueARM64_OpMul8_0(v *Value) bool {
// match: (Mul8 x y)
// cond: