diff options
| author | Michael Munday <mike.munday@lowrisc.org> | 2024-07-17 23:54:43 +0100 |
|---|---|---|
| committer | Gopher Robot <gobot@golang.org> | 2025-08-05 08:27:15 -0700 |
| commit | fcc036f03b07e58f76ed94bc9a9483ddef96f81c (patch) | |
| tree | 8cea8827babe439b7b0a42f5588fc9713b9adf35 /src | |
| parent | 7a1679d7ae32dd8a01bd355413ee77ba517f5f43 (diff) | |
| download | go-fcc036f03b07e58f76ed94bc9a9483ddef96f81c.tar.xz | |
cmd/compile: optimise float <-> int register moves on riscv64
Use the FMV* instructions to move values between the floating-point and
integer register files.
Note: I'm unsure why there is a slowdown in the Float32bits benchmark.
I've checked and an FMVXS instruction is being used as expected. There
are multiple loads and other instructions in the main loop.
goos: linux
goarch: riscv64
pkg: math
cpu: Spacemit(R) X60
│ fmv-before.txt │ fmv-after.txt │
│ sec/op │ sec/op vs base │
Acos 122.7n ± 0% 122.7n ± 0% ~ (p=1.000 n=10)
Acosh 197.2n ± 0% 191.5n ± 0% -2.89% (p=0.000 n=10)
Asin 122.7n ± 0% 122.7n ± 0% ~ (p=0.474 n=10)
Asinh 231.0n ± 0% 224.1n ± 0% -2.99% (p=0.000 n=10)
Atan 91.39n ± 0% 91.41n ± 0% ~ (p=0.465 n=10)
Atanh 210.3n ± 0% 203.4n ± 0% -3.26% (p=0.000 n=10)
Atan2 149.6n ± 0% 149.6n ± 0% ~ (p=0.721 n=10)
Cbrt 176.5n ± 0% 165.9n ± 0% -6.01% (p=0.000 n=10)
Ceil 25.67n ± 0% 24.42n ± 0% -4.87% (p=0.000 n=10)
Copysign 3.756n ± 0% 3.756n ± 0% ~ (p=0.149 n=10)
Cos 95.15n ± 0% 95.15n ± 0% ~ (p=0.374 n=10)
Cosh 228.6n ± 0% 224.7n ± 0% -1.71% (p=0.000 n=10)
Erf 115.2n ± 0% 115.2n ± 0% ~ (p=0.474 n=10)
Erfc 116.4n ± 0% 116.4n ± 0% ~ (p=0.628 n=10)
Erfinv 133.3n ± 0% 133.3n ± 0% ~ (p=1.000 n=10)
Erfcinv 133.3n ± 0% 133.3n ± 0% ~ (p=1.000 n=10)
Exp 194.1n ± 0% 190.3n ± 0% -1.93% (p=0.000 n=10)
ExpGo 204.7n ± 0% 200.3n ± 0% -2.15% (p=0.000 n=10)
Expm1 137.7n ± 0% 135.2n ± 0% -1.82% (p=0.000 n=10)
Exp2 173.4n ± 0% 169.0n ± 0% -2.54% (p=0.000 n=10)
Exp2Go 182.8n ± 0% 178.4n ± 0% -2.41% (p=0.000 n=10)
Abs 3.756n ± 0% 3.756n ± 0% ~ (p=0.157 n=10)
Dim 12.52n ± 0% 12.52n ± 0% ~ (p=0.737 n=10)
Floor 25.67n ± 0% 24.42n ± 0% -4.87% (p=0.000 n=10)
Max 21.29n ± 0% 20.03n ± 0% -5.92% (p=0.000 n=10)
Min 21.28n ± 0% 20.04n ± 0% -5.85% (p=0.000 n=10)
Mod 344.9n ± 0% 319.2n ± 0% -7.45% (p=0.000 n=10)
Frexp 55.71n ± 0% 48.85n ± 0% -12.30% (p=0.000 n=10)
Gamma 165.9n ± 0% 167.8n ± 0% +1.15% (p=0.000 n=10)
Hypot 73.24n ± 0% 70.74n ± 0% -3.41% (p=0.000 n=10)
HypotGo 84.50n ± 0% 82.63n ± 0% -2.21% (p=0.000 n=10)
Ilogb 49.45n ± 0% 45.70n ± 0% -7.59% (p=0.000 n=10)
J0 556.5n ± 0% 544.0n ± 0% -2.25% (p=0.000 n=10)
J1 555.3n ± 0% 542.8n ± 0% -2.24% (p=0.000 n=10)
Jn 1.181µ ± 0% 1.156µ ± 0% -2.12% (p=0.000 n=10)
Ldexp 59.47n ± 0% 53.84n ± 0% -9.47% (p=0.000 n=10)
Lgamma 167.2n ± 0% 154.6n ± 0% -7.51% (p=0.000 n=10)
Log 160.9n ± 0% 154.6n ± 0% -3.92% (p=0.000 n=10)
Logb 49.45n ± 0% 45.70n ± 0% -7.58% (p=0.000 n=10)
Log1p 147.1n ± 0% 137.1n ± 0% -6.80% (p=0.000 n=10)
Log10 162.1n ± 1% 154.6n ± 0% -4.63% (p=0.000 n=10)
Log2 66.99n ± 0% 60.72n ± 0% -9.36% (p=0.000 n=10)
Modf 29.42n ± 0% 26.29n ± 0% -10.64% (p=0.000 n=10)
Nextafter32 41.95n ± 0% 37.88n ± 0% -9.70% (p=0.000 n=10)
Nextafter64 38.82n ± 0% 33.49n ± 0% -13.73% (p=0.000 n=10)
PowInt 252.3n ± 0% 237.3n ± 0% -5.95% (p=0.000 n=10)
PowFrac 615.5n ± 0% 589.7n ± 0% -4.19% (p=0.000 n=10)
Pow10Pos 10.64n ± 0% 10.64n ± 0% ~ (p=1.000 n=10)
Pow10Neg 24.42n ± 0% 15.02n ± 0% -38.49% (p=0.000 n=10)
Round 21.91n ± 0% 18.16n ± 0% -17.12% (p=0.000 n=10)
RoundToEven 24.42n ± 0% 21.29n ± 0% -12.84% (p=0.000 n=10)
Remainder 308.0n ± 0% 291.2n ± 0% -5.44% (p=0.000 n=10)
Signbit 10.02n ± 0% 10.02n ± 0% ~ (p=1.000 n=10)
Sin 102.7n ± 0% 102.7n ± 0% ~ (p=0.211 n=10)
Sincos 124.0n ± 1% 123.3n ± 0% -0.56% (p=0.002 n=10)
Sinh 239.1n ± 0% 234.7n ± 0% -1.84% (p=0.000 n=10)
SqrtIndirect 2.504n ± 0% 2.504n ± 0% ~ (p=0.303 n=10)
SqrtLatency 15.03n ± 0% 15.02n ± 0% ~ (p=0.598 n=10)
SqrtIndirectLatency 15.02n ± 0% 15.02n ± 0% ~ (p=0.907 n=10)
SqrtGoLatency 165.3n ± 0% 157.2n ± 0% -4.90% (p=0.000 n=10)
SqrtPrime 3.801µ ± 0% 3.802µ ± 0% ~ (p=1.000 n=10)
Tan 125.2n ± 0% 125.2n ± 0% ~ (p=0.458 n=10)
Tanh 244.2n ± 0% 239.9n ± 0% -1.76% (p=0.000 n=10)
Trunc 25.67n ± 0% 24.42n ± 0% -4.87% (p=0.000 n=10)
Y0 550.2n ± 0% 538.1n ± 0% -2.21% (p=0.000 n=10)
Y1 552.8n ± 0% 540.6n ± 0% -2.21% (p=0.000 n=10)
Yn 1.168µ ± 0% 1.143µ ± 0% -2.14% (p=0.000 n=10)
Float64bits 8.139n ± 0% 4.385n ± 0% -46.13% (p=0.000 n=10)
Float64frombits 7.512n ± 0% 3.759n ± 0% -49.96% (p=0.000 n=10)
Float32bits 8.138n ± 0% 9.393n ± 0% +15.42% (p=0.000 n=10)
Float32frombits 7.513n ± 0% 3.757n ± 0% -49.98% (p=0.000 n=10)
FMA 3.756n ± 0% 3.756n ± 0% ~ (p=0.246 n=10)
geomean 77.43n 72.42n -6.47%
Change-Id: I8dac69b1d17cb3d2af78d1c844d2b5d80000d667
Reviewed-on: https://go-review.googlesource.com/c/go/+/599235
Reviewed-by: Keith Randall <khr@google.com>
Auto-Submit: Michael Munday <mikemndy@gmail.com>
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Keith Randall <khr@golang.org>
Diffstat (limited to 'src')
| -rw-r--r-- | src/cmd/compile/internal/riscv64/ssa.go | 2 | ||||
| -rw-r--r-- | src/cmd/compile/internal/ssa/_gen/RISCV64.rules | 23 | ||||
| -rw-r--r-- | src/cmd/compile/internal/ssa/_gen/RISCV64Ops.go | 6 | ||||
| -rw-r--r-- | src/cmd/compile/internal/ssa/opGen.go | 28 | ||||
| -rw-r--r-- | src/cmd/compile/internal/ssa/rewriteRISCV64.go | 312 |
5 files changed, 368 insertions, 3 deletions
diff --git a/src/cmd/compile/internal/riscv64/ssa.go b/src/cmd/compile/internal/riscv64/ssa.go index f54ea47c88..ed20782a29 100644 --- a/src/cmd/compile/internal/riscv64/ssa.go +++ b/src/cmd/compile/internal/riscv64/ssa.go @@ -417,7 +417,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) { p.To.Type = obj.TYPE_REG p.To.Reg = r case ssa.OpRISCV64FSQRTS, ssa.OpRISCV64FNEGS, ssa.OpRISCV64FABSD, ssa.OpRISCV64FSQRTD, ssa.OpRISCV64FNEGD, - ssa.OpRISCV64FMVSX, ssa.OpRISCV64FMVDX, + ssa.OpRISCV64FMVSX, ssa.OpRISCV64FMVXS, ssa.OpRISCV64FMVDX, ssa.OpRISCV64FMVXD, ssa.OpRISCV64FCVTSW, ssa.OpRISCV64FCVTSL, ssa.OpRISCV64FCVTWS, ssa.OpRISCV64FCVTLS, ssa.OpRISCV64FCVTDW, ssa.OpRISCV64FCVTDL, ssa.OpRISCV64FCVTWD, ssa.OpRISCV64FCVTLD, ssa.OpRISCV64FCVTDS, ssa.OpRISCV64FCVTSD, ssa.OpRISCV64NOT, ssa.OpRISCV64NEG, ssa.OpRISCV64NEGW, ssa.OpRISCV64CLZ, ssa.OpRISCV64CLZW, ssa.OpRISCV64CTZ, ssa.OpRISCV64CTZW, diff --git a/src/cmd/compile/internal/ssa/_gen/RISCV64.rules b/src/cmd/compile/internal/ssa/_gen/RISCV64.rules index a99a16adff..69bf1c7c9e 100644 --- a/src/cmd/compile/internal/ssa/_gen/RISCV64.rules +++ b/src/cmd/compile/internal/ssa/_gen/RISCV64.rules @@ -299,6 +299,11 @@ (base.Op != OpSB || !config.ctxt.Flag_dynlink) => (MOV(B|BU|H|HU|W|WU|D)load [off1+off2] {mergeSym(sym1,sym2)} base mem) +(FMOV(W|D)load [off1] {sym1} (MOVaddr [off2] {sym2} base) mem) && + is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && + (base.Op != OpSB || !config.ctxt.Flag_dynlink) => + (FMOV(W|D)load [off1+off2] {mergeSym(sym1,sym2)} base mem) + (MOV(B|H|W|D)store [off1] {sym1} (MOVaddr [off2] {sym2} base) val mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_dynlink) => @@ -309,15 +314,26 @@ (base.Op != OpSB || !config.ctxt.Flag_dynlink) => (MOV(B|H|W|D)storezero [off1+off2] {mergeSym(sym1,sym2)} base mem) +(FMOV(W|D)store [off1] {sym1} (MOVaddr [off2] {sym2} base) val mem) && + is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, 
sym2) && + (base.Op != OpSB || !config.ctxt.Flag_dynlink) => + (FMOV(W|D)store [off1+off2] {mergeSym(sym1,sym2)} base val mem) + (MOV(B|BU|H|HU|W|WU|D)load [off1] {sym} (ADDI [off2] base) mem) && is32Bit(int64(off1)+off2) => (MOV(B|BU|H|HU|W|WU|D)load [off1+int32(off2)] {sym} base mem) +(FMOV(W|D)load [off1] {sym} (ADDI [off2] base) mem) && is32Bit(int64(off1)+off2) => + (FMOV(W|D)load [off1+int32(off2)] {sym} base mem) + (MOV(B|H|W|D)store [off1] {sym} (ADDI [off2] base) val mem) && is32Bit(int64(off1)+off2) => (MOV(B|H|W|D)store [off1+int32(off2)] {sym} base val mem) (MOV(B|H|W|D)storezero [off1] {sym} (ADDI [off2] base) mem) && is32Bit(int64(off1)+off2) => (MOV(B|H|W|D)storezero [off1+int32(off2)] {sym} base mem) +(FMOV(W|D)store [off1] {sym} (ADDI [off2] base) val mem) && is32Bit(int64(off1)+off2) => + (FMOV(W|D)store [off1+int32(off2)] {sym} base val mem) + // Similarly, fold ADDI into MOVaddr to avoid confusing live variable analysis // with OffPtr -> ADDI. (ADDI [c] (MOVaddr [d] {s} x)) && is32Bit(c+int64(d)) => (MOVaddr [int32(c)+d] {s} x) @@ -701,6 +717,13 @@ (MOVHUreg <t> x:(MOVHload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVHUload <t> [off] {sym} ptr mem) (MOVWUreg <t> x:(MOVWload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVWUload <t> [off] {sym} ptr mem) +// Replace load from same location as preceding store with copy. 
+(MOVDload [off] {sym} ptr1 (FMOVDstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) => (FMVXD x) +(FMOVDload [off] {sym} ptr1 (MOVDstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) => (FMVDX x) +(MOVWload [off] {sym} ptr1 (FMOVWstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) => (FMVXS x) +(MOVWUload [off] {sym} ptr1 (FMOVWstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) => (MOVWUreg (FMVXS x)) +(FMOVWload [off] {sym} ptr1 (MOVWstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) => (FMVSX x) + // If a register move has only 1 use, just use the same register without emitting instruction // MOVnop does not emit an instruction, only for ensuring the type. (MOVDreg x) && x.Uses == 1 => (MOVDnop x) diff --git a/src/cmd/compile/internal/ssa/_gen/RISCV64Ops.go b/src/cmd/compile/internal/ssa/_gen/RISCV64Ops.go index c12bc47621..d468a00b0f 100644 --- a/src/cmd/compile/internal/ssa/_gen/RISCV64Ops.go +++ b/src/cmd/compile/internal/ssa/_gen/RISCV64Ops.go @@ -453,7 +453,8 @@ func init() { {name: "FNMSUBS", argLength: 3, reg: fp31, asm: "FNMSUBS", commutative: true, typ: "Float32"}, // -(arg0 * arg1) - arg2 {name: "FSQRTS", argLength: 1, reg: fp11, asm: "FSQRTS", typ: "Float32"}, // sqrt(arg0) {name: "FNEGS", argLength: 1, reg: fp11, asm: "FNEGS", typ: "Float32"}, // -arg0 - {name: "FMVSX", argLength: 1, reg: gpfp, asm: "FMVSX", typ: "Float32"}, // reinterpret arg0 as float + {name: "FMVSX", argLength: 1, reg: gpfp, asm: "FMVSX", typ: "Float32"}, // reinterpret arg0 as float32 + {name: "FMVXS", argLength: 1, reg: fpgp, asm: "FMVXS", typ: "Int32"}, // reinterpret arg0 as int32, sign extended to 64 bits {name: "FCVTSW", argLength: 1, reg: gpfp, asm: "FCVTSW", typ: "Float32"}, // float32(low 32 bits of arg0) {name: "FCVTSL", argLength: 1, reg: gpfp, asm: "FCVTSL", typ: "Float32"}, // float32(arg0) {name: "FCVTWS", argLength: 1, reg: fpgp, asm: "FCVTWS", typ: "Int32"}, // int32(arg0) @@ -480,7 +481,8 @@ func init() { {name: "FNEGD", argLength: 1, reg: fp11, asm: 
"FNEGD", typ: "Float64"}, // -arg0 {name: "FABSD", argLength: 1, reg: fp11, asm: "FABSD", typ: "Float64"}, // abs(arg0) {name: "FSGNJD", argLength: 2, reg: fp21, asm: "FSGNJD", typ: "Float64"}, // copy sign of arg1 to arg0 - {name: "FMVDX", argLength: 1, reg: gpfp, asm: "FMVDX", typ: "Float64"}, // reinterpret arg0 as float + {name: "FMVDX", argLength: 1, reg: gpfp, asm: "FMVDX", typ: "Float64"}, // reinterpret arg0 as float64 + {name: "FMVXD", argLength: 1, reg: fpgp, asm: "FMVXD", typ: "Int64"}, // reinterpret arg0 as int64 {name: "FCVTDW", argLength: 1, reg: gpfp, asm: "FCVTDW", typ: "Float64"}, // float64(low 32 bits of arg0) {name: "FCVTDL", argLength: 1, reg: gpfp, asm: "FCVTDL", typ: "Float64"}, // float64(arg0) {name: "FCVTWD", argLength: 1, reg: fpgp, asm: "FCVTWD", typ: "Int32"}, // int32(arg0) diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index b9c5b1f77c..60f5278d7b 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -2600,6 +2600,7 @@ const ( OpRISCV64FSQRTS OpRISCV64FNEGS OpRISCV64FMVSX + OpRISCV64FMVXS OpRISCV64FCVTSW OpRISCV64FCVTSL OpRISCV64FCVTWS @@ -2625,6 +2626,7 @@ const ( OpRISCV64FABSD OpRISCV64FSGNJD OpRISCV64FMVDX + OpRISCV64FMVXD OpRISCV64FCVTDW OpRISCV64FCVTDL OpRISCV64FCVTWD @@ -34986,6 +34988,19 @@ var opcodeTable = [...]opInfo{ }, }, { + name: "FMVXS", + argLen: 1, + asm: riscv.AFMVXS, + reg: regInfo{ + inputs: []inputInfo{ + {0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31 + }, + outputs: []outputInfo{ + {0, 1006632944}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 + }, + }, + }, + { name: "FCVTSW", argLen: 1, asm: riscv.AFCVTSW, @@ -35346,6 +35361,19 @@ var opcodeTable = [...]opInfo{ }, }, { + name: "FMVXD", + argLen: 1, + asm: riscv.AFMVXD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 
9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31 + }, + outputs: []outputInfo{ + {0, 1006632944}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 + }, + }, + }, + { name: "FCVTDW", argLen: 1, asm: riscv.AFCVTDW, diff --git a/src/cmd/compile/internal/ssa/rewriteRISCV64.go b/src/cmd/compile/internal/ssa/rewriteRISCV64.go index bbdb817900..0dd952f512 100644 --- a/src/cmd/compile/internal/ssa/rewriteRISCV64.go +++ b/src/cmd/compile/internal/ssa/rewriteRISCV64.go @@ -517,6 +517,14 @@ func rewriteValueRISCV64(v *Value) bool { return rewriteValueRISCV64_OpRISCV64FMADDD(v) case OpRISCV64FMADDS: return rewriteValueRISCV64_OpRISCV64FMADDS(v) + case OpRISCV64FMOVDload: + return rewriteValueRISCV64_OpRISCV64FMOVDload(v) + case OpRISCV64FMOVDstore: + return rewriteValueRISCV64_OpRISCV64FMOVDstore(v) + case OpRISCV64FMOVWload: + return rewriteValueRISCV64_OpRISCV64FMOVWload(v) + case OpRISCV64FMOVWstore: + return rewriteValueRISCV64_OpRISCV64FMOVWstore(v) case OpRISCV64FMSUBD: return rewriteValueRISCV64_OpRISCV64FMSUBD(v) case OpRISCV64FMSUBS: @@ -3844,6 +3852,250 @@ func rewriteValueRISCV64_OpRISCV64FMADDS(v *Value) bool { } return false } +func rewriteValueRISCV64_OpRISCV64FMOVDload(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + config := b.Func.Config + // match: (FMOVDload [off1] {sym1} (MOVaddr [off2] {sym2} base) mem) + // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_dynlink) + // result: (FMOVDload [off1+off2] {mergeSym(sym1,sym2)} base mem) + for { + off1 := auxIntToInt32(v.AuxInt) + sym1 := auxToSym(v.Aux) + if v_0.Op != OpRISCV64MOVaddr { + break + } + off2 := auxIntToInt32(v_0.AuxInt) + sym2 := auxToSym(v_0.Aux) + base := v_0.Args[0] + mem := v_1 + if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && (base.Op != OpSB || 
!config.ctxt.Flag_dynlink)) { + break + } + v.reset(OpRISCV64FMOVDload) + v.AuxInt = int32ToAuxInt(off1 + off2) + v.Aux = symToAux(mergeSym(sym1, sym2)) + v.AddArg2(base, mem) + return true + } + // match: (FMOVDload [off1] {sym} (ADDI [off2] base) mem) + // cond: is32Bit(int64(off1)+off2) + // result: (FMOVDload [off1+int32(off2)] {sym} base mem) + for { + off1 := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + if v_0.Op != OpRISCV64ADDI { + break + } + off2 := auxIntToInt64(v_0.AuxInt) + base := v_0.Args[0] + mem := v_1 + if !(is32Bit(int64(off1) + off2)) { + break + } + v.reset(OpRISCV64FMOVDload) + v.AuxInt = int32ToAuxInt(off1 + int32(off2)) + v.Aux = symToAux(sym) + v.AddArg2(base, mem) + return true + } + // match: (FMOVDload [off] {sym} ptr1 (MOVDstore [off] {sym} ptr2 x _)) + // cond: isSamePtr(ptr1, ptr2) + // result: (FMVDX x) + for { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr1 := v_0 + if v_1.Op != OpRISCV64MOVDstore || auxIntToInt32(v_1.AuxInt) != off || auxToSym(v_1.Aux) != sym { + break + } + x := v_1.Args[1] + ptr2 := v_1.Args[0] + if !(isSamePtr(ptr1, ptr2)) { + break + } + v.reset(OpRISCV64FMVDX) + v.AddArg(x) + return true + } + return false +} +func rewriteValueRISCV64_OpRISCV64FMOVDstore(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + config := b.Func.Config + // match: (FMOVDstore [off1] {sym1} (MOVaddr [off2] {sym2} base) val mem) + // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_dynlink) + // result: (FMOVDstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) + for { + off1 := auxIntToInt32(v.AuxInt) + sym1 := auxToSym(v.Aux) + if v_0.Op != OpRISCV64MOVaddr { + break + } + off2 := auxIntToInt32(v_0.AuxInt) + sym2 := auxToSym(v_0.Aux) + base := v_0.Args[0] + val := v_1 + mem := v_2 + if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_dynlink)) { + break + } + 
v.reset(OpRISCV64FMOVDstore) + v.AuxInt = int32ToAuxInt(off1 + off2) + v.Aux = symToAux(mergeSym(sym1, sym2)) + v.AddArg3(base, val, mem) + return true + } + // match: (FMOVDstore [off1] {sym} (ADDI [off2] base) val mem) + // cond: is32Bit(int64(off1)+off2) + // result: (FMOVDstore [off1+int32(off2)] {sym} base val mem) + for { + off1 := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + if v_0.Op != OpRISCV64ADDI { + break + } + off2 := auxIntToInt64(v_0.AuxInt) + base := v_0.Args[0] + val := v_1 + mem := v_2 + if !(is32Bit(int64(off1) + off2)) { + break + } + v.reset(OpRISCV64FMOVDstore) + v.AuxInt = int32ToAuxInt(off1 + int32(off2)) + v.Aux = symToAux(sym) + v.AddArg3(base, val, mem) + return true + } + return false +} +func rewriteValueRISCV64_OpRISCV64FMOVWload(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + config := b.Func.Config + // match: (FMOVWload [off1] {sym1} (MOVaddr [off2] {sym2} base) mem) + // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_dynlink) + // result: (FMOVWload [off1+off2] {mergeSym(sym1,sym2)} base mem) + for { + off1 := auxIntToInt32(v.AuxInt) + sym1 := auxToSym(v.Aux) + if v_0.Op != OpRISCV64MOVaddr { + break + } + off2 := auxIntToInt32(v_0.AuxInt) + sym2 := auxToSym(v_0.Aux) + base := v_0.Args[0] + mem := v_1 + if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_dynlink)) { + break + } + v.reset(OpRISCV64FMOVWload) + v.AuxInt = int32ToAuxInt(off1 + off2) + v.Aux = symToAux(mergeSym(sym1, sym2)) + v.AddArg2(base, mem) + return true + } + // match: (FMOVWload [off1] {sym} (ADDI [off2] base) mem) + // cond: is32Bit(int64(off1)+off2) + // result: (FMOVWload [off1+int32(off2)] {sym} base mem) + for { + off1 := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + if v_0.Op != OpRISCV64ADDI { + break + } + off2 := auxIntToInt64(v_0.AuxInt) + base := v_0.Args[0] + mem := v_1 + if !(is32Bit(int64(off1) + 
off2)) { + break + } + v.reset(OpRISCV64FMOVWload) + v.AuxInt = int32ToAuxInt(off1 + int32(off2)) + v.Aux = symToAux(sym) + v.AddArg2(base, mem) + return true + } + // match: (FMOVWload [off] {sym} ptr1 (MOVWstore [off] {sym} ptr2 x _)) + // cond: isSamePtr(ptr1, ptr2) + // result: (FMVSX x) + for { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr1 := v_0 + if v_1.Op != OpRISCV64MOVWstore || auxIntToInt32(v_1.AuxInt) != off || auxToSym(v_1.Aux) != sym { + break + } + x := v_1.Args[1] + ptr2 := v_1.Args[0] + if !(isSamePtr(ptr1, ptr2)) { + break + } + v.reset(OpRISCV64FMVSX) + v.AddArg(x) + return true + } + return false +} +func rewriteValueRISCV64_OpRISCV64FMOVWstore(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + config := b.Func.Config + // match: (FMOVWstore [off1] {sym1} (MOVaddr [off2] {sym2} base) val mem) + // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_dynlink) + // result: (FMOVWstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) + for { + off1 := auxIntToInt32(v.AuxInt) + sym1 := auxToSym(v.Aux) + if v_0.Op != OpRISCV64MOVaddr { + break + } + off2 := auxIntToInt32(v_0.AuxInt) + sym2 := auxToSym(v_0.Aux) + base := v_0.Args[0] + val := v_1 + mem := v_2 + if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_dynlink)) { + break + } + v.reset(OpRISCV64FMOVWstore) + v.AuxInt = int32ToAuxInt(off1 + off2) + v.Aux = symToAux(mergeSym(sym1, sym2)) + v.AddArg3(base, val, mem) + return true + } + // match: (FMOVWstore [off1] {sym} (ADDI [off2] base) val mem) + // cond: is32Bit(int64(off1)+off2) + // result: (FMOVWstore [off1+int32(off2)] {sym} base val mem) + for { + off1 := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + if v_0.Op != OpRISCV64ADDI { + break + } + off2 := auxIntToInt64(v_0.AuxInt) + base := v_0.Args[0] + val := v_1 + mem := v_2 + if !(is32Bit(int64(off1) + off2)) { + break 
+ } + v.reset(OpRISCV64FMOVWstore) + v.AuxInt = int32ToAuxInt(off1 + int32(off2)) + v.Aux = symToAux(sym) + v.AddArg3(base, val, mem) + return true + } + return false +} func rewriteValueRISCV64_OpRISCV64FMSUBD(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] @@ -4977,6 +5229,25 @@ func rewriteValueRISCV64_OpRISCV64MOVDload(v *Value) bool { v.AddArg2(base, mem) return true } + // match: (MOVDload [off] {sym} ptr1 (FMOVDstore [off] {sym} ptr2 x _)) + // cond: isSamePtr(ptr1, ptr2) + // result: (FMVXD x) + for { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr1 := v_0 + if v_1.Op != OpRISCV64FMOVDstore || auxIntToInt32(v_1.AuxInt) != off || auxToSym(v_1.Aux) != sym { + break + } + x := v_1.Args[1] + ptr2 := v_1.Args[0] + if !(isSamePtr(ptr1, ptr2)) { + break + } + v.reset(OpRISCV64FMVXD) + v.AddArg(x) + return true + } return false } func rewriteValueRISCV64_OpRISCV64MOVDnop(v *Value) bool { @@ -5658,6 +5929,7 @@ func rewriteValueRISCV64_OpRISCV64MOVWUload(v *Value) bool { v_0 := v.Args[0] b := v.Block config := b.Func.Config + typ := &b.Func.Config.Types // match: (MOVWUload [off1] {sym1} (MOVaddr [off2] {sym2} base) mem) // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_dynlink) // result: (MOVWUload [off1+off2] {mergeSym(sym1,sym2)} base mem) @@ -5701,6 +5973,27 @@ func rewriteValueRISCV64_OpRISCV64MOVWUload(v *Value) bool { v.AddArg2(base, mem) return true } + // match: (MOVWUload [off] {sym} ptr1 (FMOVWstore [off] {sym} ptr2 x _)) + // cond: isSamePtr(ptr1, ptr2) + // result: (MOVWUreg (FMVXS x)) + for { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr1 := v_0 + if v_1.Op != OpRISCV64FMOVWstore || auxIntToInt32(v_1.AuxInt) != off || auxToSym(v_1.Aux) != sym { + break + } + x := v_1.Args[1] + ptr2 := v_1.Args[0] + if !(isSamePtr(ptr1, ptr2)) { + break + } + v.reset(OpRISCV64MOVWUreg) + v0 := b.NewValue0(v_1.Pos, OpRISCV64FMVXS, typ.Int32) + v0.AddArg(x) + v.AddArg(v0) + 
return true + } return false } func rewriteValueRISCV64_OpRISCV64MOVWUreg(v *Value) bool { @@ -5891,6 +6184,25 @@ func rewriteValueRISCV64_OpRISCV64MOVWload(v *Value) bool { v.AddArg2(base, mem) return true } + // match: (MOVWload [off] {sym} ptr1 (FMOVWstore [off] {sym} ptr2 x _)) + // cond: isSamePtr(ptr1, ptr2) + // result: (FMVXS x) + for { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr1 := v_0 + if v_1.Op != OpRISCV64FMOVWstore || auxIntToInt32(v_1.AuxInt) != off || auxToSym(v_1.Aux) != sym { + break + } + x := v_1.Args[1] + ptr2 := v_1.Args[0] + if !(isSamePtr(ptr1, ptr2)) { + break + } + v.reset(OpRISCV64FMVXS) + v.AddArg(x) + return true + } return false } func rewriteValueRISCV64_OpRISCV64MOVWreg(v *Value) bool { |
