aboutsummaryrefslogtreecommitdiff
path: root/src/cmd/compile/internal/ssa
diff options
context:
space:
mode:
authorGuoqi Chen <chenguoqi@loongson.cn>2023-04-25 03:27:23 +0800
committerGopher Robot <gobot@golang.org>2023-10-20 00:01:44 +0000
commit3754ca0af299bd2e7d2fdcd5b8fbdb1aaaf0990e (patch)
tree4260799bae9d32edb1835f318500991e504c9288 /src/cmd/compile/internal/ssa
parentc729dc187aeebf10b062602e09727182d695ca52 (diff)
downloadgo-3754ca0af299bd2e7d2fdcd5b8fbdb1aaaf0990e.tar.xz
cmd/compile: improve the implementation of Lowered{Move,Zero} on linux/loong64
Like the CL 487295, when implementing Lowered{Move,Zero}, 8 is first subtracted from Rarg0 (parameter Ptr), and then the offset of 8 is added during subsequent operations on Rarg0. This operation is meaningless, so delete it. Change LoweredMove's Rarg0 register to R20, consistent with duffcopy. goos: linux goarch: loong64 pkg: runtime cpu: Loongson-3C5000 @ 2200.00MHz │ old.bench │ new.bench │ │ sec/op │ sec/op vs base │ Memmove/15 19.10n ± 0% 19.10n ± 0% ~ (p=0.483 n=15) MemmoveUnalignedDst/15 25.02n ± 0% 25.02n ± 0% ~ (p=0.741 n=15) MemmoveUnalignedDst/32 48.22n ± 0% 48.22n ± 0% ~ (p=1.000 n=15) ¹ MemmoveUnalignedDst/64 90.57n ± 0% 90.52n ± 0% ~ (p=0.212 n=15) MemmoveUnalignedDstOverlap/32 44.12n ± 0% 44.13n ± 0% +0.02% (p=0.000 n=15) MemmoveUnalignedDstOverlap/64 87.79n ± 0% 87.80n ± 0% +0.01% (p=0.002 n=15) MemmoveUnalignedSrc/0 3.639n ± 0% 3.639n ± 0% ~ (p=1.000 n=15) ¹ MemmoveUnalignedSrc/1 7.733n ± 0% 7.733n ± 0% ~ (p=1.000 n=15) MemmoveUnalignedSrc/2 9.097n ± 0% 9.097n ± 0% ~ (p=1.000 n=15) MemmoveUnalignedSrc/3 10.46n ± 0% 10.46n ± 0% ~ (p=1.000 n=15) ¹ MemmoveUnalignedSrc/4 11.83n ± 0% 11.83n ± 0% ~ (p=1.000 n=15) ¹ MemmoveUnalignedSrc/64 93.71n ± 0% 93.70n ± 0% ~ (p=0.128 n=15) Memclr/4096 699.1n ± 0% 699.1n ± 0% ~ (p=0.682 n=15) Memclr/65536 11.18µ ± 0% 11.18µ ± 0% -0.01% (p=0.000 n=15) Memclr/1M 175.2µ ± 0% 175.2µ ± 0% ~ (p=0.191 n=15) Memclr/4M 661.8µ ± 0% 662.0µ ± 0% ~ (p=0.486 n=15) MemclrUnaligned/4_5 19.39n ± 0% 20.47n ± 0% +5.57% (p=0.000 n=15) MemclrUnaligned/4_16 22.29n ± 0% 21.38n ± 0% -4.08% (p=0.000 n=15) MemclrUnaligned/4_64 30.58n ± 0% 29.81n ± 0% -2.52% (p=0.000 n=15) MemclrUnaligned/4_65536 11.19µ ± 0% 11.20µ ± 0% +0.02% (p=0.000 n=15) GoMemclr/5 12.73n ± 0% 12.73n ± 0% ~ (p=0.261 n=15) GoMemclr/16 10.01n ± 0% 10.00n ± 0% ~ (p=0.264 n=15) GoMemclr/256 50.94n ± 0% 50.94n ± 0% ~ (p=0.372 n=15) ClearFat15 14.95n ± 0% 15.01n ± 4% ~ (p=0.925 n=15) ClearFat1032 125.5n ± 0% 125.6n ± 0% +0.08% (p=0.000 n=15) CopyFat64 10.58n ± 0% 10.01n ± 0% -5.39% (p=0.000 n=15) CopyFat1040 244.3n ± 0% 155.6n ± 0% -36.31% (p=0.000 n=15) Issue18740/2byte 29.82µ ± 0% 29.82µ ± 0% ~ (p=0.648 n=30) Issue18740/4byte 18.18µ ± 0% 18.18µ ± 0% -0.02% (p=0.001 n=30) Issue18740/8byte 8.395µ ± 0% 8.395µ ± 0% ~ (p=0.401 n=30) geomean 154.5n 151.8n -1.70% ¹ all samples are equal Change-Id: Ia3f3c8b25e1e93c97ab72328651de78ca9dec016 Reviewed-on: https://go-review.googlesource.com/c/go/+/488515 Reviewed-by: Keith Randall <khr@golang.org> Reviewed-by: Bryan Mills <bcmills@google.com> Auto-Submit: Ian Lance Taylor <iant@golang.org> Reviewed-by: WANG Xuerui <git@xen0n.name> Reviewed-by: xiaodong liu <teaofmoli@gmail.com> Reviewed-by: Keith Randall <khr@google.com> Reviewed-by: Meidan Li <limeidan@loongson.cn> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Diffstat (limited to 'src/cmd/compile/internal/ssa')
-rw-r--r--src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go28
-rw-r--r--src/cmd/compile/internal/ssa/opGen.go6
2 files changed, 15 insertions, 19 deletions
diff --git a/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go b/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go
index ee887e7ede..3442fc8d7c 100644
--- a/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go
+++ b/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go
@@ -321,10 +321,9 @@ func init() {
// arg2 = mem
// auxint = alignment
// returns mem
- // SUBV $8, R19
- // MOVV R0, 8(R19)
- // ADDV $8, R19
- // BNE Rarg1, R19, -2(PC)
+ // MOVx R0, (R19)
+ // ADDV $sz, R19
+ // BGEU Rarg1, R19, -2(PC)
{
name: "LoweredZero",
aux: "Int64",
@@ -333,32 +332,31 @@ func init() {
inputs: []regMask{buildReg("R19"), gp},
clobbers: buildReg("R19"),
},
- clobberFlags: true,
+ typ: "Mem",
faultOnNilArg0: true,
},
// large or unaligned move
- // arg0 = address of dst memory (in R4, changed as side effect)
+ // arg0 = address of dst memory (in R20, changed as side effect)
// arg1 = address of src memory (in R19, changed as side effect)
// arg2 = address of the last element of src
// arg3 = mem
// auxint = alignment
// returns mem
- // SUBV $8, R19
- // MOVV 8(R19), Rtmp
- // MOVV Rtmp, (R4)
- // ADDV $8, R19
- // ADDV $8, R4
- // BNE Rarg2, R19, -4(PC)
+ // MOVx (R19), Rtmp
+ // MOVx Rtmp, (R20)
+ // ADDV $sz, R19
+ // ADDV $sz, R20
+ // BGEU Rarg2, R19, -4(PC)
{
name: "LoweredMove",
aux: "Int64",
argLength: 4,
reg: regInfo{
- inputs: []regMask{buildReg("R4"), buildReg("R19"), gp},
- clobbers: buildReg("R19 R4"),
+ inputs: []regMask{buildReg("R20"), buildReg("R19"), gp},
+ clobbers: buildReg("R19 R20"),
},
- clobberFlags: true,
+ typ: "Mem",
faultOnNilArg0: true,
faultOnNilArg1: true,
},
diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go
index ded1bc648c..b2af30a37d 100644
--- a/src/cmd/compile/internal/ssa/opGen.go
+++ b/src/cmd/compile/internal/ssa/opGen.go
@@ -24565,7 +24565,6 @@ var opcodeTable = [...]opInfo{
name: "LoweredZero",
auxType: auxInt64,
argLen: 3,
- clobberFlags: true,
faultOnNilArg0: true,
reg: regInfo{
inputs: []inputInfo{
@@ -24579,16 +24578,15 @@ var opcodeTable = [...]opInfo{
name: "LoweredMove",
auxType: auxInt64,
argLen: 4,
- clobberFlags: true,
faultOnNilArg0: true,
faultOnNilArg1: true,
reg: regInfo{
inputs: []inputInfo{
- {0, 8}, // R4
+ {0, 524288}, // R20
{1, 262144}, // R19
{2, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31
},
- clobbers: 262152, // R4 R19
+ clobbers: 786432, // R19 R20
},
},
{