diff options
| author | Xiaolin Zhao <zhaoxiaolin@loongson.cn> | 2024-11-14 11:35:39 +0800 |
|---|---|---|
| committer | abner chenc <chenguoqi@loongson.cn> | 2025-03-10 17:55:10 -0700 |
| commit | 2a772a2fe7db5602a2932c63a0278ed45e8762cc (patch) | |
| tree | 601d35b885347b62aae54d0f34cde3f39a5a663d /test/codegen | |
| parent | 2abe5ceb1968edb882c8318af674c4133dd0bb75 (diff) | |
| download | go-2a772a2fe7db5602a2932c63a0278ed45e8762cc.tar.xz | |
cmd/compile: optimize shifts of int32 and uint32 on loong64
goos: linux
goarch: loong64
pkg: math/bits
cpu: Loongson-3A6000-HV @ 2500.00MHz
| bench.old | bench.new |
| sec/op | sec/op vs base |
LeadingZeros 1.100n ± 1% 1.101n ± 0% ~ (p=0.566 n=10)
LeadingZeros8 1.501n ± 0% 1.502n ± 0% +0.07% (p=0.000 n=10)
LeadingZeros16 1.501n ± 0% 1.502n ± 0% +0.07% (p=0.000 n=10)
LeadingZeros32 1.2010n ± 0% 0.9511n ± 0% -20.81% (p=0.000 n=10)
LeadingZeros64 1.104n ± 1% 1.119n ± 0% +1.40% (p=0.000 n=10)
TrailingZeros 0.8137n ± 0% 0.8086n ± 0% -0.63% (p=0.001 n=10)
TrailingZeros8 1.031n ± 1% 1.031n ± 1% ~ (p=0.956 n=10)
TrailingZeros16 0.8204n ± 1% 0.8114n ± 0% -1.11% (p=0.000 n=10)
TrailingZeros32 0.8145n ± 0% 0.8090n ± 0% -0.68% (p=0.000 n=10)
TrailingZeros64 0.8159n ± 0% 0.8089n ± 1% -0.86% (p=0.000 n=10)
OnesCount 0.8672n ± 0% 0.8677n ± 0% +0.06% (p=0.000 n=10)
OnesCount8 0.8005n ± 0% 0.8009n ± 0% +0.06% (p=0.000 n=10)
OnesCount16 0.9339n ± 0% 0.9344n ± 0% +0.05% (p=0.000 n=10)
OnesCount32 0.8672n ± 0% 0.8677n ± 0% +0.06% (p=0.000 n=10)
OnesCount64 1.201n ± 0% 1.201n ± 0% ~ (p=0.474 n=10)
RotateLeft 0.8005n ± 0% 0.8009n ± 0% +0.05% (p=0.000 n=10)
RotateLeft8 1.202n ± 0% 1.202n ± 0% ~ (p=0.210 n=10)
RotateLeft16 0.8050n ± 0% 0.8036n ± 0% -0.17% (p=0.002 n=10)
RotateLeft32 0.6674n ± 0% 0.6674n ± 0% ~ (p=1.000 n=10)
RotateLeft64 0.6673n ± 0% 0.6674n ± 0% ~ (p=0.072 n=10)
Reverse 0.4123n ± 0% 0.4067n ± 1% -1.37% (p=0.000 n=10)
Reverse8 0.8005n ± 0% 0.8009n ± 0% +0.05% (p=0.000 n=10)
Reverse16 0.8004n ± 0% 0.8009n ± 0% +0.06% (p=0.000 n=10)
Reverse32 0.8004n ± 0% 0.8009n ± 0% +0.06% (p=0.000 n=10)
Reverse64 0.8004n ± 0% 0.8009n ± 0% +0.06% (p=0.001 n=10)
ReverseBytes 0.4100n ± 1% 0.4057n ± 1% -1.06% (p=0.002 n=10)
ReverseBytes16 0.8004n ± 0% 0.8009n ± 0% +0.07% (p=0.000 n=10)
ReverseBytes32 0.8005n ± 0% 0.8009n ± 0% +0.05% (p=0.000 n=10)
ReverseBytes64 0.8005n ± 0% 0.8009n ± 0% +0.05% (p=0.000 n=10)
Add 1.201n ± 0% 1.201n ± 0% ~ (p=1.000 n=10)
Add32 1.201n ± 0% 1.201n ± 0% ~ (p=0.474 n=10)
Add64 1.201n ± 0% 1.201n ± 0% ~ (p=1.000 n=10)
Add64multiple 1.831n ± 0% 1.832n ± 0% ~ (p=1.000 n=10)
Sub 1.201n ± 0% 1.201n ± 0% ~ (p=1.000 n=10)
Sub32 1.601n ± 0% 1.602n ± 0% +0.06% (p=0.000 n=10)
Sub64 1.201n ± 0% 1.201n ± 0% ~ (p=0.474 n=10)
Sub64multiple 2.400n ± 0% 2.402n ± 0% +0.10% (p=0.000 n=10)
Mul 0.8005n ± 0% 0.8009n ± 0% +0.05% (p=0.000 n=10)
Mul32 0.8005n ± 0% 0.8009n ± 0% +0.05% (p=0.000 n=10)
Mul64 0.8004n ± 0% 0.8008n ± 0% +0.05% (p=0.000 n=10)
Div 9.107n ± 0% 9.083n ± 0% ~ (p=0.255 n=10)
Div32 4.009n ± 0% 4.011n ± 0% +0.05% (p=0.000 n=10)
Div64 9.705n ± 0% 9.711n ± 0% +0.06% (p=0.000 n=10)
geomean 1.089n 1.083n -0.62%
goos: linux
goarch: loong64
pkg: math/bits
cpu: Loongson-3A5000 @ 2500.00MHz
| bench.old | bench.new |
| sec/op | sec/op vs base |
LeadingZeros 1.352n ± 0% 1.341n ± 4% -0.81% (p=0.024 n=10)
LeadingZeros8 1.766n ± 0% 1.781n ± 0% +0.88% (p=0.000 n=10)
LeadingZeros16 1.766n ± 0% 1.782n ± 0% +0.88% (p=0.000 n=10)
LeadingZeros32 1.536n ± 0% 1.341n ± 1% -12.73% (p=0.000 n=10)
LeadingZeros64 1.351n ± 1% 1.338n ± 0% -0.96% (p=0.000 n=10)
TrailingZeros 0.9037n ± 0% 0.9025n ± 0% -0.12% (p=0.020 n=10)
TrailingZeros8 1.087n ± 3% 1.056n ± 0% ~ (p=0.060 n=10)
TrailingZeros16 1.101n ± 0% 1.101n ± 0% ~ (p=0.211 n=10)
TrailingZeros32 0.9040n ± 0% 0.9024n ± 1% -0.18% (p=0.017 n=10)
TrailingZeros64 0.9043n ± 0% 0.9028n ± 1% ~ (p=0.118 n=10)
OnesCount 1.503n ± 2% 1.482n ± 1% -1.43% (p=0.001 n=10)
OnesCount8 1.207n ± 0% 1.206n ± 0% -0.12% (p=0.000 n=10)
OnesCount16 1.501n ± 0% 1.534n ± 0% +2.13% (p=0.000 n=10)
OnesCount32 1.483n ± 1% 1.531n ± 1% +3.27% (p=0.000 n=10)
OnesCount64 1.301n ± 0% 1.302n ± 0% +0.08% (p=0.000 n=10)
RotateLeft 0.8136n ± 4% 0.8083n ± 0% -0.66% (p=0.002 n=10)
RotateLeft8 1.311n ± 0% 1.310n ± 0% ~ (p=0.786 n=10)
RotateLeft16 1.165n ± 0% 1.149n ± 0% -1.33% (p=0.001 n=10)
RotateLeft32 0.8138n ± 1% 0.8093n ± 0% -0.57% (p=0.017 n=10)
RotateLeft64 0.8149n ± 1% 0.8088n ± 0% -0.74% (p=0.000 n=10)
Reverse 0.5195n ± 1% 0.5109n ± 0% -1.67% (p=0.000 n=10)
Reverse8 0.8007n ± 0% 0.8010n ± 0% +0.04% (p=0.000 n=10)
Reverse16 0.8007n ± 0% 0.8010n ± 0% +0.04% (p=0.000 n=10)
Reverse32 0.8007n ± 0% 0.8010n ± 0% +0.04% (p=0.012 n=10)
Reverse64 0.8007n ± 0% 0.8010n ± 0% +0.04% (p=0.010 n=10)
ReverseBytes 0.5120n ± 1% 0.5122n ± 2% ~ (p=0.306 n=10)
ReverseBytes16 0.8007n ± 0% 0.8010n ± 0% +0.04% (p=0.000 n=10)
ReverseBytes32 0.8007n ± 0% 0.8010n ± 0% +0.04% (p=0.000 n=10)
ReverseBytes64 0.8007n ± 0% 0.8010n ± 0% +0.04% (p=0.000 n=10)
Add 1.201n ± 0% 1.201n ± 4% ~ (p=0.334 n=10)
Add32 1.201n ± 0% 1.201n ± 0% ~ (p=0.563 n=10)
Add64 1.201n ± 0% 1.201n ± 1% ~ (p=0.652 n=10)
Add64multiple 1.909n ± 0% 1.902n ± 0% ~ (p=0.126 n=10)
Sub 1.201n ± 0% 1.201n ± 0% ~ (p=1.000 n=10)
Sub32 1.655n ± 0% 1.654n ± 0% ~ (p=0.589 n=10)
Sub64 1.201n ± 0% 1.201n ± 0% ~ (p=1.000 n=10)
Sub64multiple 2.150n ± 0% 2.180n ± 4% +1.37% (p=0.000 n=10)
Mul 0.9341n ± 0% 0.9345n ± 0% +0.04% (p=0.011 n=10)
Mul32 1.053n ± 0% 1.030n ± 0% -2.23% (p=0.000 n=10)
Mul64 0.9341n ± 0% 0.9345n ± 0% +0.04% (p=0.018 n=10)
Div 11.59n ± 0% 11.57n ± 1% ~ (p=0.091 n=10)
Div32 4.337n ± 0% 4.337n ± 1% ~ (p=0.783 n=10)
Div64 12.81n ± 0% 12.76n ± 0% -0.39% (p=0.001 n=10)
geomean 1.257n 1.252n -0.46%
Change-Id: I9e93ea49736760c19dc6b6463d2aa95878121b7b
Reviewed-on: https://go-review.googlesource.com/c/go/+/627855
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: David Chase <drchase@google.com>
Reviewed-by: abner chenc <chenguoqi@loongson.cn>
Reviewed-by: Meidan Li <limeidan@loongson.cn>
Reviewed-by: Junyang Shao <shaojunyang@google.com>
Diffstat (limited to 'test/codegen')
| -rw-r--r-- | test/codegen/shift.go | 15 |
1 file changed, 15 insertions, 0 deletions
```diff
diff --git a/test/codegen/shift.go b/test/codegen/shift.go
index 52efefb0ed..8254e974df 100644
--- a/test/codegen/shift.go
+++ b/test/codegen/shift.go
@@ -11,49 +11,58 @@ package codegen
 // ------------------ //
 
 func lshConst64x64(v int64) int64 {
+	// loong64:"SLLV"
 	// ppc64x:"SLD"
 	// riscv64:"SLLI",-"AND",-"SLTIU"
 	return v << uint64(33)
 }
 
 func rshConst64Ux64(v uint64) uint64 {
+	// loong64:"SRLV"
 	// ppc64x:"SRD"
 	// riscv64:"SRLI\t",-"AND",-"SLTIU"
 	return v >> uint64(33)
 }
 
 func rshConst64Ux64Overflow32(v uint32) uint64 {
+	// loong64:"MOVV\t\\$0,",-"SRL\t"
 	// riscv64:"MOV\t\\$0,",-"SRL"
 	return uint64(v) >> 32
 }
 
 func rshConst64Ux64Overflow16(v uint16) uint64 {
+	// loong64:"MOVV\t\\$0,",-"SRLV"
 	// riscv64:"MOV\t\\$0,",-"SRL"
 	return uint64(v) >> 16
 }
 
 func rshConst64Ux64Overflow8(v uint8) uint64 {
+	// loong64:"MOVV\t\\$0,",-"SRLV"
 	// riscv64:"MOV\t\\$0,",-"SRL"
 	return uint64(v) >> 8
 }
 
 func rshConst64x64(v int64) int64 {
+	// loong64:"SRAV"
 	// ppc64x:"SRAD"
 	// riscv64:"SRAI\t",-"OR",-"SLTIU"
 	return v >> uint64(33)
 }
 
 func rshConst64x64Overflow32(v int32) int64 {
+	// loong64:"SRA\t\\$31"
 	// riscv64:"SRAIW",-"SLLI",-"SRAI\t"
 	return int64(v) >> 32
 }
 
 func rshConst64x64Overflow16(v int16) int64 {
+	// loong64:"SLLV\t\\$48","SRAV\t\\$63"
 	// riscv64:"SLLI","SRAI",-"SRAIW"
 	return int64(v) >> 16
 }
 
 func rshConst64x64Overflow8(v int8) int64 {
+	// loong64:"SLLV\t\\$56","SRAV\t\\$63"
 	// riscv64:"SLLI","SRAI",-"SRAIW"
 	return int64(v) >> 8
 }
@@ -69,36 +78,42 @@ func lshConst64x1(v int64) int64 {
 }
 
 func lshConst32x64(v int32) int32 {
+	// loong64:"SLL\t"
 	// ppc64x:"SLW"
 	// riscv64:"SLLI",-"AND",-"SLTIU", -"MOVW"
 	return v << uint64(29)
 }
 
 func rshConst32Ux64(v uint32) uint32 {
+	// loong64:"SRL\t"
 	// ppc64x:"SRW"
 	// riscv64:"SRLIW",-"AND",-"SLTIU", -"MOVW"
 	return v >> uint64(29)
 }
 
 func rshConst32x64(v int32) int32 {
+	// loong64:"SRA\t"
 	// ppc64x:"SRAW"
 	// riscv64:"SRAIW",-"OR",-"SLTIU", -"MOVW"
 	return v >> uint64(29)
 }
 
 func lshConst64x32(v int64) int64 {
+	// loong64:"SLLV"
 	// ppc64x:"SLD"
 	// riscv64:"SLLI",-"AND",-"SLTIU"
 	return v << uint32(33)
 }
 
 func rshConst64Ux32(v uint64) uint64 {
+	// loong64:"SRLV"
 	// ppc64x:"SRD"
 	// riscv64:"SRLI\t",-"AND",-"SLTIU"
 	return v >> uint32(33)
 }
 
 func rshConst64x32(v int64) int64 {
+	// loong64:"SRAV"
 	// ppc64x:"SRAD"
 	// riscv64:"SRAI\t",-"OR",-"SLTIU"
 	return v >> uint32(33)
```
