diff options
| author | Ubuntu <markdryan@rivosinc.com> | 2023-09-22 13:14:25 +0000 |
|---|---|---|
| committer | Joel Sing <joel@sing.id.au> | 2023-10-30 14:47:06 +0000 |
| commit | 8fc043ccfacd358931e1f675777d0de584955e6f (patch) | |
| tree | 4a0ca1ddde3a28e9632229c15b3402de2f34c1cc /test/codegen | |
| parent | 1f4db9dbd6b743ffd1f3be350649ddebaad695c1 (diff) | |
| download | go-8fc043ccfacd358931e1f675777d0de584955e6f.tar.xz | |
cmd/compile: optimize right shifts of int32 on riscv64
The compiler is currently sign extending 32-bit signed integers to
64 bits before right shifting them using a 64-bit shift instruction.
There's no need to do this, as RISC-V has instructions for right
shifting 32-bit signed values (sraw and sraiw) which sign extend
the result of the shift to 64 bits. Change the compiler so that
it uses sraw and sraiw for shifts of signed 32-bit integers, reducing
in most cases the number of instructions needed to perform the shift.
Here are some examples of code sequences that are changed by this
patch:
int32(a) >> 2
before:
sll x5,x10,0x20
sra x10,x5,0x22
after:
sraw x10,x10,0x2
int32(v) >> int(s)
before:
sext.w x5,x10
sltiu x6,x11,64
add x6,x6,-1
or x6,x11,x6
sra x10,x5,x6
after:
sltiu x5,x11,32
add x5,x5,-1
or x5,x11,x5
sraw x10,x10,x5
int32(v) >> (int(s) & 31)
before:
sext.w x5,x10
and x6,x11,63
sra x10,x5,x6
after:
and x5,x11,31
sraw x10,x10,x5
int32(100) >> int(a)
before:
bltz x10,<target address calls runtime.panicshift>
sltiu x5,x10,64
add x5,x5,-1
or x5,x10,x5
li x6,100
sra x10,x6,x5
after:
bltz x10,<target address calls runtime.panicshift>
sltiu x5,x10,32
add x5,x5,-1
or x5,x10,x5
li x6,100
sraw x10,x6,x5
int32(v) >> (int(s) & 63)
before:
sext.w x5,x10
and x6,x11,63
sra x10,x5,x6
after:
and x5,x11,63
sltiu x6,x5,32
add x6,x6,-1
or x5,x5,x6
sraw x10,x10,x5
In most cases we eliminate one instruction. In the case where
we shift an int32 constant by a variable, the number of instructions
generated is identical: a sra is simply replaced by a sraw. In the
unusual case where we shift right by a variable anded with a constant
> 31 but < 64, we generate two additional instructions. As this is
an unusual case we do not try to optimize for it.
Some improvements can be seen in some of the existing benchmarks,
notably in the utf8 package, which performs right shifts of runes,
which are signed 32-bit integers.
| utf8-old | utf8-new |
| sec/op | sec/op vs base |
EncodeASCIIRune-4 17.68n ± 0% 17.67n ± 0% ~ (p=0.312 n=10)
EncodeJapaneseRune-4 35.34n ± 0% 34.53n ± 1% -2.31% (p=0.000 n=10)
AppendASCIIRune-4 3.213n ± 0% 3.213n ± 0% ~ (p=0.318 n=10)
AppendJapaneseRune-4 36.14n ± 0% 35.35n ± 0% -2.19% (p=0.000 n=10)
DecodeASCIIRune-4 28.11n ± 0% 27.36n ± 0% -2.69% (p=0.000 n=10)
DecodeJapaneseRune-4 38.55n ± 0% 38.58n ± 0% ~ (p=0.612 n=10)
Change-Id: I60a91cbede9ce65597571c7b7dd9943eeb8d3cc2
Reviewed-on: https://go-review.googlesource.com/c/go/+/535115
Run-TryBot: Joel Sing <joel@sing.id.au>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Joel Sing <joel@sing.id.au>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Reviewed-by: M Zhuo <mzh@golangcn.org>
Reviewed-by: David Chase <drchase@google.com>
Diffstat (limited to 'test/codegen')
| -rw-r--r-- | test/codegen/shift.go | 21 |
1 files changed, 13 insertions, 8 deletions
diff --git a/test/codegen/shift.go b/test/codegen/shift.go index bf8b633905..32cfaffae0 100644 --- a/test/codegen/shift.go +++ b/test/codegen/shift.go @@ -24,7 +24,7 @@ func rshConst64Ux64(v uint64) uint64 { func rshConst64x64(v int64) int64 { // ppc64x:"SRAD" - // riscv64:"SRAI",-"OR",-"SLTIU" + // riscv64:"SRAI\t",-"OR",-"SLTIU" return v >> uint64(33) } @@ -42,7 +42,7 @@ func rshConst32Ux64(v uint32) uint32 { func rshConst32x64(v int32) int32 { // ppc64x:"SRAW" - // riscv64:"SRAI",-"OR",-"SLTIU", -"MOVW" + // riscv64:"SRAIW",-"OR",-"SLTIU", -"MOVW" return v >> uint64(29) } @@ -60,7 +60,7 @@ func rshConst64Ux32(v uint64) uint64 { func rshConst64x32(v int64) int64 { // ppc64x:"SRAD" - // riscv64:"SRAI",-"OR",-"SLTIU" + // riscv64:"SRAI\t",-"OR",-"SLTIU" return v >> uint32(33) } @@ -87,7 +87,7 @@ func rshMask64Ux64(v uint64, s uint64) uint64 { func rshMask64x64(v int64, s uint64) int64 { // arm64:"ASR",-"AND",-"CSEL" // ppc64x:"RLDICL",-"ORN",-"ISEL" - // riscv64:"SRA",-"OR",-"SLTIU" + // riscv64:"SRA\t",-"OR",-"SLTIU" // s390x:-"RISBGZ",-"AND",-"LOCGR" return v >> (s & 63) } @@ -116,11 +116,16 @@ func rsh5Mask32Ux64(v uint32, s uint64) uint32 { func rshMask32x64(v int32, s uint64) int32 { // arm64:"ASR",-"AND" // ppc64x:"ISEL",-"ORN" - // riscv64:"SRA",-"OR",-"SLTIU" + // riscv64:"SRAW","OR","SLTIU" // s390x:-"RISBGZ",-"AND",-"LOCGR" return v >> (s & 63) } +func rsh5Mask32x64(v int32, s uint64) int32 { + // riscv64:"SRAW",-"OR",-"SLTIU" + return v >> (s & 31) +} + func lshMask64x32(v int64, s uint32) int64 { // arm64:"LSL",-"AND" // ppc64x:"RLDICL",-"ORN" @@ -140,7 +145,7 @@ func rshMask64Ux32(v uint64, s uint32) uint64 { func rshMask64x32(v int64, s uint32) int64 { // arm64:"ASR",-"AND",-"CSEL" // ppc64x:"RLDICL",-"ORN",-"ISEL" - // riscv64:"SRA",-"OR",-"SLTIU" + // riscv64:"SRA\t",-"OR",-"SLTIU" // s390x:-"RISBGZ",-"AND",-"LOCGR" return v >> (s & 63) } @@ -161,7 +166,7 @@ func rshMask64Ux32Ext(v uint64, s int32) uint64 { func rshMask64x32Ext(v int64, s int32) 
int64 { // ppc64x:"RLDICL",-"ORN",-"ISEL" - // riscv64:"SRA",-"OR",-"SLTIU" + // riscv64:"SRA\t",-"OR",-"SLTIU" // s390x:-"RISBGZ",-"AND",-"LOCGR" return v >> uint(s&63) } @@ -222,7 +227,7 @@ func rshGuarded64U(v uint64, s uint) uint64 { func rshGuarded64(v int64, s uint) int64 { if s < 64 { - // riscv64:"SRA",-"OR",-"SLTIU" + // riscv64:"SRA\t",-"OR",-"SLTIU" // s390x:-"RISBGZ",-"AND",-"LOCGR" // wasm:-"Select",-".*LtU" // arm64:"ASR",-"CSEL" |
