diff options
| author | Wayne Zuo <wdvxdr@golangcn.org> | 2022-07-29 14:24:26 +0800 |
|---|---|---|
| committer | Joel Sing <joel@sing.id.au> | 2022-08-27 05:43:32 +0000 |
| commit | 969f48a3a24032c3dd1ec351302b5b62407dfb88 (patch) | |
| tree | 7368eaba5b47e4600cb14e0b8940ca43ebdc118c /test/codegen | |
| parent | a2d2e6e7cb12c57cd8f5af64909882bab1dbca19 (diff) | |
| download | go-969f48a3a24032c3dd1ec351302b5b62407dfb88.tar.xz | |
cmd/compile: intrinsify Add64 on riscv64
According to the RISC-V instruction set manual v2.2, Sec. 2.4, we can
implement the overflow check for unsigned addition cheaply using
SLTU instructions.
After this CL, the performance differences in the crypto/elliptic
benchmarks on linux/riscv64 are:
name old time/op new time/op delta
ScalarBaseMult/P256 1.93ms ± 1% 1.64ms ± 1% -14.96% (p=0.008 n=5+5)
ScalarBaseMult/P224 1.80ms ± 2% 1.53ms ± 1% -14.89% (p=0.008 n=5+5)
ScalarBaseMult/P384 6.15ms ± 2% 5.12ms ± 2% -16.73% (p=0.008 n=5+5)
ScalarBaseMult/P521 25.9ms ± 1% 22.3ms ± 2% -13.78% (p=0.008 n=5+5)
ScalarMult/P256 5.59ms ± 1% 4.49ms ± 2% -19.79% (p=0.008 n=5+5)
ScalarMult/P224 5.42ms ± 1% 4.33ms ± 1% -20.01% (p=0.008 n=5+5)
ScalarMult/P384 19.9ms ± 2% 16.3ms ± 1% -18.15% (p=0.008 n=5+5)
ScalarMult/P521 97.3ms ± 1% 100.7ms ± 0% +3.48% (p=0.008 n=5+5)
Change-Id: Ic4c82ced4b072a4a6575343fa9f29dd09b0cabc4
Reviewed-on: https://go-review.googlesource.com/c/go/+/420094
Reviewed-by: David Chase <drchase@google.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Run-TryBot: Wayne Zuo <wdvxdr@golangcn.org>
Reviewed-by: Joel Sing <joel@sing.id.au>
TryBot-Result: Gopher Robot <gobot@golang.org>
Diffstat (limited to 'test/codegen')
| -rw-r--r-- | test/codegen/mathbits.go | 8 |
1 files changed, 8 insertions, 0 deletions
diff --git a/test/codegen/mathbits.go b/test/codegen/mathbits.go index a507d32843..f36916ad03 100644 --- a/test/codegen/mathbits.go +++ b/test/codegen/mathbits.go @@ -442,6 +442,7 @@ func Add(x, y, ci uint) (r, co uint) { // ppc64: "ADDC", "ADDE", "ADDZE" // ppc64le: "ADDC", "ADDE", "ADDZE" // s390x:"ADDE","ADDC\t[$]-1," + // riscv64: "ADD","SLTU" return bits.Add(x, y, ci) } @@ -451,6 +452,7 @@ func AddC(x, ci uint) (r, co uint) { // ppc64: "ADDC", "ADDE", "ADDZE" // ppc64le: "ADDC", "ADDE", "ADDZE" // s390x:"ADDE","ADDC\t[$]-1," + // riscv64: "ADD","SLTU" return bits.Add(x, 7, ci) } @@ -460,6 +462,7 @@ func AddZ(x, y uint) (r, co uint) { // ppc64: "ADDC", -"ADDE", "ADDZE" // ppc64le: "ADDC", -"ADDE", "ADDZE" // s390x:"ADDC",-"ADDC\t[$]-1," + // riscv64: "ADD","SLTU" return bits.Add(x, y, 0) } @@ -469,6 +472,7 @@ func AddR(x, y, ci uint) uint { // ppc64: "ADDC", "ADDE", -"ADDZE" // ppc64le: "ADDC", "ADDE", -"ADDZE" // s390x:"ADDE","ADDC\t[$]-1," + // riscv64: "ADD",-"SLTU" r, _ := bits.Add(x, y, ci) return r } @@ -489,6 +493,7 @@ func Add64(x, y, ci uint64) (r, co uint64) { // ppc64: "ADDC", "ADDE", "ADDZE" // ppc64le: "ADDC", "ADDE", "ADDZE" // s390x:"ADDE","ADDC\t[$]-1," + // riscv64: "ADD","SLTU" return bits.Add64(x, y, ci) } @@ -498,6 +503,7 @@ func Add64C(x, ci uint64) (r, co uint64) { // ppc64: "ADDC", "ADDE", "ADDZE" // ppc64le: "ADDC", "ADDE", "ADDZE" // s390x:"ADDE","ADDC\t[$]-1," + // riscv64: "ADD","SLTU" return bits.Add64(x, 7, ci) } @@ -507,6 +513,7 @@ func Add64Z(x, y uint64) (r, co uint64) { // ppc64: "ADDC", -"ADDE", "ADDZE" // ppc64le: "ADDC", -"ADDE", "ADDZE" // s390x:"ADDC",-"ADDC\t[$]-1," + // riscv64: "ADD","SLTU" return bits.Add64(x, y, 0) } @@ -516,6 +523,7 @@ func Add64R(x, y, ci uint64) uint64 { // ppc64: "ADDC", "ADDE", -"ADDZE" // ppc64le: "ADDC", "ADDE", -"ADDZE" // s390x:"ADDE","ADDC\t[$]-1," + // riscv64: "ADD",-"SLTU" r, _ := bits.Add64(x, y, ci) return r } |
