From 969f48a3a24032c3dd1ec351302b5b62407dfb88 Mon Sep 17 00:00:00 2001 From: Wayne Zuo Date: Fri, 29 Jul 2022 14:24:26 +0800 Subject: cmd/compile: intrinsify Add64 on riscv64 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to the RISC-V instruction set manual v2.2 Sec 2.4, we can implement the overflow check for unsigned addition cheaply using SLTU instructions. After this CL, the performance differences in crypto/elliptic benchmarks on linux/riscv64 are: name old time/op new time/op delta ScalarBaseMult/P256 1.93ms ± 1% 1.64ms ± 1% -14.96% (p=0.008 n=5+5) ScalarBaseMult/P224 1.80ms ± 2% 1.53ms ± 1% -14.89% (p=0.008 n=5+5) ScalarBaseMult/P384 6.15ms ± 2% 5.12ms ± 2% -16.73% (p=0.008 n=5+5) ScalarBaseMult/P521 25.9ms ± 1% 22.3ms ± 2% -13.78% (p=0.008 n=5+5) ScalarMult/P256 5.59ms ± 1% 4.49ms ± 2% -19.79% (p=0.008 n=5+5) ScalarMult/P224 5.42ms ± 1% 4.33ms ± 1% -20.01% (p=0.008 n=5+5) ScalarMult/P384 19.9ms ± 2% 16.3ms ± 1% -18.15% (p=0.008 n=5+5) ScalarMult/P521 97.3ms ± 1% 100.7ms ± 0% +3.48% (p=0.008 n=5+5) Change-Id: Ic4c82ced4b072a4a6575343fa9f29dd09b0cabc4 Reviewed-on: https://go-review.googlesource.com/c/go/+/420094 Reviewed-by: David Chase Reviewed-by: Cherry Mui Run-TryBot: Wayne Zuo Reviewed-by: Joel Sing TryBot-Result: Gopher Robot --- test/codegen/mathbits.go | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'test/codegen/mathbits.go') diff --git a/test/codegen/mathbits.go b/test/codegen/mathbits.go index a507d32843..f36916ad03 100644 --- a/test/codegen/mathbits.go +++ b/test/codegen/mathbits.go @@ -442,6 +442,7 @@ func Add(x, y, ci uint) (r, co uint) { // ppc64: "ADDC", "ADDE", "ADDZE" // ppc64le: "ADDC", "ADDE", "ADDZE" // s390x:"ADDE","ADDC\t[$]-1," + // riscv64: "ADD","SLTU" return bits.Add(x, y, ci) } @@ -451,6 +452,7 @@ func AddC(x, ci uint) (r, co uint) { // ppc64: "ADDC", "ADDE", "ADDZE" // ppc64le: "ADDC", "ADDE", "ADDZE" // s390x:"ADDE","ADDC\t[$]-1," + // riscv64: "ADD","SLTU" return 
bits.Add(x, 7, ci) } @@ -460,6 +462,7 @@ func AddZ(x, y uint) (r, co uint) { // ppc64: "ADDC", -"ADDE", "ADDZE" // ppc64le: "ADDC", -"ADDE", "ADDZE" // s390x:"ADDC",-"ADDC\t[$]-1," + // riscv64: "ADD","SLTU" return bits.Add(x, y, 0) } @@ -469,6 +472,7 @@ func AddR(x, y, ci uint) uint { // ppc64: "ADDC", "ADDE", -"ADDZE" // ppc64le: "ADDC", "ADDE", -"ADDZE" // s390x:"ADDE","ADDC\t[$]-1," + // riscv64: "ADD",-"SLTU" r, _ := bits.Add(x, y, ci) return r } @@ -489,6 +493,7 @@ func Add64(x, y, ci uint64) (r, co uint64) { // ppc64: "ADDC", "ADDE", "ADDZE" // ppc64le: "ADDC", "ADDE", "ADDZE" // s390x:"ADDE","ADDC\t[$]-1," + // riscv64: "ADD","SLTU" return bits.Add64(x, y, ci) } @@ -498,6 +503,7 @@ func Add64C(x, ci uint64) (r, co uint64) { // ppc64: "ADDC", "ADDE", "ADDZE" // ppc64le: "ADDC", "ADDE", "ADDZE" // s390x:"ADDE","ADDC\t[$]-1," + // riscv64: "ADD","SLTU" return bits.Add64(x, 7, ci) } @@ -507,6 +513,7 @@ func Add64Z(x, y uint64) (r, co uint64) { // ppc64: "ADDC", -"ADDE", "ADDZE" // ppc64le: "ADDC", -"ADDE", "ADDZE" // s390x:"ADDC",-"ADDC\t[$]-1," + // riscv64: "ADD","SLTU" return bits.Add64(x, y, 0) } @@ -516,6 +523,7 @@ func Add64R(x, y, ci uint64) uint64 { // ppc64: "ADDC", "ADDE", -"ADDZE" // ppc64le: "ADDC", "ADDE", -"ADDZE" // s390x:"ADDE","ADDC\t[$]-1," + // riscv64: "ADD",-"SLTU" r, _ := bits.Add64(x, y, ci) return r } -- cgit v1.3-5-g9baa