author    Paul E. Murphy <murp@ibm.com>  2021-06-08 13:16:01 -0500
committer Paul Murphy <murp@ibm.com>     2022-05-10 20:03:53 +0000
commit    0c43878baa035db39d9bbf84ce8721cd8a97c78a (patch)
tree      27bc4f48cd69efdeb8aeb3510befd583dbf9801e /test/codegen/mathbits.go
parent    526de61c67add8400d73b28bbed3f3680586c472 (diff)
download  go-0c43878baa035db39d9bbf84ce8721cd8a97c78a.tar.xz
cmd/compile: lower Add64/Sub64 into ssa on PPC64
math/bits.Add64 and math/bits.Sub64 now lower and optimize
directly in SSA form.

The optimization of carry chains focuses on eliding XER<->GPR
transfers of the CA bit when it is used exclusively as an input to a
single carry operation, or when the CA value is known.

This also adds support for handling XER spills in the assembler,
which could happen if carry chains contain inter-dependencies on
each other (which seems very unlikely with practical usage), or if a
clobber happens (SRAW/SRAD/SUBFC operations clobber CA).

With PPC64 Add64/Sub64 lowering into SSA and this patch, the net
performance difference in crypto/elliptic benchmarks on P9/ppc64le is:

name                                old time/op  new time/op  delta
ScalarBaseMult/P256                 46.3µs ± 0%  46.9µs ± 0%   +1.34%
ScalarBaseMult/P224                  356µs ± 0%   209µs ± 0%  -41.14%
ScalarBaseMult/P384                 1.20ms ± 0%  0.57ms ± 0%  -52.14%
ScalarBaseMult/P521                 3.38ms ± 0%  1.44ms ± 0%  -57.27%
ScalarMult/P256                      199µs ± 0%   199µs ± 0%   -0.17%
ScalarMult/P224                      357µs ± 0%   212µs ± 0%  -40.56%
ScalarMult/P384                     1.20ms ± 0%  0.58ms ± 0%  -51.86%
ScalarMult/P521                     3.37ms ± 0%  1.44ms ± 0%  -57.32%
MarshalUnmarshal/P256/Uncompressed  2.59µs ± 0%  2.52µs ± 0%   -2.63%
MarshalUnmarshal/P256/Compressed    2.58µs ± 0%  2.52µs ± 0%   -2.06%
MarshalUnmarshal/P224/Uncompressed  1.54µs ± 0%  1.40µs ± 0%   -9.42%
MarshalUnmarshal/P224/Compressed    1.54µs ± 0%  1.39µs ± 0%   -9.87%
MarshalUnmarshal/P384/Uncompressed  2.40µs ± 0%  1.80µs ± 0%  -24.93%
MarshalUnmarshal/P384/Compressed    2.35µs ± 0%  1.81µs ± 0%  -23.03%
MarshalUnmarshal/P521/Uncompressed  3.79µs ± 0%  2.58µs ± 0%  -31.81%
MarshalUnmarshal/P521/Compressed    3.80µs ± 0%  2.60µs ± 0%  -31.67%

Note: P256 uses an asm implementation, thus little variation is expected.

Change-Id: I88a24f6bf0f4f285c649e40243b1ab69cc452b71
Reviewed-on: https://go-review.googlesource.com/c/go/+/346870
Reviewed-by: Lynn Boger <laboger@linux.vnet.ibm.com>
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
Run-TryBot: Paul Murphy <murp@ibm.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@google.com>
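For illustration, the carry-chain pattern this change targets looks like
the sketch below (a hypothetical example, not part of the patch; add192
is an invented helper). A multi-word addition threads the carry output of
each bits.Add64 call into the next, so on PPC64 the CA bit can stay in
XER across the chain (ADDC, ADDE, ..., ADDZE) instead of round-tripping
through a GPR:

    package main

    import (
        "fmt"
        "math/bits"
    )

    // add192 adds two 192-bit integers held as [3]uint64 limbs
    // (least-significant limb first). Each bits.Add64 feeds its
    // carry-out into the next call, forming a carry chain.
    func add192(x, y [3]uint64) (sum [3]uint64, carry uint64) {
        var c uint64
        sum[0], c = bits.Add64(x[0], y[0], 0) // ADDC: no carry-in, sets CA
        sum[1], c = bits.Add64(x[1], y[1], c) // ADDE: consumes and sets CA
        sum[2], c = bits.Add64(x[2], y[2], c) // ADDE
        return sum, c // ADDZE materializes the final CA into a GPR
    }

    func main() {
        x := [3]uint64{^uint64(0), ^uint64(0), 0} // 2^128 - 1
        y := [3]uint64{1, 0, 0}
        s, c := add192(x, y)
        fmt.Println(s, c) // [0 0 1] 0
    }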
Diffstat (limited to 'test/codegen/mathbits.go')
-rw-r--r-- test/codegen/mathbits.go | 56
1 file changed, 50 insertions(+), 6 deletions(-)
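A note on reading the annotations in the diff below: test/codegen files
are checked by Go's asmcheck harness, where each quoted string in a
comment such as // ppc64: "ADDC" is a regexp that must match an
instruction in the compiled function's assembly for that GOARCH, and a
leading "-" asserts the instruction must NOT appear. A minimal sketch of
the convention (hypothetical function, mirroring the Add64R annotations
in this patch):

    // asmcheck

    package codegen

    import "math/bits"

    // The ppc64le build of this function must contain ADDC and ADDE,
    // and must not contain ADDZE (the carry-out is discarded).
    func addChain(x, y, ci uint64) uint64 {
        // ppc64le:"ADDC","ADDE",-"ADDZE"
        r, _ := bits.Add64(x, y, ci)
        return r
    }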
diff --git a/test/codegen/mathbits.go b/test/codegen/mathbits.go
index 58d57b3523..1ddb5c75cc 100644
--- a/test/codegen/mathbits.go
+++ b/test/codegen/mathbits.go
@@ -423,6 +423,8 @@ func IterateBits8(n uint8) int {
func Add(x, y, ci uint) (r, co uint) {
// arm64:"ADDS","ADCS","ADC",-"ADD\t",-"CMP"
// amd64:"NEGL","ADCQ","SBBQ","NEGQ"
+ // ppc64: "ADDC", "ADDE", "ADDZE"
+ // ppc64le: "ADDC", "ADDE", "ADDZE"
// s390x:"ADDE","ADDC\t[$]-1,"
return bits.Add(x, y, ci)
}
@@ -430,6 +432,8 @@ func Add(x, y, ci uint) (r, co uint) {
func AddC(x, ci uint) (r, co uint) {
// arm64:"ADDS","ADCS","ADC",-"ADD\t",-"CMP"
// amd64:"NEGL","ADCQ","SBBQ","NEGQ"
+ // ppc64: "ADDC", "ADDE", "ADDZE"
+ // ppc64le: "ADDC", "ADDE", "ADDZE"
// s390x:"ADDE","ADDC\t[$]-1,"
return bits.Add(x, 7, ci)
}
@@ -437,6 +441,8 @@ func AddC(x, ci uint) (r, co uint) {
func AddZ(x, y uint) (r, co uint) {
// arm64:"ADDS","ADC",-"ADCS",-"ADD\t",-"CMP"
// amd64:"ADDQ","SBBQ","NEGQ",-"NEGL",-"ADCQ"
+ // ppc64: "ADDC", -"ADDE", "ADDZE"
+ // ppc64le: "ADDC", -"ADDE", "ADDZE"
// s390x:"ADDC",-"ADDC\t[$]-1,"
return bits.Add(x, y, 0)
}
@@ -444,6 +450,8 @@ func AddZ(x, y uint) (r, co uint) {
func AddR(x, y, ci uint) uint {
// arm64:"ADDS","ADCS",-"ADD\t",-"CMP"
// amd64:"NEGL","ADCQ",-"SBBQ",-"NEGQ"
+ // ppc64: "ADDC", "ADDE", -"ADDZE"
+ // ppc64le: "ADDC", "ADDE", -"ADDZE"
// s390x:"ADDE","ADDC\t[$]-1,"
r, _ := bits.Add(x, y, ci)
return r
@@ -480,8 +488,8 @@ func Add64C(x, ci uint64) (r, co uint64) {
func Add64Z(x, y uint64) (r, co uint64) {
// arm64:"ADDS","ADC",-"ADCS",-"ADD\t",-"CMP"
// amd64:"ADDQ","SBBQ","NEGQ",-"NEGL",-"ADCQ"
- // ppc64: "ADDC", "ADDE", "ADDZE"
- // ppc64le: "ADDC", "ADDE", "ADDZE"
+ // ppc64: "ADDC", -"ADDE", "ADDZE"
+ // ppc64le: "ADDC", -"ADDE", "ADDZE"
// s390x:"ADDC",-"ADDC\t[$]-1,"
return bits.Add64(x, y, 0)
}
@@ -489,8 +497,8 @@ func Add64Z(x, y uint64) (r, co uint64) {
func Add64R(x, y, ci uint64) uint64 {
// arm64:"ADDS","ADCS",-"ADD\t",-"CMP"
// amd64:"NEGL","ADCQ",-"SBBQ",-"NEGQ"
- // ppc64: "ADDC", "ADDE", "ADDZE"
- // ppc64le: "ADDC", "ADDE", "ADDZE"
+ // ppc64: "ADDC", "ADDE", -"ADDZE"
+ // ppc64le: "ADDC", "ADDE", -"ADDZE"
// s390x:"ADDE","ADDC\t[$]-1,"
r, _ := bits.Add64(x, y, ci)
return r
@@ -500,13 +508,22 @@ func Add64M(p, q, r *[3]uint64) {
r[0], c = bits.Add64(p[0], q[0], c)
// arm64:"ADCS",-"ADD\t",-"CMP"
// amd64:"ADCQ",-"NEGL",-"SBBQ",-"NEGQ"
- // ppc64: "ADDC", "ADDE", "ADDZE"
- // ppc64le: "ADDC", "ADDE", "ADDZE"
+ // ppc64: -"ADDC", "ADDE", -"ADDZE"
+ // ppc64le: -"ADDC", "ADDE", -"ADDZE"
// s390x:"ADDE",-"ADDC\t[$]-1,"
r[1], c = bits.Add64(p[1], q[1], c)
r[2], c = bits.Add64(p[2], q[2], c)
}
+func Add64MSaveC(p, q, r, c *[2]uint64) {
+ // ppc64: "ADDC\tR", "ADDZE"
+ // ppc64le: "ADDC\tR", "ADDZE"
+ r[0], c[0] = bits.Add64(p[0], q[0], 0)
+ // ppc64: "ADDC\t[$]-1", "ADDE", "ADDZE"
+ // ppc64le: "ADDC\t[$]-1", "ADDE", "ADDZE"
+ r[1], c[1] = bits.Add64(p[1], q[1], c[0])
+}
+
func Add64PanicOnOverflowEQ(a, b uint64) uint64 {
r, c := bits.Add64(a, b, 0)
// s390x:"BRC\t[$]3,",-"ADDE"
@@ -577,6 +594,8 @@ func Add64MPanicOnOverflowGT(a, b [2]uint64) [2]uint64 {
func Sub(x, y, ci uint) (r, co uint) {
// amd64:"NEGL","SBBQ","NEGQ"
// arm64:"NEGS","SBCS","NGC","NEG",-"ADD",-"SUB",-"CMP"
+ // ppc64:"SUBC", "SUBE", "SUBZE", "NEG"
+ // ppc64le:"SUBC", "SUBE", "SUBZE", "NEG"
// s390x:"SUBE"
return bits.Sub(x, y, ci)
}
@@ -584,6 +603,8 @@ func Sub(x, y, ci uint) (r, co uint) {
func SubC(x, ci uint) (r, co uint) {
// amd64:"NEGL","SBBQ","NEGQ"
// arm64:"NEGS","SBCS","NGC","NEG",-"ADD",-"SUB",-"CMP"
+ // ppc64:"SUBC", "SUBE", "SUBZE", "NEG"
+ // ppc64le:"SUBC", "SUBE", "SUBZE", "NEG"
// s390x:"SUBE"
return bits.Sub(x, 7, ci)
}
@@ -591,6 +612,8 @@ func SubC(x, ci uint) (r, co uint) {
func SubZ(x, y uint) (r, co uint) {
// amd64:"SUBQ","SBBQ","NEGQ",-"NEGL"
// arm64:"SUBS","NGC","NEG",-"SBCS",-"ADD",-"SUB\t",-"CMP"
+ // ppc64:"SUBC", -"SUBE", "SUBZE", "NEG"
+ // ppc64le:"SUBC", -"SUBE", "SUBZE", "NEG"
// s390x:"SUBC"
return bits.Sub(x, y, 0)
}
@@ -598,6 +621,8 @@ func SubZ(x, y uint) (r, co uint) {
func SubR(x, y, ci uint) uint {
// amd64:"NEGL","SBBQ",-"NEGQ"
// arm64:"NEGS","SBCS",-"NGC",-"NEG\t",-"ADD",-"SUB",-"CMP"
+ // ppc64:"SUBC", "SUBE", -"SUBZE", -"NEG"
+ // ppc64le:"SUBC", "SUBE", -"SUBZE", -"NEG"
// s390x:"SUBE"
r, _ := bits.Sub(x, y, ci)
return r
@@ -607,6 +632,8 @@ func SubM(p, q, r *[3]uint) {
r[0], c = bits.Sub(p[0], q[0], c)
// amd64:"SBBQ",-"NEGL",-"NEGQ"
// arm64:"SBCS",-"NEGS",-"NGC",-"NEG",-"ADD",-"SUB",-"CMP"
+ // ppc64:-"SUBC", "SUBE", -"SUBZE", -"NEG"
+ // ppc64le:-"SUBC", "SUBE", -"SUBZE", -"NEG"
// s390x:"SUBE"
r[1], c = bits.Sub(p[1], q[1], c)
r[2], c = bits.Sub(p[2], q[2], c)
@@ -615,6 +642,8 @@ func SubM(p, q, r *[3]uint) {
func Sub64(x, y, ci uint64) (r, co uint64) {
// amd64:"NEGL","SBBQ","NEGQ"
// arm64:"NEGS","SBCS","NGC","NEG",-"ADD",-"SUB",-"CMP"
+ // ppc64:"SUBC", "SUBE", "SUBZE", "NEG"
+ // ppc64le:"SUBC", "SUBE", "SUBZE", "NEG"
// s390x:"SUBE"
return bits.Sub64(x, y, ci)
}
@@ -622,6 +651,8 @@ func Sub64(x, y, ci uint64) (r, co uint64) {
func Sub64C(x, ci uint64) (r, co uint64) {
// amd64:"NEGL","SBBQ","NEGQ"
// arm64:"NEGS","SBCS","NGC","NEG",-"ADD",-"SUB",-"CMP"
+ // ppc64:"SUBC", "SUBE", "SUBZE", "NEG"
+ // ppc64le:"SUBC", "SUBE", "SUBZE", "NEG"
// s390x:"SUBE"
return bits.Sub64(x, 7, ci)
}
@@ -629,6 +660,8 @@ func Sub64C(x, ci uint64) (r, co uint64) {
func Sub64Z(x, y uint64) (r, co uint64) {
// amd64:"SUBQ","SBBQ","NEGQ",-"NEGL"
// arm64:"SUBS","NGC","NEG",-"SBCS",-"ADD",-"SUB\t",-"CMP"
+ // ppc64:"SUBC", -"SUBE", "SUBZE", "NEG"
+ // ppc64le:"SUBC", -"SUBE", "SUBZE", "NEG"
// s390x:"SUBC"
return bits.Sub64(x, y, 0)
}
@@ -636,6 +669,8 @@ func Sub64Z(x, y uint64) (r, co uint64) {
func Sub64R(x, y, ci uint64) uint64 {
// amd64:"NEGL","SBBQ",-"NEGQ"
// arm64:"NEGS","SBCS",-"NGC",-"NEG\t",-"ADD",-"SUB",-"CMP"
+ // ppc64:"SUBC", "SUBE", -"SUBZE", -"NEG"
+ // ppc64le:"SUBC", "SUBE", -"SUBZE", -"NEG"
// s390x:"SUBE"
r, _ := bits.Sub64(x, y, ci)
return r
@@ -650,6 +685,15 @@ func Sub64M(p, q, r *[3]uint64) {
r[2], c = bits.Sub64(p[2], q[2], c)
}
+func Sub64MSaveC(p, q, r, c *[2]uint64) {
+ // ppc64:"SUBC\tR\\d+, R\\d+,", "SUBZE", "NEG"
+ // ppc64le:"SUBC\tR\\d+, R\\d+,", "SUBZE", "NEG"
+ r[0], c[0] = bits.Sub64(p[0], q[0], 0)
+ // ppc64:"SUBC\tR\\d+, [$]0,", "SUBE", "SUBZE", "NEG"
+ // ppc64le:"SUBC\tR\\d+, [$]0,", "SUBE", "SUBZE", "NEG"
+ r[1], c[1] = bits.Sub64(p[1], q[1], c[0])
+}
+
func Sub64PanicOnOverflowEQ(a, b uint64) uint64 {
r, b := bits.Sub64(a, b, 0)
// s390x:"BRC\t[$]12,",-"ADDE",-"SUBE"