aboutsummaryrefslogtreecommitdiff
path: root/src/internal/bytealg
diff options
context:
space:
mode:
authorPaul Murphy <murp@ibm.com>2023-08-17 16:35:55 -0500
committerPaul Murphy <murp@ibm.com>2023-08-28 17:33:20 +0000
commitb2e809bab59a692aa6a69e1bd1d32eeeab4622e3 (patch)
tree9281e2c8c22f09f4a9d9c60ddc40069064b755f4 /src/internal/bytealg
parentc5569f09f22a6c002f1f6dc694272c3a84d4b16c (diff)
downloadgo-b2e809bab59a692aa6a69e1bd1d32eeeab4622e3.tar.xz
internal/bytealg: improve compare on Power10/PPC64
Handle comparisons of 15 or less bytes more efficiently with Power10 instructions when building with GOPPC64=power10. name old time/op new time/op delta BytesCompare/1 2.53ns ± 0% 2.17ns ± 0% -14.17% BytesCompare/2 2.70ns ± 0% 2.17ns ± 0% -19.77% BytesCompare/4 2.59ns ± 0% 2.17ns ± 0% -16.20% BytesCompare/8 2.66ns ± 0% 2.17ns ± 0% -18.63% Change-Id: I6d7c6af0a58ea3e03acc3930c54b77f2ac1dfbd5 Reviewed-on: https://go-review.googlesource.com/c/go/+/522315 Reviewed-by: Joedian Reid <joedian@golang.org> Run-TryBot: Paul Murphy <murp@ibm.com> Reviewed-by: Lynn Boger <laboger@linux.vnet.ibm.com> Reviewed-by: Bryan Mills <bcmills@google.com> TryBot-Result: Gopher Robot <gobot@golang.org>
Diffstat (limited to 'src/internal/bytealg')
-rw-r--r--src/internal/bytealg/compare_ppc64x.s12
1 files changed, 11 insertions, 1 deletions
diff --git a/src/internal/bytealg/compare_ppc64x.s b/src/internal/bytealg/compare_ppc64x.s
index 63c33ee635..2629251e43 100644
--- a/src/internal/bytealg/compare_ppc64x.s
+++ b/src/internal/bytealg/compare_ppc64x.s
@@ -274,7 +274,16 @@ lower:
RET
PCALIGN $16
-cmp8: // 8 - 15B
+cmp8: // 8 - 15B (0 - 15B if GOPPC64_power10)
+#ifdef GOPPC64_power10
+ SLD $56,R9,R9
+ LXVLL R5,R9,V3 // Load bytes starting from MSB to LSB, unused are zero filled.
+ LXVLL R6,R9,V4
+ VCMPUQ V3,V4,CR0 // Compare as a 128b integer.
+ SETB_CR0(R6)
+ ISEL CR0EQ,R3,R6,R3 // If equal, length determines the return value.
+ RET
+#else
CMP R9,$8
BLT cmp4
ANDCC $7,R9,R9
@@ -330,3 +339,4 @@ cmp0:
SETB_CR0(R6)
ISEL CR0EQ,R3,R6,R3
RET
+#endif