aboutsummaryrefslogtreecommitdiff
path: root/src/crypto
diff options
context:
space:
mode:
authorPaul E. Murphy <murp@ibm.com>2022-11-08 15:46:05 -0600
committerGopher Robot <gobot@golang.org>2022-11-21 15:06:26 +0000
commit8614c525b301b2b993bf99269a01975ea072b086 (patch)
treec392bbb8691e092e6102340bf256d71de62af513 /src/crypto
parentcf93b25366aa418dea3eea49a7b85447631c2a1d (diff)
downloadgo-8614c525b301b2b993bf99269a01975ea072b086.tar.xz
crypto/aes: On ppc64le, use better instructions when available
Several operations emulate instructions available on power9. Use the GOPPC64_power9 macro provided by the compiler to select the native instructions if the minimum cpu requirements are met. Likewise rework the LXSDX_BE to simplify usage when overriding it. It is only used in one place. All three configurations are tested via CI. On POWER9: pkg:crypto/cipher goos:linux goarch:ppc64le AESCBCEncrypt1K 949MB/s ± 0% 957MB/s ± 0% +0.83% AESCBCDecrypt1K 1.82GB/s ± 0% 1.99GB/s ± 0% +8.93% pkg:crypto/aes goos:linux goarch:ppc64le Encrypt 1.01GB/s ± 0% 1.05GB/s ± 0% +4.36% Decrypt 987MB/s ± 0% 1024MB/s ± 0% +3.77% Change-Id: I56d0eb845647dd3c43bcad71eb281b499e1d1789 Reviewed-on: https://go-review.googlesource.com/c/go/+/449116 Reviewed-by: Lynn Boger <laboger@linux.vnet.ibm.com> Auto-Submit: Paul Murphy <murp@ibm.com> TryBot-Result: Gopher Robot <gobot@golang.org> Reviewed-by: Joedian Reid <joedian@golang.org> Reviewed-by: Bryan Mills <bcmills@google.com> Run-TryBot: Paul Murphy <murp@ibm.com>
Diffstat (limited to 'src/crypto')
-rw-r--r--src/crypto/aes/asm_ppc64x.s42
1 files changed, 23 insertions, 19 deletions
diff --git a/src/crypto/aes/asm_ppc64x.s b/src/crypto/aes/asm_ppc64x.s
index c838d53923..8ac97ec281 100644
--- a/src/crypto/aes/asm_ppc64x.s
+++ b/src/crypto/aes/asm_ppc64x.s
@@ -48,7 +48,7 @@
// For P9 instruction emulation
#define ESPERM V21 // Endian swapping permute into BE
-#define TMP2 V22 // Temporary for P8_STXVB16X/P8_STXV
+#define TMP2 V22 // Temporary for P8_STXVB16X/P8_STXVB16X
// For {en,de}cryptBlockAsm
#define BLK_INP R3
@@ -69,8 +69,15 @@ DATA ·rcon+0x40(SB)/8, $0x0000000000000000
DATA ·rcon+0x48(SB)/8, $0x0000000000000000
GLOBL ·rcon(SB), RODATA, $80
-// Emulate unaligned BE vector load/stores on LE targets
#ifdef GOARCH_ppc64le
+# ifdef GOPPC64_power9
+#define P8_LXVB16X(RA,RB,VT) LXVB16X (RA+RB), VT
+#define P8_STXVB16X(VS,RA,RB) STXVB16X VS, (RA+RB)
+#define XXBRD_ON_LE(VA,VT) XXBRD VA, VT
+# else
+// On POWER8/ppc64le, emulate the POWER9 instructions by loading unaligned
+// doublewords and byte-swapping each doubleword to emulate BE load/stores.
+#define NEEDS_ESPERM
#define P8_LXVB16X(RA,RB,VT) \
LXVD2X (RA+RB), VT \
VPERM VT, VT, ESPERM, VT
@@ -79,19 +86,15 @@ GLOBL ·rcon(SB), RODATA, $80
VPERM VS, VS, ESPERM, TMP2 \
STXVD2X TMP2, (RA+RB)
-#define LXSDX_BE(RA,RB,VT) \
- LXSDX (RA+RB), VT \
- VPERM VT, VT, ESPERM, VT
-#else
-#define P8_LXVB16X(RA,RB,VT) \
- LXVD2X (RA+RB), VT
+#define XXBRD_ON_LE(VA,VT) \
+ VPERM VA, VA, ESPERM, VT
-#define P8_STXVB16X(VS,RA,RB) \
- STXVD2X VS, (RA+RB)
-
-#define LXSDX_BE(RA,RB,VT) \
- LXSDX (RA+RB), VT
-#endif
+# endif // defined(GOPPC64_power9)
+#else
+#define P8_LXVB16X(RA,RB,VT) LXVD2X (RA+RB), VT
+#define P8_STXVB16X(VS,RA,RB) STXVD2X VS, (RA+RB)
+#define XXBRD_ON_LE(VA, VT)
+#endif // defined(GOARCH_ppc64le)
// func setEncryptKeyAsm(nr int, key *byte, enc *uint32, dec *uint32)
TEXT ·expandKeyAsm(SB), NOSPLIT|NOFRAME, $0
@@ -101,7 +104,7 @@ TEXT ·expandKeyAsm(SB), NOSPLIT|NOFRAME, $0
MOVD enc+16(FP), OUTENC
MOVD dec+24(FP), OUTDEC
-#ifdef GOARCH_ppc64le
+#ifdef NEEDS_ESPERM
MOVD $·rcon(SB), PTR // PTR points to rcon addr
LVX (PTR), ESPERM
ADD $0x10, PTR
@@ -191,7 +194,8 @@ loop128:
RET
l192:
- LXSDX_BE(INP, R0, IN1) // Load next 8 bytes into upper half of VSR in BE order.
+ LXSDX (INP+R0), IN1 // Load next 8 bytes into upper half of VSR.
+ XXBRD_ON_LE(IN1, IN1) // and convert to BE ordering on LE hosts.
MOVD $4, CNT // li 7,4
STXVD2X IN0, (R0+OUTENC)
STXVD2X IN0, (R0+OUTDEC)
@@ -309,7 +313,7 @@ TEXT ·encryptBlockAsm(SB), NOSPLIT|NOFRAME, $0
MOVD xk+8(FP), R5 // Key pointer
MOVD dst+16(FP), R3 // Dest pointer
MOVD src+24(FP), R4 // Src pointer
-#ifdef GOARCH_ppc64le
+#ifdef NEEDS_ESPERM
MOVD $·rcon(SB), R7
LVX (R7), ESPERM // Permute value for P8_ macros.
#endif
@@ -404,7 +408,7 @@ TEXT ·decryptBlockAsm(SB), NOSPLIT|NOFRAME, $0
MOVD xk+8(FP), R5 // Key pointer
MOVD dst+16(FP), R3 // Dest pointer
MOVD src+24(FP), R4 // Src pointer
-#ifdef GOARCH_ppc64le
+#ifdef NEEDS_ESPERM
MOVD $·rcon(SB), R7
LVX (R7), ESPERM // Permute value for P8_ macros.
#endif
@@ -622,7 +626,7 @@ TEXT ·cryptBlocksChain(SB), NOSPLIT|NOFRAME, $0
MOVD enc+40(FP), ENC
MOVD nr+48(FP), ROUNDS
-#ifdef GOARCH_ppc64le
+#ifdef NEEDS_ESPERM
MOVD $·rcon(SB), R11
LVX (R11), ESPERM // Permute value for P8_ macros.
#endif