diff options
| author | Paul E. Murphy <murp@ibm.com> | 2022-11-08 15:46:05 -0600 |
|---|---|---|
| committer | Gopher Robot <gobot@golang.org> | 2022-11-21 15:06:26 +0000 |
| commit | 8614c525b301b2b993bf99269a01975ea072b086 (patch) | |
| tree | c392bbb8691e092e6102340bf256d71de62af513 /src/crypto | |
| parent | cf93b25366aa418dea3eea49a7b85447631c2a1d (diff) | |
| download | go-8614c525b301b2b993bf99269a01975ea072b086.tar.xz | |
crypto/aes: On ppc64le, use better instructions when available
Several operations emulate instructions available on power9. Use
the GOPPC64_power9 macro provided by the compiler to select the
native instructions if the minimum cpu requirements are met.
Likewise rework the LXSDX_BE to simplify usage when overriding
it. It is only used in one place.
All three configurations are tested via CI.
On POWER9:
pkg:crypto/cipher goos:linux goarch:ppc64le
AESCBCEncrypt1K 949MB/s ± 0% 957MB/s ± 0% +0.83%
AESCBCDecrypt1K 1.82GB/s ± 0% 1.99GB/s ± 0% +8.93%
pkg:crypto/aes goos:linux goarch:ppc64le
Encrypt 1.01GB/s ± 0% 1.05GB/s ± 0% +4.36%
Decrypt 987MB/s ± 0% 1024MB/s ± 0% +3.77%
Change-Id: I56d0eb845647dd3c43bcad71eb281b499e1d1789
Reviewed-on: https://go-review.googlesource.com/c/go/+/449116
Reviewed-by: Lynn Boger <laboger@linux.vnet.ibm.com>
Auto-Submit: Paul Murphy <murp@ibm.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Joedian Reid <joedian@golang.org>
Reviewed-by: Bryan Mills <bcmills@google.com>
Run-TryBot: Paul Murphy <murp@ibm.com>
Diffstat (limited to 'src/crypto')
| -rw-r--r-- | src/crypto/aes/asm_ppc64x.s | 42 |
1 files changed, 23 insertions, 19 deletions
diff --git a/src/crypto/aes/asm_ppc64x.s b/src/crypto/aes/asm_ppc64x.s index c838d53923..8ac97ec281 100644 --- a/src/crypto/aes/asm_ppc64x.s +++ b/src/crypto/aes/asm_ppc64x.s @@ -48,7 +48,7 @@ // For P9 instruction emulation #define ESPERM V21 // Endian swapping permute into BE -#define TMP2 V22 // Temporary for P8_STXVB16X/P8_STXV +#define TMP2 V22 // Temporary for P8_STXVB16X/P8_STXVB16X // For {en,de}cryptBlockAsm #define BLK_INP R3 @@ -69,8 +69,15 @@ DATA ·rcon+0x40(SB)/8, $0x0000000000000000 DATA ·rcon+0x48(SB)/8, $0x0000000000000000 GLOBL ·rcon(SB), RODATA, $80 -// Emulate unaligned BE vector load/stores on LE targets #ifdef GOARCH_ppc64le +# ifdef GOPPC64_power9 +#define P8_LXVB16X(RA,RB,VT) LXVB16X (RA+RB), VT +#define P8_STXVB16X(VS,RA,RB) STXVB16X VS, (RA+RB) +#define XXBRD_ON_LE(VA,VT) XXBRD VA, VT +# else +// On POWER8/ppc64le, emulate the POWER9 instructions by loading unaligned +// doublewords and byte-swapping each doubleword to emulate BE load/stores. +#define NEEDS_ESPERM #define P8_LXVB16X(RA,RB,VT) \ LXVD2X (RA+RB), VT \ VPERM VT, VT, ESPERM, VT @@ -79,19 +86,15 @@ GLOBL ·rcon(SB), RODATA, $80 VPERM VS, VS, ESPERM, TMP2 \ STXVD2X TMP2, (RA+RB) -#define LXSDX_BE(RA,RB,VT) \ - LXSDX (RA+RB), VT \ - VPERM VT, VT, ESPERM, VT -#else -#define P8_LXVB16X(RA,RB,VT) \ - LXVD2X (RA+RB), VT +#define XXBRD_ON_LE(VA,VT) \ + VPERM VA, VA, ESPERM, VT -#define P8_STXVB16X(VS,RA,RB) \ - STXVD2X VS, (RA+RB) - -#define LXSDX_BE(RA,RB,VT) \ - LXSDX (RA+RB), VT -#endif +# endif // defined(GOPPC64_power9) +#else +#define P8_LXVB16X(RA,RB,VT) LXVD2X (RA+RB), VT +#define P8_STXVB16X(VS,RA,RB) STXVD2X VS, (RA+RB) +#define XXBRD_ON_LE(VA, VT) +#endif // defined(GOARCH_ppc64le) // func setEncryptKeyAsm(nr int, key *byte, enc *uint32, dec *uint32) TEXT ·expandKeyAsm(SB), NOSPLIT|NOFRAME, $0 @@ -101,7 +104,7 @@ TEXT ·expandKeyAsm(SB), NOSPLIT|NOFRAME, $0 MOVD enc+16(FP), OUTENC MOVD dec+24(FP), OUTDEC -#ifdef GOARCH_ppc64le +#ifdef NEEDS_ESPERM MOVD $·rcon(SB), PTR // PTR points to rcon addr LVX (PTR), ESPERM ADD $0x10, PTR @@ -191,7 +194,8 @@ loop128: RET l192: - LXSDX_BE(INP, R0, IN1) // Load next 8 bytes into upper half of VSR in BE order. + LXSDX (INP+R0), IN1 // Load next 8 bytes into upper half of VSR. + XXBRD_ON_LE(IN1, IN1) // and convert to BE ordering on LE hosts. MOVD $4, CNT // li 7,4 STXVD2X IN0, (R0+OUTENC) STXVD2X IN0, (R0+OUTDEC) @@ -309,7 +313,7 @@ TEXT ·encryptBlockAsm(SB), NOSPLIT|NOFRAME, $0 MOVD xk+8(FP), R5 // Key pointer MOVD dst+16(FP), R3 // Dest pointer MOVD src+24(FP), R4 // Src pointer -#ifdef GOARCH_ppc64le +#ifdef NEEDS_ESPERM MOVD $·rcon(SB), R7 LVX (R7), ESPERM // Permute value for P8_ macros. #endif @@ -404,7 +408,7 @@ TEXT ·decryptBlockAsm(SB), NOSPLIT|NOFRAME, $0 MOVD xk+8(FP), R5 // Key pointer MOVD dst+16(FP), R3 // Dest pointer MOVD src+24(FP), R4 // Src pointer -#ifdef GOARCH_ppc64le +#ifdef NEEDS_ESPERM MOVD $·rcon(SB), R7 LVX (R7), ESPERM // Permute value for P8_ macros. #endif @@ -622,7 +626,7 @@ TEXT ·cryptBlocksChain(SB), NOSPLIT|NOFRAME, $0 MOVD enc+40(FP), ENC MOVD nr+48(FP), ROUNDS -#ifdef GOARCH_ppc64le +#ifdef NEEDS_ESPERM MOVD $·rcon(SB), R11 LVX (R11), ESPERM // Permute value for P8_ macros. #endif |
