aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Mauri de Souza Meneguzzo <mauri870@gmail.com> 2023-11-07 18:34:49 +0000
committer: Russ Cox <rsc@golang.org> 2023-11-07 22:36:17 +0000
commit: df0bc9e1bb5eaba2300400ba5294aae0a10b6a54 (patch)
tree: bf266639a07af86b4e38c98709361456dbbaa7bf
parent: 74c2ba9521f1b7082a53cd34a0c4a01284cbd99b (diff)
download: go-x-crypto-df0bc9e1bb5eaba2300400ba5294aae0a10b6a54.tar.xz
chacha20poly1305: guard PSHUFB usage with GOAMD64_v2
The PSHUFB instruction is part of SSSE3, which is only available at GOAMD64 v2 and above, but it is being used without the GOAMD64_v2 guard.

The ROL macros were copied from CL 516859, which adds internal/chacha8rand.

Fixes golang/go#63871

Change-Id: I3c8ba75ff284cda4fc788885643246936e617b85
GitHub-Last-Rev: e235e8eae67f16b3a58817cfdff729693faf2665
GitHub-Pull-Request: golang/crypto#275
Reviewed-on: https://go-review.googlesource.com/c/crypto/+/538786
Reviewed-by: Keith Randall <khr@google.com>
Run-TryBot: Mauri de Souza Meneguzzo <mauri870@gmail.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Russ Cox <rsc@golang.org>
-rw-r--r-- chacha20poly1305/chacha20poly1305_amd64.s 24
1 files changed, 22 insertions, 2 deletions
diff --git a/chacha20poly1305/chacha20poly1305_amd64.s b/chacha20poly1305/chacha20poly1305_amd64.s
index 541d696..731d2ac 100644
--- a/chacha20poly1305/chacha20poly1305_amd64.s
+++ b/chacha20poly1305/chacha20poly1305_amd64.s
@@ -183,11 +183,31 @@ GLOBL ·andMask<>(SB), (NOPTR+RODATA), $240
#define shiftD1Right BYTE $0x66; BYTE $0x45; BYTE $0x0f; BYTE $0x3a; BYTE $0x0f; BYTE $0xd2; BYTE $0x04 // PALIGNR $4, X10, X10
#define shiftD2Right BYTE $0x66; BYTE $0x45; BYTE $0x0f; BYTE $0x3a; BYTE $0x0f; BYTE $0xdb; BYTE $0x04 // PALIGNR $4, X11, X11
#define shiftD3Right BYTE $0x66; BYTE $0x45; BYTE $0x0f; BYTE $0x3a; BYTE $0x0f; BYTE $0xff; BYTE $0x04 // PALIGNR $4, X15, X15
+
// Some macros
+
+// ROL rotates the uint32s in register R left by N bits, using temporary T.
+#define ROL(N, R, T) \
+ MOVO R, T; PSLLL $(N), T; PSRLL $(32-(N)), R; PXOR T, R
+
+// ROL16 rotates the uint32s in register R left by 16, using temporary T if needed.
+#ifdef GOAMD64_v2
+#define ROL16(R, T) PSHUFB ·rol16<>(SB), R
+#else
+#define ROL16(R, T) ROL(16, R, T)
+#endif
+
+// ROL8 rotates the uint32s in register R left by 8, using temporary T if needed.
+#ifdef GOAMD64_v2
+#define ROL8(R, T) PSHUFB ·rol8<>(SB), R
+#else
+#define ROL8(R, T) ROL(8, R, T)
+#endif
+
#define chachaQR(A, B, C, D, T) \
- PADDD B, A; PXOR A, D; PSHUFB ·rol16<>(SB), D \
+ PADDD B, A; PXOR A, D; ROL16(D, T) \
PADDD D, C; PXOR C, B; MOVO B, T; PSLLL $12, T; PSRLL $20, B; PXOR T, B \
- PADDD B, A; PXOR A, D; PSHUFB ·rol8<>(SB), D \
+ PADDD B, A; PXOR A, D; ROL8(D, T) \
PADDD D, C; PXOR C, B; MOVO B, T; PSLLL $7, T; PSRLL $25, B; PXOR T, B
#define chachaQR_AVX2(A, B, C, D, T) \