aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMichael Pratt <mpratt@google.com>2025-04-15 02:22:59 -0700
committerGopher Robot <gobot@golang.org>2025-04-16 06:49:50 -0700
commit7c358664da2071dd8c46274b0e0ba68b11f796cd (patch)
tree23502a884e0fb15f5c194829baf08719dc31d1c6
parent0091fc8e7c693be8d8cbfee206c04512cf4f7513 (diff)
downloadgo-x-crypto-7c358664da2071dd8c46274b0e0ba68b11f796cd.tar.xz
Revert "argon2: add loong64 SIMD implementation"
This reverts CL 657795.

Reason for revert: Does not build on 1.23 or 1.24

For golang/go#73354.

Change-Id: I69498f24b57bd6650c9d1f4c755922cfaaba4a84
Reviewed-on: https://go-review.googlesource.com/c/crypto/+/665555
Reviewed-by: abner chenc <chenguoqi@loongson.cn>
Auto-Submit: Michael Pratt <mpratt@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
-rw-r--r--argon2/blamka_loong64.go59
-rw-r--r--argon2/blamka_loong64.s258
-rw-r--r--argon2/blamka_ref.go2
3 files changed, 1 insertions, 318 deletions
diff --git a/argon2/blamka_loong64.go b/argon2/blamka_loong64.go
deleted file mode 100644
index 1b43a2e..0000000
--- a/argon2/blamka_loong64.go
+++ /dev/null
@@ -1,59 +0,0 @@
-// Copyright 2025 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build loong64 && gc && !purego
-
-package argon2
-
-import "golang.org/x/sys/cpu"
-
-//go:noescape
-func mixBlocks1VX(out, in1, in2 *block)
-
-//go:noescape
-func mixBlocks2VX(out, in1, in2, t *block)
-
-//go:noescape
-func xorBlocksVX(out, in1, in2, t *block)
-
-//go:noescape
-func blamkaVX(b *block)
-
-func processBlockVX(out, in1, in2 *block, xor bool) {
- var t block
- mixBlocks1VX(&t, in1, in2)
- if cpu.Loong64.HasLSX {
- blamkaVX(&t)
- } else {
- for i := 0; i < blockLength; i += 16 {
- blamkaGeneric(
- &t[i+0], &t[i+1], &t[i+2], &t[i+3],
- &t[i+4], &t[i+5], &t[i+6], &t[i+7],
- &t[i+8], &t[i+9], &t[i+10], &t[i+11],
- &t[i+12], &t[i+13], &t[i+14], &t[i+15],
- )
- }
- for i := 0; i < blockLength/8; i += 2 {
- blamkaGeneric(
- &t[i], &t[i+1], &t[16+i], &t[16+i+1],
- &t[32+i], &t[32+i+1], &t[48+i], &t[48+i+1],
- &t[64+i], &t[64+i+1], &t[80+i], &t[80+i+1],
- &t[96+i], &t[96+i+1], &t[112+i], &t[112+i+1],
- )
- }
- }
- if xor {
- xorBlocksVX(out, in1, in2, &t)
- } else {
- mixBlocks2VX(out, in1, in2, &t)
- }
-}
-
-func processBlock(out, in1, in2 *block) {
- processBlockVX(out, in1, in2, false)
-}
-
-func processBlockXOR(out, in1, in2 *block) {
- processBlockVX(out, in1, in2, true)
-}
diff --git a/argon2/blamka_loong64.s b/argon2/blamka_loong64.s
deleted file mode 100644
index c380d77..0000000
--- a/argon2/blamka_loong64.s
+++ /dev/null
@@ -1,258 +0,0 @@
-// Copyright 2025 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build loong64 && gc && !purego
-
-#include "textflag.h"
-
-#define BLAMKA_ROUND \
- VMULWEVVWU V0, V2, V8; \
- VADDV V2, V0, V0; \
- VADDV V0, V8, V0; \
- VADDV V0, V8, V0; \
- VXORV V6, V0, V6; \
- VROTRV $32, V6, V6; \
- VMULWEVVWU V4, V6, V8; \
- VADDV V4, V6, V4; \
- VADDV V4, V8, V4; \
- VADDV V4, V8, V4; \
- VXORV V2, V4, V2; \
- VROTRV $24, V2, V2; \
- VMULWEVVWU V0, V2, V8; \
- VADDV V0, V2, V0; \
- VADDV V0, V8, V0; \
- VADDV V0, V8, V0; \
- VXORV V6, V0, V6; \
- VROTRV $16, V6, V6; \
- VMULWEVVWU V4, V6, V8; \
- VADDV V4, V6, V4; \
- VADDV V4, V8, V4; \
- VADDV V4, V8, V4; \
- VXORV V2, V4, V2; \
- VROTRV $63, V2, V2; \
-;\
- VMULWEVVWU V1, V3, V8; \
- VADDV V1, V3, V1; \
- VADDV V1, V8, V1; \
- VADDV V1, V8, V1; \
- VXORV V7, V1, V7; \
- VROTRV $32, V7, V7; \
- VMULWEVVWU V5, V7, V8; \
- VADDV V5, V7, V5; \
- VADDV V5, V8, V5; \
- VADDV V5, V8, V5; \
- VXORV V3, V5, V3; \
- VROTRV $24, V3, V3; \
- VMULWEVVWU V1, V3, V8; \
- VADDV V1, V3, V1; \
- VADDV V1, V8, V1; \
- VADDV V1, V8, V1; \
- VXORV V7, V1, V7; \
- VROTRV $16, V7, V7; \
- VMULWEVVWU V5, V7, V8; \
- VADDV V5, V7, V5; \
- VADDV V5, V8, V5; \
- VADDV V5, V8, V5; \
- VXORV V3, V5, V3; \
- VROTRV $63, V3, V3; \
-;\
- VXORV V0, V0, V8; \ // V8 = 0
- VADDV V2, V8, V9; \ // V9 = V2
- VADDV V5, V8, V10; \ // V10 = V5
- VADDV V6, V8, V11; \ // V11 = V6
- VADDV V4, V8, V5; \ // V5 = V4
- VADDV V10, V8, V4; \ // V4 = V5
- VSHUF4IV $9, V3, V2; \
- VSHUF4IV $9, V9, V3; \
- VSHUF4IV $3, V7, V6; \
- VSHUF4IV $3, V11, V7; \
-;\
- VMULWEVVWU V0, V2, V9; \
- VADDV V0, V2, V0; \
- VADDV V0, V9, V0; \
- VADDV V0, V9, V0; \
- VXORV V6, V0, V6; \
- VROTRV $32, V6, V6; \
- VMULWEVVWU V4, V6, V9; \
- VADDV V4, V6, V4; \
- VADDV V4, V9, V4; \
- VADDV V4, V9, V4; \
- VXORV V2, V4, V2; \
- VROTRV $24, V2, V2; \
- VMULWEVVWU V0, V2, V9; \
- VADDV V0, V2, V0; \
- VADDV V0, V9, V0; \
- VADDV V0, V9, V0; \
- VXORV V6, V0, V6; \
- VROTRV $16, V6, V6; \
- VMULWEVVWU V4, V6, V9; \
- VADDV V4, V6, V4; \
- VADDV V4, V9, V4; \
- VADDV V4, V9, V4; \
- VXORV V2, V4, V2; \
- VROTRV $63, V2, V2; \
-;\
- VMULWEVVWU V1, V3, V9; \
- VADDV V1, V3, V1; \
- VADDV V1, V9, V1; \
- VADDV V1, V9, V1; \
- VXORV V7, V1, V7; \
- VROTRV $32, V7, V7; \
- VMULWEVVWU V5, V7, V9; \
- VADDV V5, V7, V5; \
- VADDV V5, V9, V5; \
- VADDV V5, V9, V5; \
- VXORV V3, V5, V3; \
- VROTRV $24, V3, V3; \
- VMULWEVVWU V1, V3, V9; \
- VADDV V1, V3, V1; \
- VADDV V1, V9, V1; \
- VADDV V1, V9, V1; \
- VXORV V7, V1, V7; \
- VROTRV $16, V7, V7; \
- VMULWEVVWU V5, V7, V9; \
- VADDV V5, V7, V5; \
- VADDV V5, V9, V5; \
- VADDV V5, V9, V5; \
- VXORV V3, V5, V3; \
- VROTRV $63, V3, V3; \
-;\
- VADDV V2, V8, V9; \ // V9 = V2
- VADDV V5, V8, V10; \ // V10 = V5
- VADDV V6, V8, V11; \ // V11 = V6
- VADDV V4, V8, V5; \ // V5 = V4
- VADDV V10, V8, V4; \ // V4 = V5
- VSHUF4IV $3, V3, V2; \
- VSHUF4IV $3, V9, V3; \
- VSHUF4IV $9, V7, V6; \
- VSHUF4IV $9, V11, V7; \
-
-#define BLAMKA_ROUND1(index) \
- VMOVQ (index+0)(R4), V0; \
- VMOVQ (index+16)(R4), V1; \
- VMOVQ (index+32)(R4), V2; \
- VMOVQ (index+48)(R4), V3; \
- VMOVQ (index+64)(R4), V4; \
- VMOVQ (index+80)(R4), V5; \
- VMOVQ (index+96)(R4), V6; \
- VMOVQ (index+112)(R4), V7; \
- BLAMKA_ROUND; \
- VMOVQ V0, (index+0)(R4); \
- VMOVQ V1, (index+16)(R4); \
- VMOVQ V2, (index+32)(R4); \
- VMOVQ V3, (index+48)(R4); \
- VMOVQ V4, (index+64)(R4); \
- VMOVQ V5, (index+80)(R4); \
- VMOVQ V6, (index+96)(R4); \
- VMOVQ V7, (index+112)(R4); \
-
-#define BLAMKA_ROUND2(index) \
- VMOVQ (index+0)(R4), V0; \
- VMOVQ (index+128)(R4), V1; \
- VMOVQ (index+256)(R4), V2; \
- VMOVQ (index+384)(R4), V3; \
- VMOVQ (index+512)(R4), V4; \
- VMOVQ (index+640)(R4), V5; \
- VMOVQ (index+768)(R4), V6; \
- VMOVQ (index+896)(R4), V7; \
- BLAMKA_ROUND; \
- VMOVQ V0, (index+0)(R4); \
- VMOVQ V1, (index+128)(R4); \
- VMOVQ V2, (index+256)(R4); \
- VMOVQ V3, (index+384)(R4); \
- VMOVQ V4, (index+512)(R4); \
- VMOVQ V5, (index+640)(R4); \
- VMOVQ V6, (index+768)(R4); \
- VMOVQ V7, (index+896)(R4); \
-
-// func blamkaVX(b *block)
-TEXT ·blamkaVX(SB), NOSPLIT, $0-8
- MOVV b+0(FP), R4
-
- BLAMKA_ROUND1(0)
- BLAMKA_ROUND1(128)
- BLAMKA_ROUND1(256)
- BLAMKA_ROUND1(384)
- BLAMKA_ROUND1(512)
- BLAMKA_ROUND1(640)
- BLAMKA_ROUND1(768)
- BLAMKA_ROUND1(896)
-
- BLAMKA_ROUND2(0)
- BLAMKA_ROUND2(16)
- BLAMKA_ROUND2(32)
- BLAMKA_ROUND2(48)
- BLAMKA_ROUND2(64)
- BLAMKA_ROUND2(80)
- BLAMKA_ROUND2(96)
- BLAMKA_ROUND2(112)
-
- RET
-
-// func mixBlocks1VX(t *block, in1 *block, in2 *block)
-TEXT ·mixBlocks1VX(SB), NOSPLIT, $0-24
- MOVV t+0(FP), R4
- MOVV in1+8(FP), R5
- MOVV in2+16(FP), R6
- MOVV $128, R8
-
-loop:
- VMOVQ (R5), V0
- VMOVQ (R6), V1
- VXORV V0, V1, V2
- VMOVQ V2, (R4)
- ADDV $16, R5
- ADDV $16, R6
- ADDV $16, R4
- SUBV $2, R8
- BLT R0, R8, loop
- RET
-
-// func mixBlocks2VX(out *block, in1 *block, in2 *block, t *block)
-TEXT ·mixBlocks2VX(SB), NOSPLIT, $0-32
- MOVV out+0(FP), R4
- MOVV in1+8(FP), R5
- MOVV in2+16(FP), R6
- MOVV t+24(FP), R7
- MOVV $128, R8
-
-loop:
- VMOVQ (R5), V0
- VMOVQ (R6), V1
- VMOVQ (R7), V2
- VXORV V0, V1, V3
- VXORV V3, V2, V4
- VMOVQ V4, (R4)
- ADDV $16, R5
- ADDV $16, R6
- ADDV $16, R7
- ADDV $16, R4
- SUBV $2, R8
- BLT R0, R8, loop
- RET
-
-// func xorBlocksVX(out *block, in1 *block, in2 *block, t *block)
-TEXT ·xorBlocksVX(SB), NOSPLIT, $0-32
- MOVV out+0(FP), R4
- MOVV in1+8(FP), R5
- MOVV in2+16(FP), R6
- MOVV t+24(FP), R7
- MOVV $128, R8
-
-loop:
- VMOVQ (R5), V0
- VMOVQ (R6), V1
- VMOVQ (R7), V2
- VMOVQ (R4), V3
- VXORV V0, V1, V4
- VXORV V4, V2, V5
- VXORV V5, V3, V6
- VMOVQ V6, (R4)
- ADDV $16, R5
- ADDV $16, R6
- ADDV $16, R7
- ADDV $16, R4
- SUBV $2, R8
- BLT R0, R8, loop
- RET
diff --git a/argon2/blamka_ref.go b/argon2/blamka_ref.go
index cf3e141..16d58c6 100644
--- a/argon2/blamka_ref.go
+++ b/argon2/blamka_ref.go
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-//go:build (!amd64 && !loong64) || purego || !gc
+//go:build !amd64 || purego || !gc
package argon2