aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMichael Pratt <mpratt@google.com>2025-04-15 02:22:47 -0700
committerGopher Robot <gobot@golang.org>2025-04-16 06:50:31 -0700
commit958cde86ef61ba2f51186f4a59557fa82348133a (patch)
tree8eaaa45198379cebd99885821badd8b45e2b91df
parent51f005cfd443a6a8fe542c8c2e58ed138f1cbbe2 (diff)
downloadgo-x-crypto-958cde86ef61ba2f51186f4a59557fa82348133a.tar.xz
Revert "chacha20: add loong64 SIMD implementation"
This reverts CL 636257. Reason for revert: Does not build on Go 1.23 or 1.24. For golang/go#73354. Change-Id: Ie28d5c45930a8eacd5cabed64390ef674b3fb446 Reviewed-on: https://go-review.googlesource.com/c/crypto/+/665536 Reviewed-by: abner chenc <chenguoqi@loongson.cn> Auto-Submit: Michael Pratt <mpratt@google.com> Reviewed-by: Dmitri Shuralyov <dmitshur@google.com> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
-rw-r--r--chacha20/chacha_loong64.go22
-rw-r--r--chacha20/chacha_loong64.s374
-rw-r--r--chacha20/chacha_noasm.go2
3 files changed, 1 insertions, 397 deletions
diff --git a/chacha20/chacha_loong64.go b/chacha20/chacha_loong64.go
deleted file mode 100644
index 0d9547e..0000000
--- a/chacha20/chacha_loong64.go
+++ /dev/null
@@ -1,22 +0,0 @@
-// Copyright 2025 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build gc && !purego
-
-package chacha20
-
-import "golang.org/x/sys/cpu"
-
-const bufSize = 256
-
-//go:noescape
-func xorKeyStreamVX(dst, src []byte, key *[8]uint32, nonce *[3]uint32, counter *uint32)
-
-func (c *Cipher) xorKeyStreamBlocks(dst, src []byte) {
- if cpu.Loong64.HasLSX {
- xorKeyStreamVX(dst, src, &c.key, &c.nonce, &c.counter)
- } else {
- c.xorKeyStreamBlocksGeneric(dst, src)
- }
-}
diff --git a/chacha20/chacha_loong64.s b/chacha20/chacha_loong64.s
deleted file mode 100644
index 831ebf4..0000000
--- a/chacha20/chacha_loong64.s
+++ /dev/null
@@ -1,374 +0,0 @@
-// Copyright 2025 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// derived from chacha_arm64.s
-
-//go:build gc && !purego
-
-#include "textflag.h"
-
-DATA ·constants+0x00(SB)/4, $0x61707865
-DATA ·constants+0x04(SB)/4, $0x3320646e
-DATA ·constants+0x08(SB)/4, $0x79622d32
-DATA ·constants+0x0c(SB)/4, $0x6b206574
-GLOBL ·constants(SB), NOPTR|RODATA, $32
-
-DATA ·incRotMatrix+0x00(SB)/4, $0x00000000
-DATA ·incRotMatrix+0x04(SB)/4, $0x00000001
-DATA ·incRotMatrix+0x08(SB)/4, $0x00000002
-DATA ·incRotMatrix+0x0c(SB)/4, $0x00000003
-GLOBL ·incRotMatrix(SB), NOPTR|RODATA, $32
-
-#define NUM_ROUNDS 10
-
-// func xorKeyStreamVX(dst, src []byte, key *[8]uint32, nonce *[3]uint32, counter *uint32)
-TEXT ·xorKeyStreamVX(SB), NOSPLIT, $0
- MOVV dst+0(FP), R4
- MOVV src+24(FP), R5
- MOVV src_len+32(FP), R6
- MOVV key+48(FP), R7
- MOVV nonce+56(FP), R8
- MOVV counter+64(FP), R9
-
- MOVV $·constants(SB), R10
- MOVV $·incRotMatrix(SB), R11
-
- MOVW (R9), R12
-
-loop:
- MOVV $NUM_ROUNDS, R15
- // load the four 32-bit per-lane offsets from incRotMatrix; they are added to the counter below
- VMOVQ (R11), V30
-
- // load constants
- // VLDREPL.W $0, R10, V0
- WORD $0x30200140
- // VLDREPL.W $1, R10, V1
- WORD $0x30200541
- // VLDREPL.W $2, R10, V2
- WORD $0x30200942
- // VLDREPL.W $3, R10, V3
- WORD $0x30200d43
-
- // load keys
- // VLDREPL.W $0, R7, V4
- WORD $0x302000e4
- // VLDREPL.W $1, R7, V5
- WORD $0x302004e5
- // VLDREPL.W $2, R7, V6
- WORD $0x302008e6
- // VLDREPL.W $3, R7, V7
- WORD $0x30200ce7
- // VLDREPL.W $4, R7, V8
- WORD $0x302010e8
- // VLDREPL.W $5, R7, V9
- WORD $0x302014e9
- // VLDREPL.W $6, R7, V10
- WORD $0x302018ea
- // VLDREPL.W $7, R7, V11
- WORD $0x30201ceb
-
- // load counter + nonce
- // VLDREPL.W $0, R9, V12
- WORD $0x3020012c
-
- // VLDREPL.W $0, R8, V13
- WORD $0x3020010d
- // VLDREPL.W $1, R8, V14
- WORD $0x3020050e
- // VLDREPL.W $2, R8, V15
- WORD $0x3020090f
-
- // update counter
- VADDW V30, V12, V12
-
-chacha:
- // V0..V3 += V4..V7
- // V12..V15 <<<= ((V12..V15 XOR V0..V3), 16)
- VADDW V0, V4, V0
- VADDW V1, V5, V1
- VADDW V2, V6, V2
- VADDW V3, V7, V3
- VXORV V12, V0, V12
- VXORV V13, V1, V13
- VXORV V14, V2, V14
- VXORV V15, V3, V15
- VROTRW $16, V12, V12
- VROTRW $16, V13, V13
- VROTRW $16, V14, V14
- VROTRW $16, V15, V15
-
- // V8..V11 += V12..V15
- // V4..V7 <<<= ((V4..V7 XOR V8..V11), 12)
- VADDW V8, V12, V8
- VADDW V9, V13, V9
- VADDW V10, V14, V10
- VADDW V11, V15, V11
- VXORV V4, V8, V4
- VXORV V5, V9, V5
- VXORV V6, V10, V6
- VXORV V7, V11, V7
- VROTRW $20, V4, V4
- VROTRW $20, V5, V5
- VROTRW $20, V6, V6
- VROTRW $20, V7, V7
-
- // V0..V3 += V4..V7
- // V12..V15 <<<= ((V12..V15 XOR V0..V3), 8)
- VADDW V0, V4, V0
- VADDW V1, V5, V1
- VADDW V2, V6, V2
- VADDW V3, V7, V3
- VXORV V12, V0, V12
- VXORV V13, V1, V13
- VXORV V14, V2, V14
- VXORV V15, V3, V15
- VROTRW $24, V12, V12
- VROTRW $24, V13, V13
- VROTRW $24, V14, V14
- VROTRW $24, V15, V15
-
- // V8..V11 += V12..V15
- // V4..V7 <<<= ((V4..V7 XOR V8..V11), 7)
- VADDW V12, V8, V8
- VADDW V13, V9, V9
- VADDW V14, V10, V10
- VADDW V15, V11, V11
- VXORV V4, V8, V4
- VXORV V5, V9, V5
- VXORV V6, V10, V6
- VXORV V7, V11, V7
- VROTRW $25, V4, V4
- VROTRW $25, V5, V5
- VROTRW $25, V6, V6
- VROTRW $25, V7, V7
-
- // V0..V3 += V5..V7, V4
- // V15,V12-V14 <<<= ((V15,V12-V14 XOR V0..V3), 16)
- VADDW V0, V5, V0
- VADDW V1, V6, V1
- VADDW V2, V7, V2
- VADDW V3, V4, V3
- VXORV V15, V0, V15
- VXORV V12, V1, V12
- VXORV V13, V2, V13
- VXORV V14, V3, V14
- VROTRW $16, V15, V15
- VROTRW $16, V12, V12
- VROTRW $16, V13, V13
- VROTRW $16, V14, V14
-
- // V10,V11,V8,V9 += V15,V12,V13,V14
- // V5,V6,V7,V4 <<<= ((V5,V6,V7,V4 XOR V10,V11,V8,V9), 12)
- VADDW V10, V15, V10
- VADDW V11, V12, V11
- VADDW V8, V13, V8
- VADDW V9, V14, V9
- VXORV V5, V10, V5
- VXORV V6, V11, V6
- VXORV V7, V8, V7
- VXORV V4, V9, V4
- VROTRW $20, V5, V5
- VROTRW $20, V6, V6
- VROTRW $20, V7, V7
- VROTRW $20, V4, V4
-
- // V0..V3 += V5..V7, V4
- // V15,V12-V14 <<<= ((V15,V12-V14 XOR V0..V3), 8)
- VADDW V5, V0, V0
- VADDW V6, V1, V1
- VADDW V7, V2, V2
- VADDW V4, V3, V3
- VXORV V15, V0, V15
- VXORV V12, V1, V12
- VXORV V13, V2, V13
- VXORV V14, V3, V14
- VROTRW $24, V15, V15
- VROTRW $24, V12, V12
- VROTRW $24, V13, V13
- VROTRW $24, V14, V14
-
- // V10,V11,V8,V9 += V15,V12,V13,V14
- // V5,V6,V7,V4 <<<= ((V5,V6,V7,V4 XOR V10,V11,V8,V9), 7)
- VADDW V15, V10, V10
- VADDW V12, V11, V11
- VADDW V13, V8, V8
- VADDW V14, V9, V9
- VXORV V5, V10, V5
- VXORV V6, V11, V6
- VXORV V7, V8, V7
- VXORV V4, V9, V4
- VROTRW $25, V5, V5
- VROTRW $25, V6, V6
- VROTRW $25, V7, V7
- VROTRW $25, V4, V4
-
- SUBV $1, R15
- BNE R15, R0, chacha
-
- // load original constants
- // VLDREPL.W $0, R10, V16
- WORD $0x30200150
- // VLDREPL.W $1, R10, V17
- WORD $0x30200551
- // VLDREPL.W $2, R10, V18
- WORD $0x30200952
- // VLDREPL.W $3, R10, V19
- WORD $0x30200d53
-
- // load original keys
- // VLDREPL.W $0, R7, V20
- WORD $0x302000f4
- // VLDREPL.W $1, R7, V21
- WORD $0x302004f5
- // VLDREPL.W $2, R7, V22
- WORD $0x302008f6
- // VLDREPL.W $3, R7, V23
- WORD $0x30200cf7
- // VLDREPL.W $4, R7, V24
- WORD $0x302010f8
- // VLDREPL.W $5, R7, V25
- WORD $0x302014f9
- // VLDREPL.W $6, R7, V26
- WORD $0x302018fa
- // VLDREPL.W $7, R7, V27
- WORD $0x30201cfb
-
- // add back the initial state to generate the key stream
- VADDW V30, V12, V12 // update counter in advance to prevent V30 from being overwritten
- VADDW V16, V0, V0
- VADDW V17, V1, V1
- VADDW V18, V2, V2
- VADDW V19, V3, V3
-
- // load original counter + nonce
- // VLDREPL.W $0, R9, V28
- WORD $0x3020013c
- // VLDREPL.W $0, R8, V29
- WORD $0x3020011d
- // VLDREPL.W $1, R8, V30
- WORD $0x3020051e
- // VLDREPL.W $2, R8, V31
- WORD $0x3020091f
-
- VADDW V20, V4, V4
- VADDW V21, V5, V5
- VADDW V22, V6, V6
- VADDW V23, V7, V7
- VADDW V24, V8, V8
- VADDW V25, V9, V9
- VADDW V26, V10, V10
- VADDW V27, V11, V11
- VADDW V28, V12, V12
- VADDW V29, V13, V13
- VADDW V30, V14, V14
- VADDW V31, V15, V15
-
- // shuffle
- VILVLW V0, V1, V16
- VILVHW V0, V1, V17
- VILVLW V2, V3, V18
- VILVHW V2, V3, V19
- VILVLW V4, V5 ,V20
- VILVHW V4, V5, V21
- VILVLW V6, V7, V22
- VILVHW V6, V7, V23
- VILVLW V8, V9, V24
- VILVHW V8, V9, V25
- VILVLW V10, V11, V26
- VILVHW V10, V11, V27
- VILVLW V12, V13, V28
- VILVHW V12, V13, V29
- VILVLW V14, V15, V30
- VILVHW V14, V15, V31
- VILVLV V16, V18, V0
- VILVHV V16, V18, V4
- VILVLV V17, V19, V8
- VILVHV V17, V19, V12
-
- // load src data from R5
- VMOVQ 0(R5), V16
- VMOVQ 16(R5), V17
- VMOVQ 32(R5), V18
- VMOVQ 48(R5), V19
-
- VILVLV V20, V22, V1
- VILVHV V20, V22, V5
- VILVLV V21, V23, V9
- VILVHV V21, V23, V13
-
- VMOVQ 64(R5), V20
- VMOVQ 80(R5), V21
- VMOVQ 96(R5), V22
- VMOVQ 112(R5), V23
-
- VILVLV V24, V26, V2
- VILVHV V24, V26, V6
- VILVLV V25, V27, V10
- VILVHV V25, V27, V14
-
- VMOVQ 128(R5), V24
- VMOVQ 144(R5), V25
- VMOVQ 160(R5), V26
- VMOVQ 176(R5), V27
-
- VILVLV V28, V30, V3
- VILVHV V28, V30, V7
- VILVLV V29, V31, V11
- VILVHV V29, V31, V15
-
- VMOVQ 192(R5), V28
- VMOVQ 208(R5), V29
- VMOVQ 224(R5), V30
- VMOVQ 240(R5), V31
-
- VXORV V0, V16, V16
- VXORV V1, V17, V17
- VXORV V2, V18, V18
- VXORV V3, V19, V19
-
- VMOVQ V16, 0(R4)
- VMOVQ V17, 16(R4)
- VMOVQ V18, 32(R4)
- VMOVQ V19, 48(R4)
-
- VXORV V4, V20, V20
- VXORV V5, V21, V21
- VXORV V6, V22, V22
- VXORV V7, V23, V23
-
- VMOVQ V20, 64(R4)
- VMOVQ V21, 80(R4)
- VMOVQ V22, 96(R4)
- VMOVQ V23, 112(R4)
-
- VXORV V8, V24, V24
- VXORV V9, V25, V25
- VXORV V10, V26, V26
- VXORV V11, V27, V27
-
- VMOVQ V24, 128(R4)
- VMOVQ V25, 144(R4)
- VMOVQ V26, 160(R4)
- VMOVQ V27, 176(R4)
-
- VXORV V12, V28, V28
- VXORV V13, V29, V29
- VXORV V14, V30, V30
- VXORV V15, V31, V31
-
- VMOVQ V28, 192(R4)
- VMOVQ V29, 208(R4)
- VMOVQ V30, 224(R4)
- VMOVQ V31, 240(R4)
-
- ADD $4, R12, R12
- MOVW R12, (R9) // update counter
-
- ADDV $256, R4, R4
- ADDV $256, R5, R5
- SUBV $256, R6, R6
- BNE R6, R0, loop
-
- RET
diff --git a/chacha20/chacha_noasm.go b/chacha20/chacha_noasm.go
index 3853cc0..c709b72 100644
--- a/chacha20/chacha_noasm.go
+++ b/chacha20/chacha_noasm.go
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-//go:build (!arm64 && !loong64 && !s390x && !ppc64 && !ppc64le) || !gc || purego
+//go:build (!arm64 && !s390x && !ppc64 && !ppc64le) || !gc || purego
package chacha20