diff options
| author | Michael Pratt <mpratt@google.com> | 2025-04-15 02:22:47 -0700 |
|---|---|---|
| committer | Gopher Robot <gobot@golang.org> | 2025-04-16 06:50:31 -0700 |
| commit | 958cde86ef61ba2f51186f4a59557fa82348133a (patch) | |
| tree | 8eaaa45198379cebd99885821badd8b45e2b91df | |
| parent | 51f005cfd443a6a8fe542c8c2e58ed138f1cbbe2 (diff) | |
| download | go-x-crypto-958cde86ef61ba2f51186f4a59557fa82348133a.tar.xz | |
Revert "chacha20: add loong64 SIMD implementation"
This reverts CL 636257.
Reason for revert: Does not build on 1.23 or 1.24
For golang/go#73354.
Change-Id: Ie28d5c45930a8eacd5cabed64390ef674b3fb446
Reviewed-on: https://go-review.googlesource.com/c/crypto/+/665536
Reviewed-by: abner chenc <chenguoqi@loongson.cn>
Auto-Submit: Michael Pratt <mpratt@google.com>
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | chacha20/chacha_loong64.go | 22 |
| -rw-r--r-- | chacha20/chacha_loong64.s | 374 |
| -rw-r--r-- | chacha20/chacha_noasm.go | 2 |
3 files changed, 1 insertion, 397 deletions
diff --git a/chacha20/chacha_loong64.go b/chacha20/chacha_loong64.go deleted file mode 100644 index 0d9547e..0000000 --- a/chacha20/chacha_loong64.go +++ /dev/null @@ -1,22 +0,0 @@ -// Copyright 2025 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -//go:build gc && !purego - -package chacha20 - -import "golang.org/x/sys/cpu" - -const bufSize = 256 - -//go:noescape -func xorKeyStreamVX(dst, src []byte, key *[8]uint32, nonce *[3]uint32, counter *uint32) - -func (c *Cipher) xorKeyStreamBlocks(dst, src []byte) { - if cpu.Loong64.HasLSX { - xorKeyStreamVX(dst, src, &c.key, &c.nonce, &c.counter) - } else { - c.xorKeyStreamBlocksGeneric(dst, src) - } -} diff --git a/chacha20/chacha_loong64.s b/chacha20/chacha_loong64.s deleted file mode 100644 index 831ebf4..0000000 --- a/chacha20/chacha_loong64.s +++ /dev/null @@ -1,374 +0,0 @@ -// Copyright 2025 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. 
- -// derived from chacha_arm64.s - -//go:build gc && !purego - -#include "textflag.h" - -DATA ·constants+0x00(SB)/4, $0x61707865 -DATA ·constants+0x04(SB)/4, $0x3320646e -DATA ·constants+0x08(SB)/4, $0x79622d32 -DATA ·constants+0x0c(SB)/4, $0x6b206574 -GLOBL ·constants(SB), NOPTR|RODATA, $32 - -DATA ·incRotMatrix+0x00(SB)/4, $0x00000000 -DATA ·incRotMatrix+0x04(SB)/4, $0x00000001 -DATA ·incRotMatrix+0x08(SB)/4, $0x00000002 -DATA ·incRotMatrix+0x0c(SB)/4, $0x00000003 -GLOBL ·incRotMatrix(SB), NOPTR|RODATA, $32 - -#define NUM_ROUNDS 10 - -// func xorKeyStreamVX(dst, src []byte, key *[8]uint32, nonce *[3]uint32, counter *uint32) -TEXT ·xorKeyStreamVX(SB), NOSPLIT, $0 - MOVV dst+0(FP), R4 - MOVV src+24(FP), R5 - MOVV src_len+32(FP), R6 - MOVV key+48(FP), R7 - MOVV nonce+56(FP), R8 - MOVV counter+64(FP), R9 - - MOVV $·constants(SB), R10 - MOVV $·incRotMatrix(SB), R11 - - MOVW (R9), R12 - -loop: - MOVV $NUM_ROUNDS, R15 - // load 4-32bit data from incRotMatrix added to counter - VMOVQ (R11), V30 - - // load contants - // VLDREPL.W $0, R10, V0 - WORD $0x30200140 - // VLDREPL.W $1, R10, V1 - WORD $0x30200541 - // VLDREPL.W $2, R10, V2 - WORD $0x30200942 - // VLDREPL.W $3, R10, V3 - WORD $0x30200d43 - - // load keys - // VLDREPL.W $0, R7, V4 - WORD $0x302000e4 - // VLDREPL.W $1, R7, V5 - WORD $0x302004e5 - // VLDREPL.W $2, R7, V6 - WORD $0x302008e6 - // VLDREPL.W $3, R7, V7 - WORD $0x30200ce7 - // VLDREPL.W $4, R7, V8 - WORD $0x302010e8 - // VLDREPL.W $5, R7, V9 - WORD $0x302014e9 - // VLDREPL.W $6, R7, V10 - WORD $0x302018ea - // VLDREPL.W $7, R7, V11 - WORD $0x30201ceb - - // load counter + nonce - // VLDREPL.W $0, R9, V12 - WORD $0x3020012c - - // VLDREPL.W $0, R8, V13 - WORD $0x3020010d - // VLDREPL.W $1, R8, V14 - WORD $0x3020050e - // VLDREPL.W $2, R8, V15 - WORD $0x3020090f - - // update counter - VADDW V30, V12, V12 - -chacha: - // V0..V3 += V4..V7 - // V12..V15 <<<= ((V12..V15 XOR V0..V3), 16) - VADDW V0, V4, V0 - VADDW V1, V5, V1 - VADDW V2, V6, V2 - VADDW V3, V7, 
V3 - VXORV V12, V0, V12 - VXORV V13, V1, V13 - VXORV V14, V2, V14 - VXORV V15, V3, V15 - VROTRW $16, V12, V12 - VROTRW $16, V13, V13 - VROTRW $16, V14, V14 - VROTRW $16, V15, V15 - - // V8..V11 += V12..V15 - // V4..V7 <<<= ((V4..V7 XOR V8..V11), 12) - VADDW V8, V12, V8 - VADDW V9, V13, V9 - VADDW V10, V14, V10 - VADDW V11, V15, V11 - VXORV V4, V8, V4 - VXORV V5, V9, V5 - VXORV V6, V10, V6 - VXORV V7, V11, V7 - VROTRW $20, V4, V4 - VROTRW $20, V5, V5 - VROTRW $20, V6, V6 - VROTRW $20, V7, V7 - - // V0..V3 += V4..V7 - // V12..V15 <<<= ((V12..V15 XOR V0..V3), 8) - VADDW V0, V4, V0 - VADDW V1, V5, V1 - VADDW V2, V6, V2 - VADDW V3, V7, V3 - VXORV V12, V0, V12 - VXORV V13, V1, V13 - VXORV V14, V2, V14 - VXORV V15, V3, V15 - VROTRW $24, V12, V12 - VROTRW $24, V13, V13 - VROTRW $24, V14, V14 - VROTRW $24, V15, V15 - - // V8..V11 += V12..V15 - // V4..V7 <<<= ((V4..V7 XOR V8..V11), 7) - VADDW V12, V8, V8 - VADDW V13, V9, V9 - VADDW V14, V10, V10 - VADDW V15, V11, V11 - VXORV V4, V8, V4 - VXORV V5, V9, V5 - VXORV V6, V10, V6 - VXORV V7, V11, V7 - VROTRW $25, V4, V4 - VROTRW $25, V5, V5 - VROTRW $25, V6, V6 - VROTRW $25, V7, V7 - - // V0..V3 += V5..V7, V4 - // V15,V12-V14 <<<= ((V15,V12-V14 XOR V0..V3), 16) - VADDW V0, V5, V0 - VADDW V1, V6, V1 - VADDW V2, V7, V2 - VADDW V3, V4, V3 - VXORV V15, V0, V15 - VXORV V12, V1, V12 - VXORV V13, V2, V13 - VXORV V14, V3, V14 - VROTRW $16, V15, V15 - VROTRW $16, V12, V12 - VROTRW $16, V13, V13 - VROTRW $16, V14, V14 - - // V10,V11,V8,V9 += V15,V12,V13,V14 - // V5,V6,V7,V4 <<<= ((V5,V6,V7,V4 XOR V10,V11,V8,V9), 12) - VADDW V10, V15, V10 - VADDW V11, V12, V11 - VADDW V8, V13, V8 - VADDW V9, V14, V9 - VXORV V5, V10, V5 - VXORV V6, V11, V6 - VXORV V7, V8, V7 - VXORV V4, V9, V4 - VROTRW $20, V5, V5 - VROTRW $20, V6, V6 - VROTRW $20, V7, V7 - VROTRW $20, V4, V4 - - // V0..V3 += V5..V7, V4 - // V15,V12-V14 <<<= ((V15,V12-V14 XOR V0..V3), 8) - VADDW V5, V0, V0 - VADDW V6, V1, V1 - VADDW V7, V2, V2 - VADDW V4, V3, V3 - VXORV V15, V0, V15 - VXORV 
V12, V1, V12 - VXORV V13, V2, V13 - VXORV V14, V3, V14 - VROTRW $24, V15, V15 - VROTRW $24, V12, V12 - VROTRW $24, V13, V13 - VROTRW $24, V14, V14 - - // V10,V11,V8,V9 += V15,V12,V13,V14 - // V5,V6,V7,V4 <<<= ((V5,V6,V7,V4 XOR V10,V11,V8,V9), 7) - VADDW V15, V10, V10 - VADDW V12, V11, V11 - VADDW V13, V8, V8 - VADDW V14, V9, V9 - VXORV V5, V10, V5 - VXORV V6, V11, V6 - VXORV V7, V8, V7 - VXORV V4, V9, V4 - VROTRW $25, V5, V5 - VROTRW $25, V6, V6 - VROTRW $25, V7, V7 - VROTRW $25, V4, V4 - - SUBV $1, R15 - BNE R15, R0, chacha - - // load origin contants - // VLDREPL.W $0, R10, V16 - WORD $0x30200150 - // VLDREPL.W $1, R10, V17 - WORD $0x30200551 - // VLDREPL.W $2, R10, V18 - WORD $0x30200952 - // VLDREPL.W $3, R10, V19 - WORD $0x30200d53 - - // load origin keys - // VLDREPL.W $0, R7, V20 - WORD $0x302000f4 - // VLDREPL.W $1, R7, V21 - WORD $0x302004f5 - // VLDREPL.W $2, R7, V22 - WORD $0x302008f6 - // VLDREPL.W $3, R7, V23 - WORD $0x30200cf7 - // VLDREPL.W $4, R7, V24 - WORD $0x302010f8 - // VLDREPL.W $5, R7, V25 - WORD $0x302014f9 - // VLDREPL.W $6, R7, V26 - WORD $0x302018fa - // VLDREPL.W $7, R7, V27 - WORD $0x30201cfb - - // add back the initial state to generate the key stream - VADDW V30, V12, V12 // update counter in advance to prevent V30 from being overwritten - VADDW V16, V0, V0 - VADDW V17, V1, V1 - VADDW V18, V2, V2 - VADDW V19, V3, V3 - - // load origin counter + nonce - // VLDREPL.W $0, R9, V28 - WORD $0x3020013c - // VLDREPL.W $0, R8, V29 - WORD $0x3020011d - // VLDREPL.W $1, R8, V30 - WORD $0x3020051e - // VLDREPL.W $2, R8, V31 - WORD $0x3020091f - - VADDW V20, V4, V4 - VADDW V21, V5, V5 - VADDW V22, V6, V6 - VADDW V23, V7, V7 - VADDW V24, V8, V8 - VADDW V25, V9, V9 - VADDW V26, V10, V10 - VADDW V27, V11, V11 - VADDW V28, V12, V12 - VADDW V29, V13, V13 - VADDW V30, V14, V14 - VADDW V31, V15, V15 - - // shuffle - VILVLW V0, V1, V16 - VILVHW V0, V1, V17 - VILVLW V2, V3, V18 - VILVHW V2, V3, V19 - VILVLW V4, V5 ,V20 - VILVHW V4, V5, V21 - VILVLW V6, V7, 
V22 - VILVHW V6, V7, V23 - VILVLW V8, V9, V24 - VILVHW V8, V9, V25 - VILVLW V10, V11, V26 - VILVHW V10, V11, V27 - VILVLW V12, V13, V28 - VILVHW V12, V13, V29 - VILVLW V14, V15, V30 - VILVHW V14, V15, V31 - VILVLV V16, V18, V0 - VILVHV V16, V18, V4 - VILVLV V17, V19, V8 - VILVHV V17, V19, V12 - - // load src data from R5 - VMOVQ 0(R5), V16 - VMOVQ 16(R5), V17 - VMOVQ 32(R5), V18 - VMOVQ 48(R5), V19 - - VILVLV V20, V22, V1 - VILVHV V20, V22, V5 - VILVLV V21, V23, V9 - VILVHV V21, V23, V13 - - VMOVQ 64(R5), V20 - VMOVQ 80(R5), V21 - VMOVQ 96(R5), V22 - VMOVQ 112(R5), V23 - - VILVLV V24, V26, V2 - VILVHV V24, V26, V6 - VILVLV V25, V27, V10 - VILVHV V25, V27, V14 - - VMOVQ 128(R5), V24 - VMOVQ 144(R5), V25 - VMOVQ 160(R5), V26 - VMOVQ 176(R5), V27 - - VILVLV V28, V30, V3 - VILVHV V28, V30, V7 - VILVLV V29, V31, V11 - VILVHV V29, V31, V15 - - VMOVQ 192(R5), V28 - VMOVQ 208(R5), V29 - VMOVQ 224(R5), V30 - VMOVQ 240(R5), V31 - - VXORV V0, V16, V16 - VXORV V1, V17, V17 - VXORV V2, V18, V18 - VXORV V3, V19, V19 - - VMOVQ V16, 0(R4) - VMOVQ V17, 16(R4) - VMOVQ V18, 32(R4) - VMOVQ V19, 48(R4) - - VXORV V4, V20, V20 - VXORV V5, V21, V21 - VXORV V6, V22, V22 - VXORV V7, V23, V23 - - VMOVQ V20, 64(R4) - VMOVQ V21, 80(R4) - VMOVQ V22, 96(R4) - VMOVQ V23, 112(R4) - - VXORV V8, V24, V24 - VXORV V9, V25, V25 - VXORV V10, V26, V26 - VXORV V11, V27, V27 - - VMOVQ V24, 128(R4) - VMOVQ V25, 144(R4) - VMOVQ V26, 160(R4) - VMOVQ V27, 176(R4) - - VXORV V12, V28, V28 - VXORV V13, V29, V29 - VXORV V14, V30, V30 - VXORV V15, V31, V31 - - VMOVQ V28, 192(R4) - VMOVQ V29, 208(R4) - VMOVQ V30, 224(R4) - VMOVQ V31, 240(R4) - - ADD $4, R12, R12 - MOVW R12, (R9) // update counter - - ADDV $256, R4, R4 - ADDV $256, R5, R5 - SUBV $256, R6, R6 - BNE R6, R0, loop - - RET diff --git a/chacha20/chacha_noasm.go b/chacha20/chacha_noasm.go index 3853cc0..c709b72 100644 --- a/chacha20/chacha_noasm.go +++ b/chacha20/chacha_noasm.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // 
license that can be found in the LICENSE file. -//go:build (!arm64 && !loong64 && !s390x && !ppc64 && !ppc64le) || !gc || purego +//go:build (!arm64 && !s390x && !ppc64 && !ppc64le) || !gc || purego package chacha20 |
