From 7e6b38e0529fbcff585fd741e011f5128cbcd8a5 Mon Sep 17 00:00:00 2001 From: Filippo Valsorda Date: Wed, 23 Oct 2024 11:41:42 +0200 Subject: crypto/internal/mlkem768: move to crypto/internal/fips/mlkem In the process, replace out-of-module imports with their FIPS versions. For #69536 Change-Id: I83e900b7c38ecf760382e5dca7fd0b1eaa5a5589 Reviewed-on: https://go-review.googlesource.com/c/go/+/626879 LUCI-TryBot-Result: Go LUCI Reviewed-by: Russ Cox Auto-Submit: Filippo Valsorda Reviewed-by: Daniel McCarney Reviewed-by: Michael Knyszek --- src/crypto/internal/fips/mlkem/field.go | 455 ++ src/crypto/internal/fips/mlkem/field_test.go | 191 + src/crypto/internal/fips/mlkem/mlkem768.go | 389 ++ src/crypto/internal/fips/mlkem/mlkem768_test.go | 235 + src/crypto/internal/mlkem768/field.go | 456 -- src/crypto/internal/mlkem768/field_test.go | 191 - src/crypto/internal/mlkem768/mlkem768.go | 388 -- src/crypto/internal/mlkem768/mlkem768_test.go | 236 - src/crypto/tls/handshake_client.go | 6 +- src/crypto/tls/handshake_client_tls13.go | 4 +- src/crypto/tls/handshake_server_tls13.go | 4 +- src/crypto/tls/key_schedule.go | 18 +- src/crypto/tls/key_schedule_test.go | 4 +- src/go/build/deps_test.go | 2 +- src/vendor/golang.org/x/crypto/sha3/doc.go | 62 - src/vendor/golang.org/x/crypto/sha3/hashes.go | 109 - .../golang.org/x/crypto/sha3/hashes_noasm.go | 23 - src/vendor/golang.org/x/crypto/sha3/keccakf.go | 414 -- .../golang.org/x/crypto/sha3/keccakf_amd64.go | 13 - .../golang.org/x/crypto/sha3/keccakf_amd64.s | 5419 -------------------- src/vendor/golang.org/x/crypto/sha3/sha3.go | 185 - src/vendor/golang.org/x/crypto/sha3/sha3_s390x.go | 303 -- src/vendor/golang.org/x/crypto/sha3/sha3_s390x.s | 33 - src/vendor/golang.org/x/crypto/sha3/shake.go | 174 - src/vendor/golang.org/x/crypto/sha3/shake_noasm.go | 15 - src/vendor/golang.org/x/crypto/sha3/xor.go | 40 - src/vendor/modules.txt | 1 - 27 files changed, 1289 insertions(+), 8081 deletions(-) create mode 100644 src/crypto/internal/fips/mlkem/field.go create mode 100644 src/crypto/internal/fips/mlkem/field_test.go create mode 100644 src/crypto/internal/fips/mlkem/mlkem768.go create mode 100644 src/crypto/internal/fips/mlkem/mlkem768_test.go delete mode 100644 src/crypto/internal/mlkem768/field.go delete mode 100644 src/crypto/internal/mlkem768/field_test.go delete mode 100644 src/crypto/internal/mlkem768/mlkem768.go delete mode 100644 src/crypto/internal/mlkem768/mlkem768_test.go delete mode 100644 src/vendor/golang.org/x/crypto/sha3/doc.go delete mode 100644 src/vendor/golang.org/x/crypto/sha3/hashes.go delete mode 100644 src/vendor/golang.org/x/crypto/sha3/hashes_noasm.go delete mode 100644 src/vendor/golang.org/x/crypto/sha3/keccakf.go delete mode 100644 src/vendor/golang.org/x/crypto/sha3/keccakf_amd64.go delete mode 100644 src/vendor/golang.org/x/crypto/sha3/keccakf_amd64.s delete mode 100644 src/vendor/golang.org/x/crypto/sha3/sha3.go delete mode 100644 src/vendor/golang.org/x/crypto/sha3/sha3_s390x.go delete mode 100644 src/vendor/golang.org/x/crypto/sha3/sha3_s390x.s delete mode 100644 src/vendor/golang.org/x/crypto/sha3/shake.go delete mode 100644 src/vendor/golang.org/x/crypto/sha3/shake_noasm.go delete mode 100644 src/vendor/golang.org/x/crypto/sha3/xor.go (limited to 'src') diff --git a/src/crypto/internal/fips/mlkem/field.go b/src/crypto/internal/fips/mlkem/field.go new file mode 100644 index 0000000000..9c22ec86f9 --- /dev/null +++ b/src/crypto/internal/fips/mlkem/field.go @@ -0,0 +1,455 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package mlkem + +import ( + "crypto/internal/fips/sha3" + "errors" + "internal/byteorder" +) + +// fieldElement is an integer modulo q, an element of ℤ_q. It is always reduced. +type fieldElement uint16 + +// fieldCheckReduced checks that a value a is < q. +func fieldCheckReduced(a uint16) (fieldElement, error) { + if a >= q { + return 0, errors.New("unreduced field element") + } + return fieldElement(a), nil +} + +// fieldReduceOnce reduces a value a < 2q. +func fieldReduceOnce(a uint16) fieldElement { + x := a - q + // If x underflowed, then x >= 2¹⁶ - q > 2¹⁵, so the top bit is set. + x += (x >> 15) * q + return fieldElement(x) +} + +func fieldAdd(a, b fieldElement) fieldElement { + x := uint16(a + b) + return fieldReduceOnce(x) +} + +func fieldSub(a, b fieldElement) fieldElement { + x := uint16(a - b + q) + return fieldReduceOnce(x) +} + +const ( + barrettMultiplier = 5039 // 2¹² * 2¹² / q + barrettShift = 24 // log₂(2¹² * 2¹²) +) + +// fieldReduce reduces a value a < 2q² using Barrett reduction, to avoid +// potentially variable-time division. +func fieldReduce(a uint32) fieldElement { + quotient := uint32((uint64(a) * barrettMultiplier) >> barrettShift) + return fieldReduceOnce(uint16(a - quotient*q)) +} + +func fieldMul(a, b fieldElement) fieldElement { + x := uint32(a) * uint32(b) + return fieldReduce(x) +} + +// fieldMulSub returns a * (b - c). This operation is fused to save a +// fieldReduceOnce after the subtraction. +func fieldMulSub(a, b, c fieldElement) fieldElement { + x := uint32(a) * uint32(b-c+q) + return fieldReduce(x) +} + +// fieldAddMul returns a * b + c * d. This operation is fused to save a +// fieldReduceOnce and a fieldReduce. +func fieldAddMul(a, b, c, d fieldElement) fieldElement { + x := uint32(a) * uint32(b) + x += uint32(c) * uint32(d) + return fieldReduce(x) +} + +// compress maps a field element uniformly to the range 0 to 2ᵈ-1, according to +// FIPS 203, Definition 4.7. +func compress(x fieldElement, d uint8) uint16 { + // We want to compute (x * 2ᵈ) / q, rounded to nearest integer, with 1/2 + // rounding up (see FIPS 203, Section 2.3). + + // Barrett reduction produces a quotient and a remainder in the range [0, 2q), + // such that dividend = quotient * q + remainder. + dividend := uint32(x) << d // x * 2ᵈ + quotient := uint32(uint64(dividend) * barrettMultiplier >> barrettShift) + remainder := dividend - quotient*q + + // Since the remainder is in the range [0, 2q), not [0, q), we need to + // portion it into three spans for rounding. + // + // [ 0, q/2 ) -> round to 0 + // [ q/2, q + q/2 ) -> round to 1 + // [ q + q/2, 2q ) -> round to 2 + // + // We can convert that to the following logic: add 1 if remainder > q/2, + // then add 1 again if remainder > q + q/2. + // + // Note that if remainder > x, then ⌊x⌋ - remainder underflows, and the top + // bit of the difference will be set. + quotient += (q/2 - remainder) >> 31 & 1 + quotient += (q + q/2 - remainder) >> 31 & 1 + + // quotient might have overflowed at this point, so reduce it by masking. + var mask uint32 = (1 << d) - 1 + return uint16(quotient & mask) +} + +// decompress maps a number x between 0 and 2ᵈ-1 uniformly to the full range of +// field elements, according to FIPS 203, Definition 4.8. +func decompress(y uint16, d uint8) fieldElement { + // We want to compute (y * q) / 2ᵈ, rounded to nearest integer, with 1/2 + // rounding up (see FIPS 203, Section 2.3). + + dividend := uint32(y) * q + quotient := dividend >> d // (y * q) / 2ᵈ + + // The d'th least-significant bit of the dividend (the most significant bit + // of the remainder) is 1 for the top half of the values that divide to the + // same quotient, which are the ones that round up. + quotient += dividend >> (d - 1) & 1 + + // quotient is at most (2¹¹-1) * q / 2¹¹ + 1 = 3328, so it didn't overflow. + return fieldElement(quotient) +} + +// ringElement is a polynomial, an element of R_q, represented as an array +// according to FIPS 203, Section 2.4.4. +type ringElement [n]fieldElement + +// polyAdd adds two ringElements or nttElements. +func polyAdd[T ~[n]fieldElement](a, b T) (s T) { + for i := range s { + s[i] = fieldAdd(a[i], b[i]) + } + return s +} + +// polySub subtracts two ringElements or nttElements. +func polySub[T ~[n]fieldElement](a, b T) (s T) { + for i := range s { + s[i] = fieldSub(a[i], b[i]) + } + return s +} + +// polyByteEncode appends the 384-byte encoding of f to b. +// +// It implements ByteEncode₁₂, according to FIPS 203, Algorithm 5. +func polyByteEncode[T ~[n]fieldElement](b []byte, f T) []byte { + out, B := sliceForAppend(b, encodingSize12) + for i := 0; i < n; i += 2 { + x := uint32(f[i]) | uint32(f[i+1])<<12 + B[0] = uint8(x) + B[1] = uint8(x >> 8) + B[2] = uint8(x >> 16) + B = B[3:] + } + return out +} + +// polyByteDecode decodes the 384-byte encoding of a polynomial, checking that +// all the coefficients are properly reduced. This fulfills the "Modulus check" +// step of ML-KEM Encapsulation. +// +// It implements ByteDecode₁₂, according to FIPS 203, Algorithm 6. +func polyByteDecode[T ~[n]fieldElement](b []byte) (T, error) { + if len(b) != encodingSize12 { + return T{}, errors.New("mlkem: invalid encoding length") + } + var f T + for i := 0; i < n; i += 2 { + d := uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 + const mask12 = 0b1111_1111_1111 + var err error + if f[i], err = fieldCheckReduced(uint16(d & mask12)); err != nil { + return T{}, errors.New("mlkem: invalid polynomial encoding") + } + if f[i+1], err = fieldCheckReduced(uint16(d >> 12)); err != nil { + return T{}, errors.New("mlkem: invalid polynomial encoding") + } + b = b[3:] + } + return f, nil +} + +// sliceForAppend takes a slice and a requested number of bytes. It returns a +// slice with the contents of the given slice followed by that many bytes and a +// second slice that aliases into it and contains only the extra bytes. If the +// original slice has sufficient capacity then no allocation is performed. +func sliceForAppend(in []byte, n int) (head, tail []byte) { + if total := len(in) + n; cap(in) >= total { + head = in[:total] + } else { + head = make([]byte, total) + copy(head, in) + } + tail = head[len(in):] + return +} + +// ringCompressAndEncode1 appends a 32-byte encoding of a ring element to s, +// compressing one coefficients per bit. +// +// It implements Compress₁, according to FIPS 203, Definition 4.7, +// followed by ByteEncode₁, according to FIPS 203, Algorithm 5. +func ringCompressAndEncode1(s []byte, f ringElement) []byte { + s, b := sliceForAppend(s, encodingSize1) + for i := range b { + b[i] = 0 + } + for i := range f { + b[i/8] |= uint8(compress(f[i], 1) << (i % 8)) + } + return s +} + +// ringDecodeAndDecompress1 decodes a 32-byte slice to a ring element where each +// bit is mapped to 0 or ⌈q/2⌋. +// +// It implements ByteDecode₁, according to FIPS 203, Algorithm 6, +// followed by Decompress₁, according to FIPS 203, Definition 4.8. +func ringDecodeAndDecompress1(b *[encodingSize1]byte) ringElement { + var f ringElement + for i := range f { + b_i := b[i/8] >> (i % 8) & 1 + const halfQ = (q + 1) / 2 // ⌈q/2⌋, rounded up per FIPS 203, Section 2.3 + f[i] = fieldElement(b_i) * halfQ // 0 decompresses to 0, and 1 to ⌈q/2⌋ + } + return f +} + +// ringCompressAndEncode4 appends a 128-byte encoding of a ring element to s, +// compressing two coefficients per byte. +// +// It implements Compress₄, according to FIPS 203, Definition 4.7, +// followed by ByteEncode₄, according to FIPS 203, Algorithm 5. +func ringCompressAndEncode4(s []byte, f ringElement) []byte { + s, b := sliceForAppend(s, encodingSize4) + for i := 0; i < n; i += 2 { + b[i/2] = uint8(compress(f[i], 4) | compress(f[i+1], 4)<<4) + } + return s +} + +// ringDecodeAndDecompress4 decodes a 128-byte encoding of a ring element where +// each four bits are mapped to an equidistant distribution. +// +// It implements ByteDecode₄, according to FIPS 203, Algorithm 6, +// followed by Decompress₄, according to FIPS 203, Definition 4.8. +func ringDecodeAndDecompress4(b *[encodingSize4]byte) ringElement { + var f ringElement + for i := 0; i < n; i += 2 { + f[i] = fieldElement(decompress(uint16(b[i/2]&0b1111), 4)) + f[i+1] = fieldElement(decompress(uint16(b[i/2]>>4), 4)) + } + return f +} + +// ringCompressAndEncode10 appends a 320-byte encoding of a ring element to s, +// compressing four coefficients per five bytes. +// +// It implements Compress₁₀, according to FIPS 203, Definition 4.7, +// followed by ByteEncode₁₀, according to FIPS 203, Algorithm 5. +func ringCompressAndEncode10(s []byte, f ringElement) []byte { + s, b := sliceForAppend(s, encodingSize10) + for i := 0; i < n; i += 4 { + var x uint64 + x |= uint64(compress(f[i+0], 10)) + x |= uint64(compress(f[i+1], 10)) << 10 + x |= uint64(compress(f[i+2], 10)) << 20 + x |= uint64(compress(f[i+3], 10)) << 30 + b[0] = uint8(x) + b[1] = uint8(x >> 8) + b[2] = uint8(x >> 16) + b[3] = uint8(x >> 24) + b[4] = uint8(x >> 32) + b = b[5:] + } + return s +} + +// ringDecodeAndDecompress10 decodes a 320-byte encoding of a ring element where +// each ten bits are mapped to an equidistant distribution. +// +// It implements ByteDecode₁₀, according to FIPS 203, Algorithm 6, +// followed by Decompress₁₀, according to FIPS 203, Definition 4.8. +func ringDecodeAndDecompress10(bb *[encodingSize10]byte) ringElement { + b := bb[:] + var f ringElement + for i := 0; i < n; i += 4 { + x := uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 | uint64(b[4])<<32 + b = b[5:] + f[i] = fieldElement(decompress(uint16(x>>0&0b11_1111_1111), 10)) + f[i+1] = fieldElement(decompress(uint16(x>>10&0b11_1111_1111), 10)) + f[i+2] = fieldElement(decompress(uint16(x>>20&0b11_1111_1111), 10)) + f[i+3] = fieldElement(decompress(uint16(x>>30&0b11_1111_1111), 10)) + } + return f +} + +// samplePolyCBD draws a ringElement from the special Dη distribution given a +// stream of random bytes generated by the PRF function, according to FIPS 203, +// Algorithm 8 and Definition 4.3. +func samplePolyCBD(s []byte, b byte) ringElement { + prf := sha3.NewShake256() + prf.Write(s) + prf.Write([]byte{b}) + B := make([]byte, 64*2) // η = 2 + prf.Read(B) + + // SamplePolyCBD simply draws four (2η) bits for each coefficient, and adds + // the first two and subtracts the last two. + + var f ringElement + for i := 0; i < n; i += 2 { + b := B[i/2] + b_7, b_6, b_5, b_4 := b>>7, b>>6&1, b>>5&1, b>>4&1 + b_3, b_2, b_1, b_0 := b>>3&1, b>>2&1, b>>1&1, b&1 + f[i] = fieldSub(fieldElement(b_0+b_1), fieldElement(b_2+b_3)) + f[i+1] = fieldSub(fieldElement(b_4+b_5), fieldElement(b_6+b_7)) + } + return f +} + +// nttElement is an NTT representation, an element of T_q, represented as an +// array according to FIPS 203, Section 2.4.4. +type nttElement [n]fieldElement + +// gammas are the values ζ^2BitRev7(i)+1 mod q for each index i, according to +// FIPS 203, Appendix A (with negative values reduced to positive). +var gammas = [128]fieldElement{17, 3312, 2761, 568, 583, 2746, 2649, 680, 1637, 1692, 723, 2606, 2288, 1041, 1100, 2229, 1409, 1920, 2662, 667, 3281, 48, 233, 3096, 756, 2573, 2156, 1173, 3015, 314, 3050, 279, 1703, 1626, 1651, 1678, 2789, 540, 1789, 1540, 1847, 1482, 952, 2377, 1461, 1868, 2687, 642, 939, 2390, 2308, 1021, 2437, 892, 2388, 941, 733, 2596, 2337, 992, 268, 3061, 641, 2688, 1584, 1745, 2298, 1031, 2037, 1292, 3220, 109, 375, 2954, 2549, 780, 2090, 1239, 1645, 1684, 1063, 2266, 319, 3010, 2773, 556, 757, 2572, 2099, 1230, 561, 2768, 2466, 863, 2594, 735, 2804, 525, 1092, 2237, 403, 2926, 1026, 2303, 1143, 2186, 2150, 1179, 2775, 554, 886, 2443, 1722, 1607, 1212, 2117, 1874, 1455, 1029, 2300, 2110, 1219, 2935, 394, 885, 2444, 2154, 1175} + +// nttMul multiplies two nttElements. +// +// It implements MultiplyNTTs, according to FIPS 203, Algorithm 11. +func nttMul(f, g nttElement) nttElement { + var h nttElement + // We use i += 2 for bounds check elimination. See https://go.dev/issue/66826. + for i := 0; i < 256; i += 2 { + a0, a1 := f[i], f[i+1] + b0, b1 := g[i], g[i+1] + h[i] = fieldAddMul(a0, b0, fieldMul(a1, b1), gammas[i/2]) + h[i+1] = fieldAddMul(a0, b1, a1, b0) + } + return h +} + +// zetas are the values ζ^BitRev7(k) mod q for each index k, according to FIPS +// 203, Appendix A. +var zetas = [128]fieldElement{1, 1729, 2580, 3289, 2642, 630, 1897, 848, 1062, 1919, 193, 797, 2786, 3260, 569, 1746, 296, 2447, 1339, 1476, 3046, 56, 2240, 1333, 1426, 2094, 535, 2882, 2393, 2879, 1974, 821, 289, 331, 3253, 1756, 1197, 2304, 2277, 2055, 650, 1977, 2513, 632, 2865, 33, 1320, 1915, 2319, 1435, 807, 452, 1438, 2868, 1534, 2402, 2647, 2617, 1481, 648, 2474, 3110, 1227, 910, 17, 2761, 583, 2649, 1637, 723, 2288, 1100, 1409, 2662, 3281, 233, 756, 2156, 3015, 3050, 1703, 1651, 2789, 1789, 1847, 952, 1461, 2687, 939, 2308, 2437, 2388, 733, 2337, 268, 641, 1584, 2298, 2037, 3220, 375, 2549, 2090, 1645, 1063, 319, 2773, 757, 2099, 561, 2466, 2594, 2804, 1092, 403, 1026, 1143, 2150, 2775, 886, 1722, 1212, 1874, 1029, 2110, 2935, 885, 2154} + +// ntt maps a ringElement to its nttElement representation. +// +// It implements NTT, according to FIPS 203, Algorithm 9. +func ntt(f ringElement) nttElement { + k := 1 + for len := 128; len >= 2; len /= 2 { + for start := 0; start < 256; start += 2 * len { + zeta := zetas[k] + k++ + // Bounds check elimination hint. + f, flen := f[start:start+len], f[start+len:start+len+len] + for j := 0; j < len; j++ { + t := fieldMul(zeta, flen[j]) + flen[j] = fieldSub(f[j], t) + f[j] = fieldAdd(f[j], t) + } + } + } + return nttElement(f) +} + +// inverseNTT maps a nttElement back to the ringElement it represents. +// +// It implements NTT⁻¹, according to FIPS 203, Algorithm 10. +func inverseNTT(f nttElement) ringElement { + k := 127 + for len := 2; len <= 128; len *= 2 { + for start := 0; start < 256; start += 2 * len { + zeta := zetas[k] + k-- + // Bounds check elimination hint. + f, flen := f[start:start+len], f[start+len:start+len+len] + for j := 0; j < len; j++ { + t := f[j] + f[j] = fieldAdd(t, flen[j]) + flen[j] = fieldMulSub(zeta, flen[j], t) + } + } + } + for i := range f { + f[i] = fieldMul(f[i], 3303) // 3303 = 128⁻¹ mod q + } + return ringElement(f) +} + +// sampleNTT draws a uniformly random nttElement from a stream of uniformly +// random bytes generated by the XOF function, according to FIPS 203, +// Algorithm 7. +func sampleNTT(rho []byte, ii, jj byte) nttElement { + B := sha3.NewShake128() + B.Write(rho) + B.Write([]byte{ii, jj}) + + // SampleNTT essentially draws 12 bits at a time from r, interprets them in + // little-endian, and rejects values higher than q, until it drew 256 + // values. (The rejection rate is approximately 19%.) + // + // To do this from a bytes stream, it draws three bytes at a time, and + // splits them into two uint16 appropriately masked. + // + // r₀ r₁ r₂ + // |- - - - - - - -|- - - - - - - -|- - - - - - - -| + // + // Uint16(r₀ || r₁) + // |- - - - - - - - - - - - - - - -| + // |- - - - - - - - - - - -| + // d₁ + // + // Uint16(r₁ || r₂) + // |- - - - - - - - - - - - - - - -| + // |- - - - - - - - - - - -| + // d₂ + // + // Note that in little-endian, the rightmost bits are the most significant + // bits (dropped with a mask) and the leftmost bits are the least + // significant bits (dropped with a right shift). + + var a nttElement + var j int // index into a + var buf [24]byte // buffered reads from B + off := len(buf) // index into buf, starts in a "buffer fully consumed" state + for { + if off >= len(buf) { + B.Read(buf[:]) + off = 0 + } + d1 := byteorder.LeUint16(buf[off:]) & 0b1111_1111_1111 + d2 := byteorder.LeUint16(buf[off+1:]) >> 4 + off += 3 + if d1 < q { + a[j] = fieldElement(d1) + j++ + } + if j >= len(a) { + break + } + if d2 < q { + a[j] = fieldElement(d2) + j++ + } + if j >= len(a) { + break + } + } + return a +} diff --git a/src/crypto/internal/fips/mlkem/field_test.go b/src/crypto/internal/fips/mlkem/field_test.go new file mode 100644 index 0000000000..a842913627 --- /dev/null +++ b/src/crypto/internal/fips/mlkem/field_test.go @@ -0,0 +1,191 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package mlkem + +import ( + "math/big" + "strconv" + "testing" +) + +func TestFieldReduce(t *testing.T) { + for a := uint32(0); a < 2*q*q; a++ { + got := fieldReduce(a) + exp := fieldElement(a % q) + if got != exp { + t.Fatalf("reduce(%d) = %d, expected %d", a, got, exp) + } + } +} + +func TestFieldAdd(t *testing.T) { + for a := fieldElement(0); a < q; a++ { + for b := fieldElement(0); b < q; b++ { + got := fieldAdd(a, b) + exp := (a + b) % q + if got != exp { + t.Fatalf("%d + %d = %d, expected %d", a, b, got, exp) + } + } + } +} + +func TestFieldSub(t *testing.T) { + for a := fieldElement(0); a < q; a++ { + for b := fieldElement(0); b < q; b++ { + got := fieldSub(a, b) + exp := (a - b + q) % q + if got != exp { + t.Fatalf("%d - %d = %d, expected %d", a, b, got, exp) + } + } + } +} + +func TestFieldMul(t *testing.T) { + for a := fieldElement(0); a < q; a++ { + for b := fieldElement(0); b < q; b++ { + got := fieldMul(a, b) + exp := fieldElement((uint32(a) * uint32(b)) % q) + if got != exp { + t.Fatalf("%d * %d = %d, expected %d", a, b, got, exp) + } + } + } +} + +func TestDecompressCompress(t *testing.T) { + for _, bits := range []uint8{1, 4, 10} { + for a := uint16(0); a < 1<= q { + t.Fatalf("decompress(%d, %d) = %d >= q", a, bits, f) + } + got := compress(f, bits) + if got != a { + t.Fatalf("compress(decompress(%d, %d), %d) = %d", a, bits, bits, got) + } + } + + for a := fieldElement(0); a < q; a++ { + c := compress(a, bits) + if c >= 1<= 2^bits", a, bits, c) + } + got := decompress(c, bits) + diff := min(a-got, got-a, a-got+q, got-a+q) + ceil := q / (1 << bits) + if diff > fieldElement(ceil) { + t.Fatalf("decompress(compress(%d, %d), %d) = %d (diff %d, max diff %d)", + a, bits, bits, got, diff, ceil) + } + } + } +} + +func CompressRat(x fieldElement, d uint8) uint16 { + if x >= q { + panic("x out of range") + } + if d <= 0 || d >= 12 { + panic("d out of range") + } + + precise := big.NewRat((1<= 1<= 12 { + panic("d out of range") + } + + precise := big.NewRat(q*int64(y), 1<>7 != 0 { + panic("not 7 bits") + } + var r uint8 + r |= n >> 6 & 0b0000_0001 + r |= n >> 4 & 0b0000_0010 + r |= n >> 2 & 0b0000_0100 + r |= n /**/ & 0b0000_1000 + r |= n << 2 & 0b0001_0000 + r |= n << 4 & 0b0010_0000 + r |= n << 6 & 0b0100_0000 + return r +} + +func TestZetas(t *testing.T) { + ζ := big.NewInt(17) + q := big.NewInt(q) + for k, zeta := range zetas { + // ζ^BitRev7(k) mod q + exp := new(big.Int).Exp(ζ, big.NewInt(int64(BitRev7(uint8(k)))), q) + if big.NewInt(int64(zeta)).Cmp(exp) != 0 { + t.Errorf("zetas[%d] = %v, expected %v", k, zeta, exp) + } + } +} + +func TestGammas(t *testing.T) { + ζ := big.NewInt(17) + q := big.NewInt(q) + for k, gamma := range gammas { + // ζ^2BitRev7(i)+1 + exp := new(big.Int).Exp(ζ, big.NewInt(int64(BitRev7(uint8(k)))*2+1), q) + if big.NewInt(int64(gamma)).Cmp(exp) != 0 { + t.Errorf("gammas[%d] = %v, expected %v", k, gamma, exp) + } + } +} diff --git a/src/crypto/internal/fips/mlkem/mlkem768.go b/src/crypto/internal/fips/mlkem/mlkem768.go new file mode 100644 index 0000000000..afbd31abe5 --- /dev/null +++ b/src/crypto/internal/fips/mlkem/mlkem768.go @@ -0,0 +1,389 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package mlkem implements the quantum-resistant key encapsulation method +// ML-KEM (formerly known as Kyber), as specified in [NIST FIPS 203]. +// +// Only the recommended ML-KEM-768 parameter set is provided. +// +// [NIST FIPS 203]: https://doi.org/10.6028/NIST.FIPS.203 +package mlkem + +// This package targets security, correctness, simplicity, readability, and +// reviewability as its primary goals. All critical operations are performed in +// constant time. +// +// Variable and function names, as well as code layout, are selected to +// facilitate reviewing the implementation against the NIST FIPS 203 document. +// +// Reviewers unfamiliar with polynomials or linear algebra might find the +// background at https://words.filippo.io/kyber-math/ useful. + +import ( + "crypto/internal/fips/drbg" + "crypto/internal/fips/sha3" + "crypto/internal/fips/subtle" + "errors" +) + +const ( + // ML-KEM global constants. + n = 256 + q = 3329 + + // encodingSizeX is the byte size of a ringElement or nttElement encoded + // by ByteEncode_X (FIPS 203, Algorithm 5). + encodingSize12 = n * 12 / 8 + encodingSize10 = n * 10 / 8 + encodingSize4 = n * 4 / 8 + encodingSize1 = n * 1 / 8 + + messageSize = encodingSize1 + + SharedKeySize = 32 + SeedSize = 32 + 32 +) + +// ML-KEM-768 parameters. +const ( + k = 3 + + decryptionKeySize = k * encodingSize12 + encryptionKeySize = k*encodingSize12 + 32 + + CiphertextSize768 = k*encodingSize10 + encodingSize4 + EncapsulationKeySize768 = encryptionKeySize +) + +// A DecapsulationKey768 is the secret key used to decapsulate a shared key from a +// ciphertext. It includes various precomputed values. +type DecapsulationKey768 struct { + d [32]byte // decapsulation key seed + z [32]byte // implicit rejection sampling seed + + ρ [32]byte // sampleNTT seed for A, stored for the encapsulation key + h [32]byte // H(ek), stored for ML-KEM.Decaps_internal + + encryptionKey + decryptionKey +} + +// Bytes returns the decapsulation key as a 64-byte seed in the "d || z" form. +// +// The decapsulation key must be kept secret. +func (dk *DecapsulationKey768) Bytes() []byte { + var b [SeedSize]byte + copy(b[:], dk.d[:]) + copy(b[32:], dk.z[:]) + return b[:] +} + +// EncapsulationKey returns the public encapsulation key necessary to produce +// ciphertexts. +func (dk *DecapsulationKey768) EncapsulationKey() *EncapsulationKey768 { + return &EncapsulationKey768{ + ρ: dk.ρ, + h: dk.h, + encryptionKey: dk.encryptionKey, + } +} + +// An EncapsulationKey768 is the public key used to produce ciphertexts to be +// decapsulated by the corresponding [DecapsulationKey768]. +type EncapsulationKey768 struct { + ρ [32]byte // sampleNTT seed for A + h [32]byte // H(ek) + encryptionKey +} + +// Bytes returns the encapsulation key as a byte slice. +func (ek *EncapsulationKey768) Bytes() []byte { + // The actual logic is in a separate function to outline this allocation. + b := make([]byte, 0, EncapsulationKeySize768) + return ek.bytes(b) +} + +func (ek *EncapsulationKey768) bytes(b []byte) []byte { + for i := range ek.t { + b = polyByteEncode(b, ek.t[i]) + } + b = append(b, ek.ρ[:]...) + return b +} + +// encryptionKey is the parsed and expanded form of a PKE encryption key. +type encryptionKey struct { + t [k]nttElement // ByteDecode₁₂(ek[:384k]) + a [k * k]nttElement // A[i*k+j] = sampleNTT(ρ, j, i) +} + +// decryptionKey is the parsed and expanded form of a PKE decryption key. +type decryptionKey struct { + s [k]nttElement // ByteDecode₁₂(dk[:decryptionKeySize]) +} + +// GenerateKey768 generates a new decapsulation key, drawing random bytes from +// a DRBG. The decapsulation key must be kept secret. +func GenerateKey768() (*DecapsulationKey768, error) { + // The actual logic is in a separate function to outline this allocation. + dk := &DecapsulationKey768{} + return generateKey(dk), nil +} + +func generateKey(dk *DecapsulationKey768) *DecapsulationKey768 { + var d [32]byte + drbg.Read(d[:]) + var z [32]byte + drbg.Read(z[:]) + return kemKeyGen(dk, &d, &z) +} + +// NewDecapsulationKey768 parses a decapsulation key from a 64-byte +// seed in the "d || z" form. The seed must be uniformly random. +func NewDecapsulationKey768(seed []byte) (*DecapsulationKey768, error) { + // The actual logic is in a separate function to outline this allocation. + dk := &DecapsulationKey768{} + return newKeyFromSeed(dk, seed) +} + +func newKeyFromSeed(dk *DecapsulationKey768, seed []byte) (*DecapsulationKey768, error) { + if len(seed) != SeedSize { + return nil, errors.New("mlkem: invalid seed length") + } + d := (*[32]byte)(seed[:32]) + z := (*[32]byte)(seed[32:]) + return kemKeyGen(dk, d, z), nil +} + +// kemKeyGen generates a decapsulation key. +// +// It implements ML-KEM.KeyGen_internal according to FIPS 203, Algorithm 16, and +// K-PKE.KeyGen according to FIPS 203, Algorithm 13. The two are merged to save +// copies and allocations. +func kemKeyGen(dk *DecapsulationKey768, d, z *[32]byte) *DecapsulationKey768 { + if dk == nil { + dk = &DecapsulationKey768{} + } + dk.d = *d + dk.z = *z + + g := sha3.New512() + g.Write(d[:]) + g.Write([]byte{k}) // Module dimension as a domain separator. + G := g.Sum(make([]byte, 0, 64)) + ρ, σ := G[:32], G[32:] + dk.ρ = [32]byte(ρ) + + A := &dk.a + for i := byte(0); i < k; i++ { + for j := byte(0); j < k; j++ { + A[i*k+j] = sampleNTT(ρ, j, i) + } + } + + var N byte + s := &dk.s + for i := range s { + s[i] = ntt(samplePolyCBD(σ, N)) + N++ + } + e := make([]nttElement, k) + for i := range e { + e[i] = ntt(samplePolyCBD(σ, N)) + N++ + } + + t := &dk.t + for i := range t { // t = A ◦ s + e + t[i] = e[i] + for j := range s { + t[i] = polyAdd(t[i], nttMul(A[i*k+j], s[j])) + } + } + + H := sha3.New256() + ek := dk.EncapsulationKey().Bytes() + H.Write(ek) + H.Sum(dk.h[:0]) + + return dk +} + +// Encapsulate generates a shared key and an associated ciphertext from an +// encapsulation key, drawing random bytes from a DRBG. +// +// The shared key must be kept secret. +func (ek *EncapsulationKey768) Encapsulate() (ciphertext, sharedKey []byte) { + // The actual logic is in a separate function to outline this allocation. + var cc [CiphertextSize768]byte + return ek.encapsulate(&cc) +} + +func (ek *EncapsulationKey768) encapsulate(cc *[CiphertextSize768]byte) (ciphertext, sharedKey []byte) { + var m [messageSize]byte + drbg.Read(m[:]) + // Note that the modulus check (step 2 of the encapsulation key check from + // FIPS 203, Section 7.2) is performed by polyByteDecode in parseEK. + return kemEncaps(cc, ek, &m) +} + +// kemEncaps generates a shared key and an associated ciphertext. +// +// It implements ML-KEM.Encaps_internal according to FIPS 203, Algorithm 17. +func kemEncaps(cc *[CiphertextSize768]byte, ek *EncapsulationKey768, m *[messageSize]byte) (c, K []byte) { + if cc == nil { + cc = &[CiphertextSize768]byte{} + } + + g := sha3.New512() + g.Write(m[:]) + g.Write(ek.h[:]) + G := g.Sum(nil) + K, r := G[:SharedKeySize], G[SharedKeySize:] + c = pkeEncrypt(cc, &ek.encryptionKey, m, r) + return c, K +} + +// NewEncapsulationKey768 parses an encapsulation key from its encoded form. +// If the encapsulation key is not valid, NewEncapsulationKey768 returns an error. +func NewEncapsulationKey768(encapsulationKey []byte) (*EncapsulationKey768, error) { + // The actual logic is in a separate function to outline this allocation. + ek := &EncapsulationKey768{} + return parseEK(ek, encapsulationKey) +} + +// parseEK parses an encryption key from its encoded form. +// +// It implements the initial stages of K-PKE.Encrypt according to FIPS 203, +// Algorithm 14. +func parseEK(ek *EncapsulationKey768, ekPKE []byte) (*EncapsulationKey768, error) { + if len(ekPKE) != encryptionKeySize { + return nil, errors.New("mlkem: invalid encapsulation key length") + } + + h := sha3.New256() + h.Write(ekPKE) + h.Sum(ek.h[:0]) + + for i := range ek.t { + var err error + ek.t[i], err = polyByteDecode[nttElement](ekPKE[:encodingSize12]) + if err != nil { + return nil, err + } + ekPKE = ekPKE[encodingSize12:] + } + copy(ek.ρ[:], ekPKE) + + for i := byte(0); i < k; i++ { + for j := byte(0); j < k; j++ { + ek.a[i*k+j] = sampleNTT(ek.ρ[:], j, i) + } + } + + return ek, nil +} + +// pkeEncrypt encrypt a plaintext message. +// +// It implements K-PKE.Encrypt according to FIPS 203, Algorithm 14, although the +// computation of t and AT is done in parseEK. +func pkeEncrypt(cc *[CiphertextSize768]byte, ex *encryptionKey, m *[messageSize]byte, rnd []byte) []byte { + var N byte + r, e1 := make([]nttElement, k), make([]ringElement, k) + for i := range r { + r[i] = ntt(samplePolyCBD(rnd, N)) + N++ + } + for i := range e1 { + e1[i] = samplePolyCBD(rnd, N) + N++ + } + e2 := samplePolyCBD(rnd, N) + + u := make([]ringElement, k) // NTT⁻¹(AT ◦ r) + e1 + for i := range u { + u[i] = e1[i] + for j := range r { + // Note that i and j are inverted, as we need the transposed of A. + u[i] = polyAdd(u[i], inverseNTT(nttMul(ex.a[j*k+i], r[j]))) + } + } + + μ := ringDecodeAndDecompress1(m) + + var vNTT nttElement // t⊺ ◦ r + for i := range ex.t { + vNTT = polyAdd(vNTT, nttMul(ex.t[i], r[i])) + } + v := polyAdd(polyAdd(inverseNTT(vNTT), e2), μ) + + c := cc[:0] + for _, f := range u { + c = ringCompressAndEncode10(c, f) + } + c = ringCompressAndEncode4(c, v) + + return c +} + +// Decapsulate generates a shared key from a ciphertext and a decapsulation key. +// If the ciphertext is not valid, Decapsulate returns an error. +// +// The shared key must be kept secret. +func (dk *DecapsulationKey768) Decapsulate(ciphertext []byte) (sharedKey []byte, err error) { + if len(ciphertext) != CiphertextSize768 { + return nil, errors.New("mlkem: invalid ciphertext length") + } + c := (*[CiphertextSize768]byte)(ciphertext) + // Note that the hash check (step 3 of the decapsulation input check from + // FIPS 203, Section 7.3) is foregone as a DecapsulationKey is always + // validly generated by ML-KEM.KeyGen_internal. + return kemDecaps(dk, c), nil +} + +// kemDecaps produces a shared key from a ciphertext. +// +// It implements ML-KEM.Decaps_internal according to FIPS 203, Algorithm 18. +func kemDecaps(dk *DecapsulationKey768, c *[CiphertextSize768]byte) (K []byte) { + m := pkeDecrypt(&dk.decryptionKey, c) + g := sha3.New512() + g.Write(m[:]) + g.Write(dk.h[:]) + G := g.Sum(make([]byte, 0, 64)) + Kprime, r := G[:SharedKeySize], G[SharedKeySize:] + J := sha3.NewShake256() + J.Write(dk.z[:]) + J.Write(c[:]) + Kout := make([]byte, SharedKeySize) + J.Read(Kout) + var cc [CiphertextSize768]byte + c1 := pkeEncrypt(&cc, &dk.encryptionKey, (*[32]byte)(m), r) + + subtle.ConstantTimeCopy(subtle.ConstantTimeCompare(c[:], c1), Kout, Kprime) + return Kout +} + +// pkeDecrypt decrypts a ciphertext. +// +// It implements K-PKE.Decrypt according to FIPS 203, Algorithm 15, +// although s is retained from kemKeyGen. +func pkeDecrypt(dx *decryptionKey, c *[CiphertextSize768]byte) []byte { + u := make([]ringElement, k) + for i := range u { + b := (*[encodingSize10]byte)(c[encodingSize10*i : encodingSize10*(i+1)]) + u[i] = ringDecodeAndDecompress10(b) + } + + b := (*[encodingSize4]byte)(c[encodingSize10*k:]) + v := ringDecodeAndDecompress4(b) + + var mask nttElement // s⊺ ◦ NTT(u) + for i := range dx.s { + mask = polyAdd(mask, nttMul(dx.s[i], ntt(u[i]))) + } + w := polySub(v, inverseNTT(mask)) + + return ringCompressAndEncode1(nil, w) +} diff --git a/src/crypto/internal/fips/mlkem/mlkem768_test.go b/src/crypto/internal/fips/mlkem/mlkem768_test.go new file mode 100644 index 0000000000..28d17fe81a --- /dev/null +++ b/src/crypto/internal/fips/mlkem/mlkem768_test.go @@ -0,0 +1,235 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package mlkem + +import ( + "bytes" + "crypto/internal/fips/sha3" + "crypto/rand" + _ "embed" + "encoding/hex" + "flag" + "testing" +) + +func TestRoundTrip(t *testing.T) { + dk, err := GenerateKey768() + if err != nil { + t.Fatal(err) + } + c, Ke := dk.EncapsulationKey().Encapsulate() + Kd, err := dk.Decapsulate(c) + if err != nil { + t.Fatal(err) + } + if !bytes.Equal(Ke, Kd) { + t.Fail() + } + + dk1, err := GenerateKey768() + if err != nil { + t.Fatal(err) + } + if bytes.Equal(dk.EncapsulationKey().Bytes(), dk1.EncapsulationKey().Bytes()) { + t.Fail() + } + if bytes.Equal(dk.Bytes(), dk1.Bytes()) { + t.Fail() + } + + c1, Ke1 := dk.EncapsulationKey().Encapsulate() + if bytes.Equal(c, c1) { + t.Fail() + } + if bytes.Equal(Ke, Ke1) { + t.Fail() + } +} + +func TestBadLengths(t *testing.T) { + dk, err := GenerateKey768() + if err != nil { + t.Fatal(err) + } + ek := dk.EncapsulationKey() + ekBytes := dk.EncapsulationKey().Bytes() + c, _ := ek.Encapsulate() + + for i := 0; i < len(ekBytes)-1; i++ { + if _, err := NewEncapsulationKey768(ekBytes[:i]); err == nil { + t.Errorf("expected error for ek length %d", i) + } + } + ekLong := ekBytes + for i := 0; i < 100; i++ { + ekLong = append(ekLong, 0) + if _, err := NewEncapsulationKey768(ekLong); err == nil { + t.Errorf("expected error for ek length %d", len(ekLong)) + } + } + + for i := 0; i < len(c)-1; i++ { + if _, err := dk.Decapsulate(c[:i]); err == nil { + t.Errorf("expected error for c length %d", i) + } + } + cLong := c + for i := 0; i < 100; i++ { + cLong = append(cLong, 0) + if _, err := dk.Decapsulate(cLong); err == nil { + t.Errorf("expected error for c length %d", len(cLong)) + } + } +} + +var millionFlag = flag.Bool("million", false, "run the million vector test") + +// TestAccumulated accumulates 10k (or 100, or 1M) random vectors and checks the +// hash of the result, to avoid checking in 150MB of test vectors. +func TestAccumulated(t *testing.T) { + n := 10000 + expected := "8a518cc63da366322a8e7a818c7a0d63483cb3528d34a4cf42f35d5ad73f22fc" + if testing.Short() { + n = 100 + expected = "1114b1b6699ed191734fa339376afa7e285c9e6acf6ff0177d346696ce564415" + } + if *millionFlag { + n = 1000000 + expected = "424bf8f0e8ae99b78d788a6e2e8e9cdaf9773fc0c08a6f433507cb559edfd0f0" + } + + s := sha3.NewShake128() + o := sha3.NewShake128() + seed := make([]byte, SeedSize) + var msg [messageSize]byte + ct1 := make([]byte, CiphertextSize768) + + for i := 0; i < n; i++ { + s.Read(seed) + dk, err := NewDecapsulationKey768(seed) + if err != nil { + t.Fatal(err) + } + ek := dk.EncapsulationKey() + o.Write(ek.Bytes()) + + s.Read(msg[:]) + ct, k := kemEncaps(nil, ek, &msg) + o.Write(ct) + o.Write(k) + + kk, err := dk.Decapsulate(ct) + if err != nil { + t.Fatal(err) + } + if !bytes.Equal(kk, k) { + t.Errorf("k: got %x, expected %x", kk, k) + } + + s.Read(ct1) + k1, err := dk.Decapsulate(ct1) + if err != nil { + t.Fatal(err) + } + o.Write(k1) + } + + got := hex.EncodeToString(o.Sum(nil)) + if got != expected { + t.Errorf("got %s, expected %s", got, expected) + } +} + +var sink byte + +func BenchmarkKeyGen(b *testing.B) { + var dk DecapsulationKey768 + var d, z [32]byte + rand.Read(d[:]) + rand.Read(z[:]) + b.ResetTimer() + for i := 0; i < b.N; i++ { + dk := kemKeyGen(&dk, &d, &z) + sink ^= dk.EncapsulationKey().Bytes()[0] + } +} + +func BenchmarkEncaps(b *testing.B) { + seed := make([]byte, SeedSize) + rand.Read(seed) + var m [messageSize]byte + rand.Read(m[:]) + dk, err := NewDecapsulationKey768(seed) + if err != nil { + b.Fatal(err) + } + ekBytes := dk.EncapsulationKey().Bytes() + var c [CiphertextSize768]byte + b.ResetTimer() + for i := 0; i < b.N; i++ { + ek, err := NewEncapsulationKey768(ekBytes) + if err != nil { + b.Fatal(err) + } + c, K := kemEncaps(&c, ek, &m) + sink ^= c[0] ^ K[0] + } +} + +func BenchmarkDecaps(b *testing.B) { + dk, err := GenerateKey768() + if err != nil { + b.Fatal(err) + } + ek := dk.EncapsulationKey() + c, _ := ek.Encapsulate() + b.ResetTimer() + for i := 0; i < b.N; i++ { + K := kemDecaps(dk, (*[CiphertextSize768]byte)(c)) + sink ^= K[0] + } +} + +func BenchmarkRoundTrip(b *testing.B) { + dk, err := GenerateKey768() + if err != nil { + b.Fatal(err) + } + ek := dk.EncapsulationKey() + ekBytes := ek.Bytes() + c, _ := ek.Encapsulate() + if err != nil { + b.Fatal(err) + } + b.Run("Alice", func(b *testing.B) { + for i := 0; i < b.N; i++ { + dkS, err := GenerateKey768() + if err != nil { + b.Fatal(err) + } + ekS := dkS.EncapsulationKey().Bytes() + sink ^= ekS[0] + + Ks, err := dk.Decapsulate(c) + if err != nil { + b.Fatal(err) + } + sink ^= Ks[0] + } + }) + b.Run("Bob", func(b *testing.B) { + for i := 0; i < b.N; i++ { + ek, err := NewEncapsulationKey768(ekBytes) + if err != nil { + b.Fatal(err) + } + cS, Ks := ek.Encapsulate() + if err != nil { + b.Fatal(err) + } + sink ^= cS[0] ^ Ks[0] + } + }) +} diff --git a/src/crypto/internal/mlkem768/field.go b/src/crypto/internal/mlkem768/field.go deleted file mode 100644 index e0cde03354..0000000000 --- a/src/crypto/internal/mlkem768/field.go +++ /dev/null @@ -1,456 +0,0 @@ -// Copyright 2024 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package mlkem768 - -import ( - "errors" - "internal/byteorder" - - "golang.org/x/crypto/sha3" -) - -// fieldElement is an integer modulo q, an element of ℤ_q. It is always reduced. -type fieldElement uint16 - -// fieldCheckReduced checks that a value a is < q. -func fieldCheckReduced(a uint16) (fieldElement, error) { - if a >= q { - return 0, errors.New("unreduced field element") - } - return fieldElement(a), nil -} - -// fieldReduceOnce reduces a value a < 2q. -func fieldReduceOnce(a uint16) fieldElement { - x := a - q - // If x underflowed, then x >= 2¹⁶ - q > 2¹⁵, so the top bit is set. - x += (x >> 15) * q - return fieldElement(x) -} - -func fieldAdd(a, b fieldElement) fieldElement { - x := uint16(a + b) - return fieldReduceOnce(x) -} - -func fieldSub(a, b fieldElement) fieldElement { - x := uint16(a - b + q) - return fieldReduceOnce(x) -} - -const ( - barrettMultiplier = 5039 // 2¹² * 2¹² / q - barrettShift = 24 // log₂(2¹² * 2¹²) -) - -// fieldReduce reduces a value a < 2q² using Barrett reduction, to avoid -// potentially variable-time division. -func fieldReduce(a uint32) fieldElement { - quotient := uint32((uint64(a) * barrettMultiplier) >> barrettShift) - return fieldReduceOnce(uint16(a - quotient*q)) -} - -func fieldMul(a, b fieldElement) fieldElement { - x := uint32(a) * uint32(b) - return fieldReduce(x) -} - -// fieldMulSub returns a * (b - c). This operation is fused to save a -// fieldReduceOnce after the subtraction. -func fieldMulSub(a, b, c fieldElement) fieldElement { - x := uint32(a) * uint32(b-c+q) - return fieldReduce(x) -} - -// fieldAddMul returns a * b + c * d. This operation is fused to save a -// fieldReduceOnce and a fieldReduce. -func fieldAddMul(a, b, c, d fieldElement) fieldElement { - x := uint32(a) * uint32(b) - x += uint32(c) * uint32(d) - return fieldReduce(x) -} - -// compress maps a field element uniformly to the range 0 to 2ᵈ-1, according to -// FIPS 203, Definition 4.7. -func compress(x fieldElement, d uint8) uint16 { - // We want to compute (x * 2ᵈ) / q, rounded to nearest integer, with 1/2 - // rounding up (see FIPS 203, Section 2.3). - - // Barrett reduction produces a quotient and a remainder in the range [0, 2q), - // such that dividend = quotient * q + remainder. - dividend := uint32(x) << d // x * 2ᵈ - quotient := uint32(uint64(dividend) * barrettMultiplier >> barrettShift) - remainder := dividend - quotient*q - - // Since the remainder is in the range [0, 2q), not [0, q), we need to - // portion it into three spans for rounding. - // - // [ 0, q/2 ) -> round to 0 - // [ q/2, q + q/2 ) -> round to 1 - // [ q + q/2, 2q ) -> round to 2 - // - // We can convert that to the following logic: add 1 if remainder > q/2, - // then add 1 again if remainder > q + q/2. - // - // Note that if remainder > x, then ⌊x⌋ - remainder underflows, and the top - // bit of the difference will be set. - quotient += (q/2 - remainder) >> 31 & 1 - quotient += (q + q/2 - remainder) >> 31 & 1 - - // quotient might have overflowed at this point, so reduce it by masking. - var mask uint32 = (1 << d) - 1 - return uint16(quotient & mask) -} - -// decompress maps a number x between 0 and 2ᵈ-1 uniformly to the full range of -// field elements, according to FIPS 203, Definition 4.8. -func decompress(y uint16, d uint8) fieldElement { - // We want to compute (y * q) / 2ᵈ, rounded to nearest integer, with 1/2 - // rounding up (see FIPS 203, Section 2.3). - - dividend := uint32(y) * q - quotient := dividend >> d // (y * q) / 2ᵈ - - // The d'th least-significant bit of the dividend (the most significant bit - // of the remainder) is 1 for the top half of the values that divide to the - // same quotient, which are the ones that round up. - quotient += dividend >> (d - 1) & 1 - - // quotient is at most (2¹¹-1) * q / 2¹¹ + 1 = 3328, so it didn't overflow. - return fieldElement(quotient) -} - -// ringElement is a polynomial, an element of R_q, represented as an array -// according to FIPS 203, Section 2.4.4. -type ringElement [n]fieldElement - -// polyAdd adds two ringElements or nttElements. -func polyAdd[T ~[n]fieldElement](a, b T) (s T) { - for i := range s { - s[i] = fieldAdd(a[i], b[i]) - } - return s -} - -// polySub subtracts two ringElements or nttElements. -func polySub[T ~[n]fieldElement](a, b T) (s T) { - for i := range s { - s[i] = fieldSub(a[i], b[i]) - } - return s -} - -// polyByteEncode appends the 384-byte encoding of f to b. -// -// It implements ByteEncode₁₂, according to FIPS 203, Algorithm 5. -func polyByteEncode[T ~[n]fieldElement](b []byte, f T) []byte { - out, B := sliceForAppend(b, encodingSize12) - for i := 0; i < n; i += 2 { - x := uint32(f[i]) | uint32(f[i+1])<<12 - B[0] = uint8(x) - B[1] = uint8(x >> 8) - B[2] = uint8(x >> 16) - B = B[3:] - } - return out -} - -// polyByteDecode decodes the 384-byte encoding of a polynomial, checking that -// all the coefficients are properly reduced. This fulfills the "Modulus check" -// step of ML-KEM Encapsulation. -// -// It implements ByteDecode₁₂, according to FIPS 203, Algorithm 6. -func polyByteDecode[T ~[n]fieldElement](b []byte) (T, error) { - if len(b) != encodingSize12 { - return T{}, errors.New("mlkem768: invalid encoding length") - } - var f T - for i := 0; i < n; i += 2 { - d := uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 - const mask12 = 0b1111_1111_1111 - var err error - if f[i], err = fieldCheckReduced(uint16(d & mask12)); err != nil { - return T{}, errors.New("mlkem768: invalid polynomial encoding") - } - if f[i+1], err = fieldCheckReduced(uint16(d >> 12)); err != nil { - return T{}, errors.New("mlkem768: invalid polynomial encoding") - } - b = b[3:] - } - return f, nil -} - -// sliceForAppend takes a slice and a requested number of bytes. It returns a -// slice with the contents of the given slice followed by that many bytes and a -// second slice that aliases into it and contains only the extra bytes. If the -// original slice has sufficient capacity then no allocation is performed. -func sliceForAppend(in []byte, n int) (head, tail []byte) { - if total := len(in) + n; cap(in) >= total { - head = in[:total] - } else { - head = make([]byte, total) - copy(head, in) - } - tail = head[len(in):] - return -} - -// ringCompressAndEncode1 appends a 32-byte encoding of a ring element to s, -// compressing one coefficients per bit. -// -// It implements Compress₁, according to FIPS 203, Definition 4.7, -// followed by ByteEncode₁, according to FIPS 203, Algorithm 5. -func ringCompressAndEncode1(s []byte, f ringElement) []byte { - s, b := sliceForAppend(s, encodingSize1) - for i := range b { - b[i] = 0 - } - for i := range f { - b[i/8] |= uint8(compress(f[i], 1) << (i % 8)) - } - return s -} - -// ringDecodeAndDecompress1 decodes a 32-byte slice to a ring element where each -// bit is mapped to 0 or ⌈q/2⌋. -// -// It implements ByteDecode₁, according to FIPS 203, Algorithm 6, -// followed by Decompress₁, according to FIPS 203, Definition 4.8. -func ringDecodeAndDecompress1(b *[encodingSize1]byte) ringElement { - var f ringElement - for i := range f { - b_i := b[i/8] >> (i % 8) & 1 - const halfQ = (q + 1) / 2 // ⌈q/2⌋, rounded up per FIPS 203, Section 2.3 - f[i] = fieldElement(b_i) * halfQ // 0 decompresses to 0, and 1 to ⌈q/2⌋ - } - return f -} - -// ringCompressAndEncode4 appends a 128-byte encoding of a ring element to s, -// compressing two coefficients per byte. -// -// It implements Compress₄, according to FIPS 203, Definition 4.7, -// followed by ByteEncode₄, according to FIPS 203, Algorithm 5. -func ringCompressAndEncode4(s []byte, f ringElement) []byte { - s, b := sliceForAppend(s, encodingSize4) - for i := 0; i < n; i += 2 { - b[i/2] = uint8(compress(f[i], 4) | compress(f[i+1], 4)<<4) - } - return s -} - -// ringDecodeAndDecompress4 decodes a 128-byte encoding of a ring element where -// each four bits are mapped to an equidistant distribution. -// -// It implements ByteDecode₄, according to FIPS 203, Algorithm 6, -// followed by Decompress₄, according to FIPS 203, Definition 4.8. -func ringDecodeAndDecompress4(b *[encodingSize4]byte) ringElement { - var f ringElement - for i := 0; i < n; i += 2 { - f[i] = fieldElement(decompress(uint16(b[i/2]&0b1111), 4)) - f[i+1] = fieldElement(decompress(uint16(b[i/2]>>4), 4)) - } - return f -} - -// ringCompressAndEncode10 appends a 320-byte encoding of a ring element to s, -// compressing four coefficients per five bytes. -// -// It implements Compress₁₀, according to FIPS 203, Definition 4.7, -// followed by ByteEncode₁₀, according to FIPS 203, Algorithm 5. -func ringCompressAndEncode10(s []byte, f ringElement) []byte { - s, b := sliceForAppend(s, encodingSize10) - for i := 0; i < n; i += 4 { - var x uint64 - x |= uint64(compress(f[i+0], 10)) - x |= uint64(compress(f[i+1], 10)) << 10 - x |= uint64(compress(f[i+2], 10)) << 20 - x |= uint64(compress(f[i+3], 10)) << 30 - b[0] = uint8(x) - b[1] = uint8(x >> 8) - b[2] = uint8(x >> 16) - b[3] = uint8(x >> 24) - b[4] = uint8(x >> 32) - b = b[5:] - } - return s -} - -// ringDecodeAndDecompress10 decodes a 320-byte encoding of a ring element where -// each ten bits are mapped to an equidistant distribution. -// -// It implements ByteDecode₁₀, according to FIPS 203, Algorithm 6, -// followed by Decompress₁₀, according to FIPS 203, Definition 4.8. -func ringDecodeAndDecompress10(bb *[encodingSize10]byte) ringElement { - b := bb[:] - var f ringElement - for i := 0; i < n; i += 4 { - x := uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 | uint64(b[4])<<32 - b = b[5:] - f[i] = fieldElement(decompress(uint16(x>>0&0b11_1111_1111), 10)) - f[i+1] = fieldElement(decompress(uint16(x>>10&0b11_1111_1111), 10)) - f[i+2] = fieldElement(decompress(uint16(x>>20&0b11_1111_1111), 10)) - f[i+3] = fieldElement(decompress(uint16(x>>30&0b11_1111_1111), 10)) - } - return f -} - -// samplePolyCBD draws a ringElement from the special Dη distribution given a -// stream of random bytes generated by the PRF function, according to FIPS 203, -// Algorithm 8 and Definition 4.3. -func samplePolyCBD(s []byte, b byte) ringElement { - prf := sha3.NewShake256() - prf.Write(s) - prf.Write([]byte{b}) - B := make([]byte, 64*2) // η = 2 - prf.Read(B) - - // SamplePolyCBD simply draws four (2η) bits for each coefficient, and adds - // the first two and subtracts the last two. - - var f ringElement - for i := 0; i < n; i += 2 { - b := B[i/2] - b_7, b_6, b_5, b_4 := b>>7, b>>6&1, b>>5&1, b>>4&1 - b_3, b_2, b_1, b_0 := b>>3&1, b>>2&1, b>>1&1, b&1 - f[i] = fieldSub(fieldElement(b_0+b_1), fieldElement(b_2+b_3)) - f[i+1] = fieldSub(fieldElement(b_4+b_5), fieldElement(b_6+b_7)) - } - return f -} - -// nttElement is an NTT representation, an element of T_q, represented as an -// array according to FIPS 203, Section 2.4.4. -type nttElement [n]fieldElement - -// gammas are the values ζ^2BitRev7(i)+1 mod q for each index i, according to -// FIPS 203, Appendix A (with negative values reduced to positive). -var gammas = [128]fieldElement{17, 3312, 2761, 568, 583, 2746, 2649, 680, 1637, 1692, 723, 2606, 2288, 1041, 1100, 2229, 1409, 1920, 2662, 667, 3281, 48, 233, 3096, 756, 2573, 2156, 1173, 3015, 314, 3050, 279, 1703, 1626, 1651, 1678, 2789, 540, 1789, 1540, 1847, 1482, 952, 2377, 1461, 1868, 2687, 642, 939, 2390, 2308, 1021, 2437, 892, 2388, 941, 733, 2596, 2337, 992, 268, 3061, 641, 2688, 1584, 1745, 2298, 1031, 2037, 1292, 3220, 109, 375, 2954, 2549, 780, 2090, 1239, 1645, 1684, 1063, 2266, 319, 3010, 2773, 556, 757, 2572, 2099, 1230, 561, 2768, 2466, 863, 2594, 735, 2804, 525, 1092, 2237, 403, 2926, 1026, 2303, 1143, 2186, 2150, 1179, 2775, 554, 886, 2443, 1722, 1607, 1212, 2117, 1874, 1455, 1029, 2300, 2110, 1219, 2935, 394, 885, 2444, 2154, 1175} - -// nttMul multiplies two nttElements. -// -// It implements MultiplyNTTs, according to FIPS 203, Algorithm 11. -func nttMul(f, g nttElement) nttElement { - var h nttElement - // We use i += 2 for bounds check elimination. See https://go.dev/issue/66826. - for i := 0; i < 256; i += 2 { - a0, a1 := f[i], f[i+1] - b0, b1 := g[i], g[i+1] - h[i] = fieldAddMul(a0, b0, fieldMul(a1, b1), gammas[i/2]) - h[i+1] = fieldAddMul(a0, b1, a1, b0) - } - return h -} - -// zetas are the values ζ^BitRev7(k) mod q for each index k, according to FIPS -// 203, Appendix A. -var zetas = [128]fieldElement{1, 1729, 2580, 3289, 2642, 630, 1897, 848, 1062, 1919, 193, 797, 2786, 3260, 569, 1746, 296, 2447, 1339, 1476, 3046, 56, 2240, 1333, 1426, 2094, 535, 2882, 2393, 2879, 1974, 821, 289, 331, 3253, 1756, 1197, 2304, 2277, 2055, 650, 1977, 2513, 632, 2865, 33, 1320, 1915, 2319, 1435, 807, 452, 1438, 2868, 1534, 2402, 2647, 2617, 1481, 648, 2474, 3110, 1227, 910, 17, 2761, 583, 2649, 1637, 723, 2288, 1100, 1409, 2662, 3281, 233, 756, 2156, 3015, 3050, 1703, 1651, 2789, 1789, 1847, 952, 1461, 2687, 939, 2308, 2437, 2388, 733, 2337, 268, 641, 1584, 2298, 2037, 3220, 375, 2549, 2090, 1645, 1063, 319, 2773, 757, 2099, 561, 2466, 2594, 2804, 1092, 403, 1026, 1143, 2150, 2775, 886, 1722, 1212, 1874, 1029, 2110, 2935, 885, 2154} - -// ntt maps a ringElement to its nttElement representation. -// -// It implements NTT, according to FIPS 203, Algorithm 9. -func ntt(f ringElement) nttElement { - k := 1 - for len := 128; len >= 2; len /= 2 { - for start := 0; start < 256; start += 2 * len { - zeta := zetas[k] - k++ - // Bounds check elimination hint. - f, flen := f[start:start+len], f[start+len:start+len+len] - for j := 0; j < len; j++ { - t := fieldMul(zeta, flen[j]) - flen[j] = fieldSub(f[j], t) - f[j] = fieldAdd(f[j], t) - } - } - } - return nttElement(f) -} - -// inverseNTT maps a nttElement back to the ringElement it represents. -// -// It implements NTT⁻¹, according to FIPS 203, Algorithm 10. -func inverseNTT(f nttElement) ringElement { - k := 127 - for len := 2; len <= 128; len *= 2 { - for start := 0; start < 256; start += 2 * len { - zeta := zetas[k] - k-- - // Bounds check elimination hint. - f, flen := f[start:start+len], f[start+len:start+len+len] - for j := 0; j < len; j++ { - t := f[j] - f[j] = fieldAdd(t, flen[j]) - flen[j] = fieldMulSub(zeta, flen[j], t) - } - } - } - for i := range f { - f[i] = fieldMul(f[i], 3303) // 3303 = 128⁻¹ mod q - } - return ringElement(f) -} - -// sampleNTT draws a uniformly random nttElement from a stream of uniformly -// random bytes generated by the XOF function, according to FIPS 203, -// Algorithm 7. -func sampleNTT(rho []byte, ii, jj byte) nttElement { - B := sha3.NewShake128() - B.Write(rho) - B.Write([]byte{ii, jj}) - - // SampleNTT essentially draws 12 bits at a time from r, interprets them in - // little-endian, and rejects values higher than q, until it drew 256 - // values. (The rejection rate is approximately 19%.) - // - // To do this from a bytes stream, it draws three bytes at a time, and - // splits them into two uint16 appropriately masked. - // - // r₀ r₁ r₂ - // |- - - - - - - -|- - - - - - - -|- - - - - - - -| - // - // Uint16(r₀ || r₁) - // |- - - - - - - - - - - - - - - -| - // |- - - - - - - - - - - -| - // d₁ - // - // Uint16(r₁ || r₂) - // |- - - - - - - - - - - - - - - -| - // |- - - - - - - - - - - -| - // d₂ - // - // Note that in little-endian, the rightmost bits are the most significant - // bits (dropped with a mask) and the leftmost bits are the least - // significant bits (dropped with a right shift). - - var a nttElement - var j int // index into a - var buf [24]byte // buffered reads from B - off := len(buf) // index into buf, starts in a "buffer fully consumed" state - for { - if off >= len(buf) { - B.Read(buf[:]) - off = 0 - } - d1 := byteorder.LeUint16(buf[off:]) & 0b1111_1111_1111 - d2 := byteorder.LeUint16(buf[off+1:]) >> 4 - off += 3 - if d1 < q { - a[j] = fieldElement(d1) - j++ - } - if j >= len(a) { - break - } - if d2 < q { - a[j] = fieldElement(d2) - j++ - } - if j >= len(a) { - break - } - } - return a -} diff --git a/src/crypto/internal/mlkem768/field_test.go b/src/crypto/internal/mlkem768/field_test.go deleted file mode 100644 index 6350ebc82f..0000000000 --- a/src/crypto/internal/mlkem768/field_test.go +++ /dev/null @@ -1,191 +0,0 @@ -// Copyright 2023 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package mlkem768 - -import ( - "math/big" - "strconv" - "testing" -) - -func TestFieldReduce(t *testing.T) { - for a := uint32(0); a < 2*q*q; a++ { - got := fieldReduce(a) - exp := fieldElement(a % q) - if got != exp { - t.Fatalf("reduce(%d) = %d, expected %d", a, got, exp) - } - } -} - -func TestFieldAdd(t *testing.T) { - for a := fieldElement(0); a < q; a++ { - for b := fieldElement(0); b < q; b++ { - got := fieldAdd(a, b) - exp := (a + b) % q - if got != exp { - t.Fatalf("%d + %d = %d, expected %d", a, b, got, exp) - } - } - } -} - -func TestFieldSub(t *testing.T) { - for a := fieldElement(0); a < q; a++ { - for b := fieldElement(0); b < q; b++ { - got := fieldSub(a, b) - exp := (a - b + q) % q - if got != exp { - t.Fatalf("%d - %d = %d, expected %d", a, b, got, exp) - } - } - } -} - -func TestFieldMul(t *testing.T) { - for a := fieldElement(0); a < q; a++ { - for b := fieldElement(0); b < q; b++ { - got := fieldMul(a, b) - exp := fieldElement((uint32(a) * uint32(b)) % q) - if got != exp { - t.Fatalf("%d * %d = %d, expected %d", a, b, got, exp) - } - } - } -} - -func TestDecompressCompress(t *testing.T) { - for _, bits := range []uint8{1, 4, 10} { - for a := uint16(0); a < 1<= q { - t.Fatalf("decompress(%d, %d) = %d >= q", a, bits, f) - } - got := compress(f, bits) - if got != a { - t.Fatalf("compress(decompress(%d, %d), %d) = %d", a, bits, bits, got) - } - } - - for a := fieldElement(0); a < q; a++ { - c := compress(a, bits) - if c >= 1<= 2^bits", a, bits, c) - } - got := decompress(c, bits) - diff := min(a-got, got-a, a-got+q, got-a+q) - ceil := q / (1 << bits) - if diff > fieldElement(ceil) { - t.Fatalf("decompress(compress(%d, %d), %d) = %d (diff %d, max diff %d)", - a, bits, bits, got, diff, ceil) - } - } - } -} - -func CompressRat(x fieldElement, d uint8) uint16 { - if x >= q { - panic("x out of range") - } - if d <= 0 || d >= 12 { - panic("d out of range") - } - - precise := big.NewRat((1<= 1<= 12 { - panic("d out of range") - } - - precise := big.NewRat(q*int64(y), 1<>7 != 0 { - panic("not 7 bits") - } - var r uint8 - r |= n >> 6 & 0b0000_0001 - r |= n >> 4 & 0b0000_0010 - r |= n >> 2 & 0b0000_0100 - r |= n /**/ & 0b0000_1000 - r |= n << 2 & 0b0001_0000 - r |= n << 4 & 0b0010_0000 - r |= n << 6 & 0b0100_0000 - return r -} - -func TestZetas(t *testing.T) { - ζ := big.NewInt(17) - q := big.NewInt(q) - for k, zeta := range zetas { - // ζ^BitRev7(k) mod q - exp := new(big.Int).Exp(ζ, big.NewInt(int64(BitRev7(uint8(k)))), q) - if big.NewInt(int64(zeta)).Cmp(exp) != 0 { - t.Errorf("zetas[%d] = %v, expected %v", k, zeta, exp) - } - } -} - -func TestGammas(t *testing.T) { - ζ := big.NewInt(17) - q := big.NewInt(q) - for k, gamma := range gammas { - // ζ^2BitRev7(i)+1 - exp := new(big.Int).Exp(ζ, big.NewInt(int64(BitRev7(uint8(k)))*2+1), q) - if big.NewInt(int64(gamma)).Cmp(exp) != 0 { - t.Errorf("gammas[%d] = %v, expected %v", k, gamma, exp) - } - } -} diff --git a/src/crypto/internal/mlkem768/mlkem768.go b/src/crypto/internal/mlkem768/mlkem768.go deleted file mode 100644 index 1e46c46df9..0000000000 --- a/src/crypto/internal/mlkem768/mlkem768.go +++ /dev/null @@ -1,388 +0,0 @@ -// Copyright 2023 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Package mlkem768 implements the quantum-resistant key encapsulation method -// ML-KEM (formerly known as Kyber), as specified in [NIST FIPS 203]. -// -// Only the recommended ML-KEM-768 parameter set is provided. -// -// [NIST FIPS 203]: https://doi.org/10.6028/NIST.FIPS.203 -package mlkem768 - -// This package targets security, correctness, simplicity, readability, and -// reviewability as its primary goals. All critical operations are performed in -// constant time. -// -// Variable and function names, as well as code layout, are selected to -// facilitate reviewing the implementation against the NIST FIPS 203 document. -// -// Reviewers unfamiliar with polynomials or linear algebra might find the -// background at https://words.filippo.io/kyber-math/ useful. - -import ( - "crypto/rand" - "crypto/subtle" - "errors" - - "golang.org/x/crypto/sha3" -) - -const ( - // ML-KEM global constants. - n = 256 - q = 3329 - - // encodingSizeX is the byte size of a ringElement or nttElement encoded - // by ByteEncode_X (FIPS 203, Algorithm 5). - encodingSize12 = n * 12 / 8 - encodingSize10 = n * 10 / 8 - encodingSize4 = n * 4 / 8 - encodingSize1 = n * 1 / 8 - - messageSize = encodingSize1 - - SharedKeySize = 32 - SeedSize = 32 + 32 -) - -// ML-KEM-768 parameters. -const ( - k = 3 - - decryptionKeySize = k * encodingSize12 - encryptionKeySize = k*encodingSize12 + 32 - - CiphertextSize768 = k*encodingSize10 + encodingSize4 - EncapsulationKeySize768 = encryptionKeySize -) - -// A DecapsulationKey768 is the secret key used to decapsulate a shared key from a -// ciphertext. It includes various precomputed values. -type DecapsulationKey768 struct { - d [32]byte // decapsulation key seed - z [32]byte // implicit rejection sampling seed - - ρ [32]byte // sampleNTT seed for A, stored for the encapsulation key - h [32]byte // H(ek), stored for ML-KEM.Decaps_internal - - encryptionKey - decryptionKey -} - -// Bytes returns the decapsulation key as a 64-byte seed in the "d || z" form. -// -// The decapsulation key must be kept secret. -func (dk *DecapsulationKey768) Bytes() []byte { - var b [SeedSize]byte - copy(b[:], dk.d[:]) - copy(b[32:], dk.z[:]) - return b[:] -} - -// EncapsulationKey returns the public encapsulation key necessary to produce -// ciphertexts. -func (dk *DecapsulationKey768) EncapsulationKey() *EncapsulationKey768 { - return &EncapsulationKey768{ - ρ: dk.ρ, - h: dk.h, - encryptionKey: dk.encryptionKey, - } -} - -// An EncapsulationKey768 is the public key used to produce ciphertexts to be -// decapsulated by the corresponding [DecapsulationKey768]. -type EncapsulationKey768 struct { - ρ [32]byte // sampleNTT seed for A - h [32]byte // H(ek) - encryptionKey -} - -// Bytes returns the encapsulation key as a byte slice. -func (ek *EncapsulationKey768) Bytes() []byte { - // The actual logic is in a separate function to outline this allocation. - b := make([]byte, 0, EncapsulationKeySize768) - return ek.bytes(b) -} - -func (ek *EncapsulationKey768) bytes(b []byte) []byte { - for i := range ek.t { - b = polyByteEncode(b, ek.t[i]) - } - b = append(b, ek.ρ[:]...) - return b -} - -// encryptionKey is the parsed and expanded form of a PKE encryption key. -type encryptionKey struct { - t [k]nttElement // ByteDecode₁₂(ek[:384k]) - a [k * k]nttElement // A[i*k+j] = sampleNTT(ρ, j, i) -} - -// decryptionKey is the parsed and expanded form of a PKE decryption key. -type decryptionKey struct { - s [k]nttElement // ByteDecode₁₂(dk[:decryptionKeySize]) -} - -// GenerateKey768 generates a new decapsulation key, drawing random bytes from -// crypto/rand. The decapsulation key must be kept secret. -func GenerateKey768() (*DecapsulationKey768, error) { - // The actual logic is in a separate function to outline this allocation. - dk := &DecapsulationKey768{} - return generateKey(dk), nil -} - -func generateKey(dk *DecapsulationKey768) *DecapsulationKey768 { - var d [32]byte - rand.Read(d[:]) - var z [32]byte - rand.Read(z[:]) - return kemKeyGen(dk, &d, &z) -} - -// NewDecapsulationKey768 parses a decapsulation key from a 64-byte -// seed in the "d || z" form. The seed must be uniformly random. -func NewDecapsulationKey768(seed []byte) (*DecapsulationKey768, error) { - // The actual logic is in a separate function to outline this allocation. - dk := &DecapsulationKey768{} - return newKeyFromSeed(dk, seed) -} - -func newKeyFromSeed(dk *DecapsulationKey768, seed []byte) (*DecapsulationKey768, error) { - if len(seed) != SeedSize { - return nil, errors.New("mlkem768: invalid seed length") - } - d := (*[32]byte)(seed[:32]) - z := (*[32]byte)(seed[32:]) - return kemKeyGen(dk, d, z), nil -} - -// kemKeyGen generates a decapsulation key. -// -// It implements ML-KEM.KeyGen_internal according to FIPS 203, Algorithm 16, and -// K-PKE.KeyGen according to FIPS 203, Algorithm 13. The two are merged to save -// copies and allocations. -func kemKeyGen(dk *DecapsulationKey768, d, z *[32]byte) *DecapsulationKey768 { - if dk == nil { - dk = &DecapsulationKey768{} - } - dk.d = *d - dk.z = *z - - g := sha3.New512() - g.Write(d[:]) - g.Write([]byte{k}) // Module dimension as a domain separator. - G := g.Sum(make([]byte, 0, 64)) - ρ, σ := G[:32], G[32:] - dk.ρ = [32]byte(ρ) - - A := &dk.a - for i := byte(0); i < k; i++ { - for j := byte(0); j < k; j++ { - A[i*k+j] = sampleNTT(ρ, j, i) - } - } - - var N byte - s := &dk.s - for i := range s { - s[i] = ntt(samplePolyCBD(σ, N)) - N++ - } - e := make([]nttElement, k) - for i := range e { - e[i] = ntt(samplePolyCBD(σ, N)) - N++ - } - - t := &dk.t - for i := range t { // t = A ◦ s + e - t[i] = e[i] - for j := range s { - t[i] = polyAdd(t[i], nttMul(A[i*k+j], s[j])) - } - } - - H := sha3.New256() - ek := dk.EncapsulationKey().Bytes() - H.Write(ek) - H.Sum(dk.h[:0]) - - return dk -} - -// Encapsulate generates a shared key and an associated ciphertext from an -// encapsulation key, drawing random bytes from crypto/rand. -// -// The shared key must be kept secret. -func (ek *EncapsulationKey768) Encapsulate() (ciphertext, sharedKey []byte) { - // The actual logic is in a separate function to outline this allocation. - var cc [CiphertextSize768]byte - return ek.encapsulate(&cc) -} - -func (ek *EncapsulationKey768) encapsulate(cc *[CiphertextSize768]byte) (ciphertext, sharedKey []byte) { - var m [messageSize]byte - rand.Read(m[:]) - // Note that the modulus check (step 2 of the encapsulation key check from - // FIPS 203, Section 7.2) is performed by polyByteDecode in parseEK. - return kemEncaps(cc, ek, &m) -} - -// kemEncaps generates a shared key and an associated ciphertext. -// -// It implements ML-KEM.Encaps_internal according to FIPS 203, Algorithm 17. -func kemEncaps(cc *[CiphertextSize768]byte, ek *EncapsulationKey768, m *[messageSize]byte) (c, K []byte) { - if cc == nil { - cc = &[CiphertextSize768]byte{} - } - - g := sha3.New512() - g.Write(m[:]) - g.Write(ek.h[:]) - G := g.Sum(nil) - K, r := G[:SharedKeySize], G[SharedKeySize:] - c = pkeEncrypt(cc, &ek.encryptionKey, m, r) - return c, K -} - -// NewEncapsulationKey768 parses an encapsulation key from its encoded form. -// If the encapsulation key is not valid, NewEncapsulationKey768 returns an error. -func NewEncapsulationKey768(encapsulationKey []byte) (*EncapsulationKey768, error) { - // The actual logic is in a separate function to outline this allocation. - ek := &EncapsulationKey768{} - return parseEK(ek, encapsulationKey) -} - -// parseEK parses an encryption key from its encoded form. -// -// It implements the initial stages of K-PKE.Encrypt according to FIPS 203, -// Algorithm 14. -func parseEK(ek *EncapsulationKey768, ekPKE []byte) (*EncapsulationKey768, error) { - if len(ekPKE) != encryptionKeySize { - return nil, errors.New("mlkem768: invalid encapsulation key length") - } - - ek.h = sha3.Sum256(ekPKE[:]) - - for i := range ek.t { - var err error - ek.t[i], err = polyByteDecode[nttElement](ekPKE[:encodingSize12]) - if err != nil { - return nil, err - } - ekPKE = ekPKE[encodingSize12:] - } - copy(ek.ρ[:], ekPKE) - - for i := byte(0); i < k; i++ { - for j := byte(0); j < k; j++ { - ek.a[i*k+j] = sampleNTT(ek.ρ[:], j, i) - } - } - - return ek, nil -} - -// pkeEncrypt encrypt a plaintext message. -// -// It implements K-PKE.Encrypt according to FIPS 203, Algorithm 14, although the -// computation of t and AT is done in parseEK. -func pkeEncrypt(cc *[CiphertextSize768]byte, ex *encryptionKey, m *[messageSize]byte, rnd []byte) []byte { - var N byte - r, e1 := make([]nttElement, k), make([]ringElement, k) - for i := range r { - r[i] = ntt(samplePolyCBD(rnd, N)) - N++ - } - for i := range e1 { - e1[i] = samplePolyCBD(rnd, N) - N++ - } - e2 := samplePolyCBD(rnd, N) - - u := make([]ringElement, k) // NTT⁻¹(AT ◦ r) + e1 - for i := range u { - u[i] = e1[i] - for j := range r { - // Note that i and j are inverted, as we need the transposed of A. - u[i] = polyAdd(u[i], inverseNTT(nttMul(ex.a[j*k+i], r[j]))) - } - } - - μ := ringDecodeAndDecompress1(m) - - var vNTT nttElement // t⊺ ◦ r - for i := range ex.t { - vNTT = polyAdd(vNTT, nttMul(ex.t[i], r[i])) - } - v := polyAdd(polyAdd(inverseNTT(vNTT), e2), μ) - - c := cc[:0] - for _, f := range u { - c = ringCompressAndEncode10(c, f) - } - c = ringCompressAndEncode4(c, v) - - return c -} - -// Decapsulate generates a shared key from a ciphertext and a decapsulation key. -// If the ciphertext is not valid, Decapsulate returns an error. -// -// The shared key must be kept secret. -func (dk *DecapsulationKey768) Decapsulate(ciphertext []byte) (sharedKey []byte, err error) { - if len(ciphertext) != CiphertextSize768 { - return nil, errors.New("mlkem768: invalid ciphertext length") - } - c := (*[CiphertextSize768]byte)(ciphertext) - // Note that the hash check (step 3 of the decapsulation input check from - // FIPS 203, Section 7.3) is foregone as a DecapsulationKey is always - // validly generated by ML-KEM.KeyGen_internal. - return kemDecaps(dk, c), nil -} - -// kemDecaps produces a shared key from a ciphertext. -// -// It implements ML-KEM.Decaps_internal according to FIPS 203, Algorithm 18. -func kemDecaps(dk *DecapsulationKey768, c *[CiphertextSize768]byte) (K []byte) { - m := pkeDecrypt(&dk.decryptionKey, c) - g := sha3.New512() - g.Write(m[:]) - g.Write(dk.h[:]) - G := g.Sum(make([]byte, 0, 64)) - Kprime, r := G[:SharedKeySize], G[SharedKeySize:] - J := sha3.NewShake256() - J.Write(dk.z[:]) - J.Write(c[:]) - Kout := make([]byte, SharedKeySize) - J.Read(Kout) - var cc [CiphertextSize768]byte - c1 := pkeEncrypt(&cc, &dk.encryptionKey, (*[32]byte)(m), r) - - subtle.ConstantTimeCopy(subtle.ConstantTimeCompare(c[:], c1), Kout, Kprime) - return Kout -} - -// pkeDecrypt decrypts a ciphertext. -// -// It implements K-PKE.Decrypt according to FIPS 203, Algorithm 15, -// although s is retained from kemKeyGen. -func pkeDecrypt(dx *decryptionKey, c *[CiphertextSize768]byte) []byte { - u := make([]ringElement, k) - for i := range u { - b := (*[encodingSize10]byte)(c[encodingSize10*i : encodingSize10*(i+1)]) - u[i] = ringDecodeAndDecompress10(b) - } - - b := (*[encodingSize4]byte)(c[encodingSize10*k:]) - v := ringDecodeAndDecompress4(b) - - var mask nttElement // s⊺ ◦ NTT(u) - for i := range dx.s { - mask = polyAdd(mask, nttMul(dx.s[i], ntt(u[i]))) - } - w := polySub(v, inverseNTT(mask)) - - return ringCompressAndEncode1(nil, w) -} diff --git a/src/crypto/internal/mlkem768/mlkem768_test.go b/src/crypto/internal/mlkem768/mlkem768_test.go deleted file mode 100644 index 58dc138810..0000000000 --- a/src/crypto/internal/mlkem768/mlkem768_test.go +++ /dev/null @@ -1,236 +0,0 @@ -// Copyright 2023 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package mlkem768 - -import ( - "bytes" - "crypto/rand" - _ "embed" - "encoding/hex" - "flag" - "testing" - - "golang.org/x/crypto/sha3" -) - -func TestRoundTrip(t *testing.T) { - dk, err := GenerateKey768() - if err != nil { - t.Fatal(err) - } - c, Ke := dk.EncapsulationKey().Encapsulate() - Kd, err := dk.Decapsulate(c) - if err != nil { - t.Fatal(err) - } - if !bytes.Equal(Ke, Kd) { - t.Fail() - } - - dk1, err := GenerateKey768() - if err != nil { - t.Fatal(err) - } - if bytes.Equal(dk.EncapsulationKey().Bytes(), dk1.EncapsulationKey().Bytes()) { - t.Fail() - } - if bytes.Equal(dk.Bytes(), dk1.Bytes()) { - t.Fail() - } - - c1, Ke1 := dk.EncapsulationKey().Encapsulate() - if bytes.Equal(c, c1) { - t.Fail() - } - if bytes.Equal(Ke, Ke1) { - t.Fail() - } -} - -func TestBadLengths(t *testing.T) { - dk, err := GenerateKey768() - if err != nil { - t.Fatal(err) - } - ek := dk.EncapsulationKey() - ekBytes := dk.EncapsulationKey().Bytes() - c, _ := ek.Encapsulate() - - for i := 0; i < len(ekBytes)-1; i++ { - if _, err := NewEncapsulationKey768(ekBytes[:i]); err == nil { - t.Errorf("expected error for ek length %d", i) - } - } - ekLong := ekBytes - for i := 0; i < 100; i++ { - ekLong = append(ekLong, 0) - if _, err := NewEncapsulationKey768(ekLong); err == nil { - t.Errorf("expected error for ek length %d", len(ekLong)) - } - } - - for i := 0; i < len(c)-1; i++ { - if _, err := dk.Decapsulate(c[:i]); err == nil { - t.Errorf("expected error for c length %d", i) - } - } - cLong := c - for i := 0; i < 100; i++ { - cLong = append(cLong, 0) - if _, err := dk.Decapsulate(cLong); err == nil { - t.Errorf("expected error for c length %d", len(cLong)) - } - } -} - -var millionFlag = flag.Bool("million", false, "run the million vector test") - -// TestAccumulated accumulates 10k (or 100, or 1M) random vectors and checks the -// hash of the result, to avoid checking in 150MB of test vectors. -func TestAccumulated(t *testing.T) { - n := 10000 - expected := "8a518cc63da366322a8e7a818c7a0d63483cb3528d34a4cf42f35d5ad73f22fc" - if testing.Short() { - n = 100 - expected = "1114b1b6699ed191734fa339376afa7e285c9e6acf6ff0177d346696ce564415" - } - if *millionFlag { - n = 1000000 - expected = "424bf8f0e8ae99b78d788a6e2e8e9cdaf9773fc0c08a6f433507cb559edfd0f0" - } - - s := sha3.NewShake128() - o := sha3.NewShake128() - seed := make([]byte, SeedSize) - var msg [messageSize]byte - ct1 := make([]byte, CiphertextSize768) - - for i := 0; i < n; i++ { - s.Read(seed) - dk, err := NewDecapsulationKey768(seed) - if err != nil { - t.Fatal(err) - } - ek := dk.EncapsulationKey() - o.Write(ek.Bytes()) - - s.Read(msg[:]) - ct, k := kemEncaps(nil, ek, &msg) - o.Write(ct) - o.Write(k) - - kk, err := dk.Decapsulate(ct) - if err != nil { - t.Fatal(err) - } - if !bytes.Equal(kk, k) { - t.Errorf("k: got %x, expected %x", kk, k) - } - - s.Read(ct1) - k1, err := dk.Decapsulate(ct1) - if err != nil { - t.Fatal(err) - } - o.Write(k1) - } - - got := hex.EncodeToString(o.Sum(nil)) - if got != expected { - t.Errorf("got %s, expected %s", got, expected) - } -} - -var sink byte - -func BenchmarkKeyGen(b *testing.B) { - var dk DecapsulationKey768 - var d, z [32]byte - rand.Read(d[:]) - rand.Read(z[:]) - b.ResetTimer() - for i := 0; i < b.N; i++ { - dk := kemKeyGen(&dk, &d, &z) - sink ^= dk.EncapsulationKey().Bytes()[0] - } -} - -func BenchmarkEncaps(b *testing.B) { - seed := make([]byte, SeedSize) - rand.Read(seed) - var m [messageSize]byte - rand.Read(m[:]) - dk, err := NewDecapsulationKey768(seed) - if err != nil { - b.Fatal(err) - } - ekBytes := dk.EncapsulationKey().Bytes() - var c [CiphertextSize768]byte - b.ResetTimer() - for i := 0; i < b.N; i++ { - ek, err := NewEncapsulationKey768(ekBytes) - if err != nil { - b.Fatal(err) - } - c, K := kemEncaps(&c, ek, &m) - sink ^= c[0] ^ K[0] - } -} - -func BenchmarkDecaps(b *testing.B) { - dk, err := GenerateKey768() - if err != nil { - b.Fatal(err) - } - ek := dk.EncapsulationKey() - c, _ := ek.Encapsulate() - b.ResetTimer() - for i := 0; i < b.N; i++ { - K := kemDecaps(dk, (*[CiphertextSize768]byte)(c)) - sink ^= K[0] - } -} - -func BenchmarkRoundTrip(b *testing.B) { - dk, err := GenerateKey768() - if err != nil { - b.Fatal(err) - } - ek := dk.EncapsulationKey() - ekBytes := ek.Bytes() - c, _ := ek.Encapsulate() - if err != nil { - b.Fatal(err) - } - b.Run("Alice", func(b *testing.B) { - for i := 0; i < b.N; i++ { - dkS, err := GenerateKey768() - if err != nil { - b.Fatal(err) - } - ekS := dkS.EncapsulationKey().Bytes() - sink ^= ekS[0] - - Ks, err := dk.Decapsulate(c) - if err != nil { - b.Fatal(err) - } - sink ^= Ks[0] - } - }) - b.Run("Bob", func(b *testing.B) { - for i := 0; i < b.N; i++ { - ek, err := NewEncapsulationKey768(ekBytes) - if err != nil { - b.Fatal(err) - } - cS, Ks := ek.Encapsulate() - if err != nil { - b.Fatal(err) - } - sink ^= cS[0] ^ Ks[0] - } - }) -} diff --git a/src/crypto/tls/handshake_client.go b/src/crypto/tls/handshake_client.go index 1c14476d60..ea9c4c50c5 100644 --- a/src/crypto/tls/handshake_client.go +++ b/src/crypto/tls/handshake_client.go @@ -10,9 +10,9 @@ import ( "crypto" "crypto/ecdsa" "crypto/ed25519" + "crypto/internal/fips/mlkem" "crypto/internal/fips/tls13" "crypto/internal/hpke" - "crypto/internal/mlkem768" "crypto/rsa" "crypto/subtle" "crypto/x509" @@ -160,11 +160,11 @@ func (c *Conn) makeClientHello() (*clientHelloMsg, *keySharePrivateKeys, *echCon if err != nil { return nil, nil, nil, err } - seed := make([]byte, mlkem768.SeedSize) + seed := make([]byte, mlkem.SeedSize) if _, err := io.ReadFull(config.rand(), seed); err != nil { return nil, nil, nil, err } - keyShareKeys.kyber, err = mlkem768.NewDecapsulationKey768(seed) + keyShareKeys.kyber, err = mlkem.NewDecapsulationKey768(seed) if err != nil { return nil, nil, nil, err } diff --git a/src/crypto/tls/handshake_client_tls13.go b/src/crypto/tls/handshake_client_tls13.go index fbec7431a1..6ce83b9623 100644 --- a/src/crypto/tls/handshake_client_tls13.go +++ b/src/crypto/tls/handshake_client_tls13.go @@ -10,8 +10,8 @@ import ( "crypto" "crypto/hmac" "crypto/internal/fips/hkdf" + "crypto/internal/fips/mlkem" "crypto/internal/fips/tls13" - "crypto/internal/mlkem768" "crypto/rsa" "crypto/subtle" "errors" @@ -481,7 +481,7 @@ func (hs *clientHandshakeStateTLS13) establishHandshakeKeys() error { ecdhePeerData := hs.serverHello.serverShare.data if hs.serverHello.serverShare.group == x25519Kyber768Draft00 { - if len(ecdhePeerData) != x25519PublicKeySize+mlkem768.CiphertextSize768 { + if len(ecdhePeerData) != x25519PublicKeySize+mlkem.CiphertextSize768 { c.sendAlert(alertIllegalParameter) return errors.New("tls: invalid server key share") } diff --git a/src/crypto/tls/handshake_server_tls13.go b/src/crypto/tls/handshake_server_tls13.go index 3591aa1f11..aa1ffd908a 100644 --- a/src/crypto/tls/handshake_server_tls13.go +++ b/src/crypto/tls/handshake_server_tls13.go @@ -9,8 +9,8 @@ import ( "context" "crypto" "crypto/hmac" + "crypto/internal/fips/mlkem" "crypto/internal/fips/tls13" - "crypto/internal/mlkem768" "crypto/rsa" "errors" "hash" @@ -223,7 +223,7 @@ func (hs *serverHandshakeStateTLS13) processClientHello() error { ecdhData := clientKeyShare.data if selectedGroup == x25519Kyber768Draft00 { ecdhGroup = X25519 - if len(ecdhData) != x25519PublicKeySize+mlkem768.EncapsulationKeySize768 { + if len(ecdhData) != x25519PublicKeySize+mlkem.EncapsulationKeySize768 { c.sendAlert(alertIllegalParameter) return errors.New("tls: invalid Kyber client key share") } diff --git a/src/crypto/tls/key_schedule.go b/src/crypto/tls/key_schedule.go index 8377807ba5..99229ea834 100644 --- a/src/crypto/tls/key_schedule.go +++ b/src/crypto/tls/key_schedule.go @@ -7,13 +7,12 @@ package tls import ( "crypto/ecdh" "crypto/hmac" + "crypto/internal/fips/mlkem" + "crypto/internal/fips/sha3" "crypto/internal/fips/tls13" - "crypto/internal/mlkem768" "errors" "hash" "io" - - "golang.org/x/crypto/sha3" ) // This file contains the functions necessary to compute the TLS 1.3 key @@ -54,11 +53,11 @@ func (c *cipherSuiteTLS13) exportKeyingMaterial(s *tls13.MasterSecret, transcrip type keySharePrivateKeys struct { curveID CurveID ecdhe *ecdh.PrivateKey - kyber *mlkem768.DecapsulationKey768 + kyber *mlkem.DecapsulationKey768 } // kyberDecapsulate implements decapsulation according to Kyber Round 3. -func kyberDecapsulate(dk *mlkem768.DecapsulationKey768, c []byte) ([]byte, error) { +func kyberDecapsulate(dk *mlkem.DecapsulationKey768, c []byte) ([]byte, error) { K, err := dk.Decapsulate(c) if err != nil { return nil, err @@ -68,7 +67,7 @@ func kyberDecapsulate(dk *mlkem768.DecapsulationKey768, c []byte) ([]byte, error // kyberEncapsulate implements encapsulation according to Kyber Round 3. func kyberEncapsulate(ek []byte) (c, ss []byte, err error) { - k, err := mlkem768.NewEncapsulationKey768(ek) + k, err := mlkem.NewEncapsulationKey768(ek) if err != nil { return nil, nil, err } @@ -77,13 +76,14 @@ func kyberEncapsulate(ek []byte) (c, ss []byte, err error) { } func kyberSharedSecret(c, K []byte) []byte { - // Package mlkem768 implements ML-KEM, which compared to Kyber removed a + // Package mlkem implements ML-KEM, which compared to Kyber removed a // final hashing step. Compute SHAKE-256(K || SHA3-256(c), 32) to match Kyber. // See https://words.filippo.io/mlkem768/#bonus-track-using-a-ml-kem-implementation-as-kyber-v3. h := sha3.NewShake256() h.Write(K) - ch := sha3.Sum256(c) - h.Write(ch[:]) + ch := sha3.New256() + ch.Write(c) + h.Write(ch.Sum(nil)) out := make([]byte, 32) h.Read(out) return out diff --git a/src/crypto/tls/key_schedule_test.go b/src/crypto/tls/key_schedule_test.go index 766370ff21..0dc3601e47 100644 --- a/src/crypto/tls/key_schedule_test.go +++ b/src/crypto/tls/key_schedule_test.go @@ -6,8 +6,8 @@ package tls import ( "bytes" + "crypto/internal/fips/mlkem" "crypto/internal/fips/tls13" - "crypto/internal/mlkem768" "crypto/sha256" "encoding/hex" "strings" @@ -120,7 +120,7 @@ func TestTrafficKey(t *testing.T) { } func TestKyberEncapsulate(t *testing.T) { - dk, err := mlkem768.GenerateKey768() + dk, err := mlkem.GenerateKey768() if err != nil { t.Fatal(err) } diff --git a/src/go/build/deps_test.go b/src/go/build/deps_test.go index 9f7ec1eb9d..b999699a8d 100644 --- a/src/go/build/deps_test.go +++ b/src/go/build/deps_test.go @@ -462,6 +462,7 @@ var depsRules = ` < crypto/internal/fips/hmac < crypto/internal/fips/check < crypto/internal/fips/hkdf + < crypto/internal/fips/mlkem < crypto/internal/fips/ssh < crypto/internal/fips/tls12 < crypto/internal/fips/tls13 @@ -525,7 +526,6 @@ var depsRules = ` CRYPTO, FMT, math/big < crypto/internal/boring/bbig < crypto/rand - < crypto/internal/mlkem768 < crypto/ed25519 < encoding/asn1 < golang.org/x/crypto/cryptobyte/asn1 diff --git a/src/vendor/golang.org/x/crypto/sha3/doc.go b/src/vendor/golang.org/x/crypto/sha3/doc.go deleted file mode 100644 index 7e02309070..0000000000 --- a/src/vendor/golang.org/x/crypto/sha3/doc.go +++ /dev/null @@ -1,62 +0,0 @@ -// Copyright 2014 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Package sha3 implements the SHA-3 fixed-output-length hash functions and -// the SHAKE variable-output-length hash functions defined by FIPS-202. -// -// Both types of hash function use the "sponge" construction and the Keccak -// permutation. For a detailed specification see http://keccak.noekeon.org/ -// -// # Guidance -// -// If you aren't sure what function you need, use SHAKE256 with at least 64 -// bytes of output. The SHAKE instances are faster than the SHA3 instances; -// the latter have to allocate memory to conform to the hash.Hash interface. -// -// If you need a secret-key MAC (message authentication code), prepend the -// secret key to the input, hash with SHAKE256 and read at least 32 bytes of -// output. -// -// # Security strengths -// -// The SHA3-x (x equals 224, 256, 384, or 512) functions have a security -// strength against preimage attacks of x bits. Since they only produce "x" -// bits of output, their collision-resistance is only "x/2" bits. -// -// The SHAKE-256 and -128 functions have a generic security strength of 256 and -// 128 bits against all attacks, provided that at least 2x bits of their output -// is used. Requesting more than 64 or 32 bytes of output, respectively, does -// not increase the collision-resistance of the SHAKE functions. -// -// # The sponge construction -// -// A sponge builds a pseudo-random function from a public pseudo-random -// permutation, by applying the permutation to a state of "rate + capacity" -// bytes, but hiding "capacity" of the bytes. -// -// A sponge starts out with a zero state. To hash an input using a sponge, up -// to "rate" bytes of the input are XORed into the sponge's state. The sponge -// is then "full" and the permutation is applied to "empty" it. This process is -// repeated until all the input has been "absorbed". The input is then padded. -// The digest is "squeezed" from the sponge in the same way, except that output -// is copied out instead of input being XORed in. -// -// A sponge is parameterized by its generic security strength, which is equal -// to half its capacity; capacity + rate is equal to the permutation's width. -// Since the KeccakF-1600 permutation is 1600 bits (200 bytes) wide, this means -// that the security strength of a sponge instance is equal to (1600 - bitrate) / 2. -// -// # Recommendations -// -// The SHAKE functions are recommended for most new uses. They can produce -// output of arbitrary length. SHAKE256, with an output length of at least -// 64 bytes, provides 256-bit security against all attacks. The Keccak team -// recommends it for most applications upgrading from SHA2-512. (NIST chose a -// much stronger, but much slower, sponge instance for SHA3-512.) -// -// The SHA-3 functions are "drop-in" replacements for the SHA-2 functions. -// They produce output of the same length, with the same security strengths -// against all attacks. This means, in particular, that SHA3-256 only has -// 128-bit collision resistance, because its output length is 32 bytes. -package sha3 diff --git a/src/vendor/golang.org/x/crypto/sha3/hashes.go b/src/vendor/golang.org/x/crypto/sha3/hashes.go deleted file mode 100644 index c544b29e5f..0000000000 --- a/src/vendor/golang.org/x/crypto/sha3/hashes.go +++ /dev/null @@ -1,109 +0,0 @@ -// Copyright 2014 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package sha3 - -// This file provides functions for creating instances of the SHA-3 -// and SHAKE hash functions, as well as utility functions for hashing -// bytes. - -import ( - "crypto" - "hash" -) - -// New224 creates a new SHA3-224 hash. -// Its generic security strength is 224 bits against preimage attacks, -// and 112 bits against collision attacks. -func New224() hash.Hash { - return new224() -} - -// New256 creates a new SHA3-256 hash. -// Its generic security strength is 256 bits against preimage attacks, -// and 128 bits against collision attacks. -func New256() hash.Hash { - return new256() -} - -// New384 creates a new SHA3-384 hash. -// Its generic security strength is 384 bits against preimage attacks, -// and 192 bits against collision attacks. -func New384() hash.Hash { - return new384() -} - -// New512 creates a new SHA3-512 hash. -// Its generic security strength is 512 bits against preimage attacks, -// and 256 bits against collision attacks. -func New512() hash.Hash { - return new512() -} - -func init() { - crypto.RegisterHash(crypto.SHA3_224, New224) - crypto.RegisterHash(crypto.SHA3_256, New256) - crypto.RegisterHash(crypto.SHA3_384, New384) - crypto.RegisterHash(crypto.SHA3_512, New512) -} - -func new224Generic() *state { - return &state{rate: 144, outputLen: 28, dsbyte: 0x06} -} - -func new256Generic() *state { - return &state{rate: 136, outputLen: 32, dsbyte: 0x06} -} - -func new384Generic() *state { - return &state{rate: 104, outputLen: 48, dsbyte: 0x06} -} - -func new512Generic() *state { - return &state{rate: 72, outputLen: 64, dsbyte: 0x06} -} - -// NewLegacyKeccak256 creates a new Keccak-256 hash. -// -// Only use this function if you require compatibility with an existing cryptosystem -// that uses non-standard padding. All other users should use New256 instead. -func NewLegacyKeccak256() hash.Hash { return &state{rate: 136, outputLen: 32, dsbyte: 0x01} } - -// NewLegacyKeccak512 creates a new Keccak-512 hash. -// -// Only use this function if you require compatibility with an existing cryptosystem -// that uses non-standard padding. All other users should use New512 instead. -func NewLegacyKeccak512() hash.Hash { return &state{rate: 72, outputLen: 64, dsbyte: 0x01} } - -// Sum224 returns the SHA3-224 digest of the data. -func Sum224(data []byte) (digest [28]byte) { - h := New224() - h.Write(data) - h.Sum(digest[:0]) - return -} - -// Sum256 returns the SHA3-256 digest of the data. -func Sum256(data []byte) (digest [32]byte) { - h := New256() - h.Write(data) - h.Sum(digest[:0]) - return -} - -// Sum384 returns the SHA3-384 digest of the data. -func Sum384(data []byte) (digest [48]byte) { - h := New384() - h.Write(data) - h.Sum(digest[:0]) - return -} - -// Sum512 returns the SHA3-512 digest of the data. -func Sum512(data []byte) (digest [64]byte) { - h := New512() - h.Write(data) - h.Sum(digest[:0]) - return -} diff --git a/src/vendor/golang.org/x/crypto/sha3/hashes_noasm.go b/src/vendor/golang.org/x/crypto/sha3/hashes_noasm.go deleted file mode 100644 index 9d85fb6214..0000000000 --- a/src/vendor/golang.org/x/crypto/sha3/hashes_noasm.go +++ /dev/null @@ -1,23 +0,0 @@ -// Copyright 2023 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -//go:build !gc || purego || !s390x - -package sha3 - -func new224() *state { - return new224Generic() -} - -func new256() *state { - return new256Generic() -} - -func new384() *state { - return new384Generic() -} - -func new512() *state { - return new512Generic() -} diff --git a/src/vendor/golang.org/x/crypto/sha3/keccakf.go b/src/vendor/golang.org/x/crypto/sha3/keccakf.go deleted file mode 100644 index ce48b1dd3e..0000000000 --- a/src/vendor/golang.org/x/crypto/sha3/keccakf.go +++ /dev/null @@ -1,414 +0,0 @@ -// Copyright 2014 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -//go:build !amd64 || purego || !gc - -package sha3 - -import "math/bits" - -// rc stores the round constants for use in the ι step. -var rc = [24]uint64{ - 0x0000000000000001, - 0x0000000000008082, - 0x800000000000808A, - 0x8000000080008000, - 0x000000000000808B, - 0x0000000080000001, - 0x8000000080008081, - 0x8000000000008009, - 0x000000000000008A, - 0x0000000000000088, - 0x0000000080008009, - 0x000000008000000A, - 0x000000008000808B, - 0x800000000000008B, - 0x8000000000008089, - 0x8000000000008003, - 0x8000000000008002, - 0x8000000000000080, - 0x000000000000800A, - 0x800000008000000A, - 0x8000000080008081, - 0x8000000000008080, - 0x0000000080000001, - 0x8000000080008008, -} - -// keccakF1600 applies the Keccak permutation to a 1600b-wide -// state represented as a slice of 25 uint64s. -func keccakF1600(a *[25]uint64) { - // Implementation translated from Keccak-inplace.c - // in the keccak reference code. - var t, bc0, bc1, bc2, bc3, bc4, d0, d1, d2, d3, d4 uint64 - - for i := 0; i < 24; i += 4 { - // Combines the 5 steps in each round into 2 steps. - // Unrolls 4 rounds per loop and spreads some steps across rounds. - - // Round 1 - bc0 = a[0] ^ a[5] ^ a[10] ^ a[15] ^ a[20] - bc1 = a[1] ^ a[6] ^ a[11] ^ a[16] ^ a[21] - bc2 = a[2] ^ a[7] ^ a[12] ^ a[17] ^ a[22] - bc3 = a[3] ^ a[8] ^ a[13] ^ a[18] ^ a[23] - bc4 = a[4] ^ a[9] ^ a[14] ^ a[19] ^ a[24] - d0 = bc4 ^ (bc1<<1 | bc1>>63) - d1 = bc0 ^ (bc2<<1 | bc2>>63) - d2 = bc1 ^ (bc3<<1 | bc3>>63) - d3 = bc2 ^ (bc4<<1 | bc4>>63) - d4 = bc3 ^ (bc0<<1 | bc0>>63) - - bc0 = a[0] ^ d0 - t = a[6] ^ d1 - bc1 = bits.RotateLeft64(t, 44) - t = a[12] ^ d2 - bc2 = bits.RotateLeft64(t, 43) - t = a[18] ^ d3 - bc3 = bits.RotateLeft64(t, 21) - t = a[24] ^ d4 - bc4 = bits.RotateLeft64(t, 14) - a[0] = bc0 ^ (bc2 &^ bc1) ^ rc[i] - a[6] = bc1 ^ (bc3 &^ bc2) - a[12] = bc2 ^ (bc4 &^ bc3) - a[18] = bc3 ^ (bc0 &^ bc4) - a[24] = bc4 ^ (bc1 &^ bc0) - - t = a[10] ^ d0 - bc2 = bits.RotateLeft64(t, 3) - t = a[16] ^ d1 - bc3 = bits.RotateLeft64(t, 45) - t = a[22] ^ d2 - bc4 = bits.RotateLeft64(t, 61) - t = a[3] ^ d3 - bc0 = bits.RotateLeft64(t, 28) - t = a[9] ^ d4 - bc1 = bits.RotateLeft64(t, 20) - a[10] = bc0 ^ (bc2 &^ bc1) - a[16] = bc1 ^ (bc3 &^ bc2) - a[22] = bc2 ^ (bc4 &^ bc3) - a[3] = bc3 ^ (bc0 &^ bc4) - a[9] = bc4 ^ (bc1 &^ bc0) - - t = a[20] ^ d0 - bc4 = bits.RotateLeft64(t, 18) - t = a[1] ^ d1 - bc0 = bits.RotateLeft64(t, 1) - t = a[7] ^ d2 - bc1 = bits.RotateLeft64(t, 6) - t = a[13] ^ d3 - bc2 = bits.RotateLeft64(t, 25) - t = a[19] ^ d4 - bc3 = bits.RotateLeft64(t, 8) - a[20] = bc0 ^ (bc2 &^ bc1) - a[1] = bc1 ^ (bc3 &^ bc2) - a[7] = bc2 ^ (bc4 &^ bc3) - a[13] = bc3 ^ (bc0 &^ bc4) - a[19] = bc4 ^ (bc1 &^ bc0) - - t = a[5] ^ d0 - bc1 = bits.RotateLeft64(t, 36) - t = a[11] ^ d1 - bc2 = bits.RotateLeft64(t, 10) - t = a[17] ^ d2 - bc3 = bits.RotateLeft64(t, 15) - t = a[23] ^ d3 - bc4 = bits.RotateLeft64(t, 56) - t = a[4] ^ d4 - bc0 = bits.RotateLeft64(t, 27) - a[5] = bc0 ^ (bc2 &^ bc1) - a[11] = bc1 ^ (bc3 &^ bc2) - a[17] = bc2 ^ (bc4 &^ bc3) - a[23] = bc3 ^ (bc0 &^ bc4) - a[4] = bc4 ^ (bc1 &^ bc0) - - t = a[15] ^ d0 - bc3 = bits.RotateLeft64(t, 41) - t = a[21] ^ d1 - bc4 = bits.RotateLeft64(t, 2) - t = a[2] ^ d2 - bc0 = bits.RotateLeft64(t, 62) - t = a[8] ^ d3 - bc1 = bits.RotateLeft64(t, 55) - t = a[14] ^ d4 - bc2 = bits.RotateLeft64(t, 39) - a[15] = bc0 ^ (bc2 &^ bc1) - a[21] = bc1 ^ (bc3 &^ bc2) - a[2] = bc2 ^ (bc4 &^ bc3) - a[8] = bc3 ^ (bc0 &^ bc4) - a[14] = bc4 ^ (bc1 &^ bc0) - - // Round 2 - bc0 = a[0] ^ a[5] ^ a[10] ^ a[15] ^ a[20] - bc1 = a[1] ^ a[6] ^ a[11] ^ a[16] ^ a[21] - bc2 = a[2] ^ a[7] ^ a[12] ^ a[17] ^ a[22] - bc3 = a[3] ^ a[8] ^ a[13] ^ a[18] ^ a[23] - bc4 = a[4] ^ a[9] ^ a[14] ^ a[19] ^ a[24] - d0 = bc4 ^ (bc1<<1 | bc1>>63) - d1 = bc0 ^ (bc2<<1 | bc2>>63) - d2 = bc1 ^ (bc3<<1 | bc3>>63) - d3 = bc2 ^ (bc4<<1 | bc4>>63) - d4 = bc3 ^ (bc0<<1 | bc0>>63) - - bc0 = a[0] ^ d0 - t = a[16] ^ d1 - bc1 = bits.RotateLeft64(t, 44) - t = a[7] ^ d2 - bc2 = bits.RotateLeft64(t, 43) - t = a[23] ^ d3 - bc3 = bits.RotateLeft64(t, 21) - t = a[14] ^ d4 - bc4 = bits.RotateLeft64(t, 14) - a[0] = bc0 ^ (bc2 &^ bc1) ^ rc[i+1] - a[16] = bc1 ^ (bc3 &^ bc2) - a[7] = bc2 ^ (bc4 &^ bc3) - a[23] = bc3 ^ (bc0 &^ bc4) - a[14] = bc4 ^ (bc1 &^ bc0) - - t = a[20] ^ d0 - bc2 = bits.RotateLeft64(t, 3) - t = a[11] ^ d1 - bc3 = bits.RotateLeft64(t, 45) - t = a[2] ^ d2 - bc4 = bits.RotateLeft64(t, 61) - t = a[18] ^ d3 - bc0 = bits.RotateLeft64(t, 28) - t = a[9] ^ d4 - bc1 = bits.RotateLeft64(t, 20) - a[20] = bc0 ^ (bc2 &^ bc1) - a[11] = bc1 ^ (bc3 &^ bc2) - a[2] = bc2 ^ (bc4 &^ bc3) - a[18] = bc3 ^ (bc0 &^ bc4) - a[9] = bc4 ^ (bc1 &^ bc0) - - t = a[15] ^ d0 - bc4 = bits.RotateLeft64(t, 18) - t = a[6] ^ d1 - bc0 = bits.RotateLeft64(t, 1) - t = a[22] ^ d2 - bc1 = bits.RotateLeft64(t, 6) - t = a[13] ^ d3 - bc2 = bits.RotateLeft64(t, 25) - t = a[4] ^ d4 - bc3 = bits.RotateLeft64(t, 8) - a[15] = bc0 ^ (bc2 &^ bc1) - a[6] = bc1 ^ (bc3 &^ bc2) - a[22] = bc2 ^ (bc4 &^ bc3) - a[13] = bc3 ^ (bc0 &^ bc4) - a[4] = bc4 ^ (bc1 &^ bc0) - - t = a[10] ^ d0 - bc1 = bits.RotateLeft64(t, 36) - t = a[1] ^ d1 - bc2 = bits.RotateLeft64(t, 10) - t = a[17] ^ d2 - bc3 = bits.RotateLeft64(t, 15) - t = a[8] ^ d3 - bc4 = bits.RotateLeft64(t, 56) - t = a[24] ^ d4 - bc0 = bits.RotateLeft64(t, 27) - a[10] = bc0 ^ (bc2 &^ bc1) - a[1] = bc1 ^ (bc3 &^ bc2) - a[17] = bc2 ^ (bc4 &^ bc3) - a[8] = bc3 ^ (bc0 &^ bc4) - a[24] = bc4 ^ (bc1 &^ bc0) - - t = a[5] ^ d0 - bc3 = bits.RotateLeft64(t, 41) - t = a[21] ^ d1 - bc4 = bits.RotateLeft64(t, 2) - t = a[12] ^ d2 - bc0 = bits.RotateLeft64(t, 62) - t = a[3] ^ d3 - bc1 = bits.RotateLeft64(t, 55) - t = a[19] ^ d4 - bc2 = bits.RotateLeft64(t, 39) - a[5] = bc0 ^ (bc2 &^ bc1) - a[21] = bc1 ^ (bc3 &^ bc2) - a[12] = bc2 ^ (bc4 &^ bc3) - a[3] = bc3 ^ (bc0 &^ bc4) - a[19] = bc4 ^ (bc1 &^ bc0) - - // Round 3 - bc0 = a[0] ^ a[5] ^ a[10] ^ a[15] ^ a[20] - bc1 = a[1] ^ a[6] ^ a[11] ^ a[16] ^ a[21] - bc2 = a[2] ^ a[7] ^ a[12] ^ a[17] ^ a[22] - bc3 = a[3] ^ a[8] ^ a[13] ^ a[18] ^ a[23] - bc4 = a[4] ^ a[9] ^ a[14] ^ a[19] ^ a[24] - d0 = bc4 ^ (bc1<<1 | bc1>>63) - d1 = bc0 ^ (bc2<<1 | bc2>>63) - d2 = bc1 ^ (bc3<<1 | bc3>>63) - d3 = bc2 ^ (bc4<<1 | bc4>>63) - d4 = bc3 ^ (bc0<<1 | bc0>>63) - - bc0 = a[0] ^ d0 - t = a[11] ^ d1 - bc1 = bits.RotateLeft64(t, 44) - t = a[22] ^ d2 - bc2 = bits.RotateLeft64(t, 43) - t = a[8] ^ d3 - bc3 = bits.RotateLeft64(t, 21) - t = a[19] ^ d4 - bc4 = bits.RotateLeft64(t, 14) - a[0] = bc0 ^ (bc2 &^ bc1) ^ rc[i+2] - a[11] = bc1 ^ (bc3 &^ bc2) - a[22] = bc2 ^ (bc4 &^ bc3) - a[8] = bc3 ^ (bc0 &^ bc4) - a[19] = bc4 ^ (bc1 &^ bc0) - - t = a[15] ^ d0 - bc2 = bits.RotateLeft64(t, 3) - t = a[1] ^ d1 - bc3 = bits.RotateLeft64(t, 45) - t = a[12] ^ d2 - bc4 = bits.RotateLeft64(t, 61) - t = a[23] ^ d3 - bc0 = bits.RotateLeft64(t, 28) - t = a[9] ^ d4 - bc1 = bits.RotateLeft64(t, 20) - a[15] = bc0 ^ (bc2 &^ bc1) - a[1] = bc1 ^ (bc3 &^ bc2) - a[12] = bc2 ^ (bc4 &^ bc3) - a[23] = bc3 ^ (bc0 &^ bc4) - a[9] = bc4 ^ (bc1 &^ bc0) - - t = a[5] ^ d0 - bc4 = bits.RotateLeft64(t, 18) - t = a[16] ^ d1 - bc0 = bits.RotateLeft64(t, 1) - t = a[2] ^ d2 - bc1 = bits.RotateLeft64(t, 6) - t = a[13] ^ d3 - bc2 = bits.RotateLeft64(t, 25) - t = a[24] ^ d4 - bc3 = bits.RotateLeft64(t, 8) - a[5] = bc0 ^ (bc2 &^ bc1) - a[16] = bc1 ^ (bc3 &^ bc2) - a[2] = bc2 ^ (bc4 &^ bc3) - a[13] = bc3 ^ (bc0 &^ bc4) - a[24] = bc4 ^ (bc1 &^ bc0) - - t = a[20] ^ d0 - bc1 = bits.RotateLeft64(t, 36) - t = a[6] ^ d1 - bc2 = bits.RotateLeft64(t, 10) - t = a[17] ^ d2 - bc3 = bits.RotateLeft64(t, 15) - t = a[3] ^ d3 - bc4 = bits.RotateLeft64(t, 56) - t = a[14] ^ d4 - bc0 = bits.RotateLeft64(t, 27) - a[20] = bc0 ^ (bc2 &^ bc1) - a[6] = bc1 ^ (bc3 &^ bc2) - a[17] = bc2 ^ (bc4 &^ bc3) - a[3] = bc3 ^ (bc0 &^ bc4) - a[14] = bc4 ^ (bc1 &^ bc0) - - t = a[10] ^ d0 - bc3 = bits.RotateLeft64(t, 41) - t = a[21] ^ d1 - bc4 = bits.RotateLeft64(t, 2) - t = a[7] ^ d2 - bc0 = bits.RotateLeft64(t, 62) - t = a[18] ^ d3 - bc1 = bits.RotateLeft64(t, 55) - t = a[4] ^ d4 - bc2 = bits.RotateLeft64(t, 39) - a[10] = bc0 ^ (bc2 &^ bc1) - a[21] = bc1 ^ (bc3 &^ bc2) - a[7] = bc2 ^ (bc4 &^ bc3) - a[18] = bc3 ^ (bc0 &^ bc4) - a[4] = bc4 ^ (bc1 &^ bc0) - - // Round 4 - bc0 = a[0] ^ a[5] ^ a[10] ^ a[15] ^ a[20] - bc1 = a[1] ^ a[6] ^ a[11] ^ a[16] ^ a[21] - bc2 = a[2] ^ a[7] ^ a[12] ^ a[17] ^ a[22] - bc3 = a[3] ^ a[8] ^ a[13] ^ a[18] ^ a[23] - bc4 = a[4] ^ a[9] ^ a[14] ^ a[19] ^ a[24] - d0 = bc4 ^ (bc1<<1 | bc1>>63) - d1 = bc0 ^ (bc2<<1 | bc2>>63) - d2 = bc1 ^ (bc3<<1 | bc3>>63) - d3 = bc2 ^ (bc4<<1 | bc4>>63) - d4 = bc3 ^ (bc0<<1 | bc0>>63) - - bc0 = a[0] ^ d0 - t = a[1] ^ d1 - bc1 = bits.RotateLeft64(t, 44) - t = a[2] ^ d2 - bc2 = bits.RotateLeft64(t, 43) - t = a[3] ^ d3 - bc3 = bits.RotateLeft64(t, 21) - t = a[4] ^ d4 - bc4 = bits.RotateLeft64(t, 14) - a[0] = bc0 ^ (bc2 &^ bc1) ^ rc[i+3] - a[1] = bc1 ^ (bc3 &^ bc2) - a[2] = bc2 ^ (bc4 &^ bc3) - a[3] = bc3 ^ (bc0 &^ bc4) - a[4] = bc4 ^ (bc1 &^ bc0) - - t = a[5] ^ d0 - bc2 = bits.RotateLeft64(t, 3) - t = a[6] ^ d1 - bc3 = bits.RotateLeft64(t, 45) - t = a[7] ^ d2 - bc4 = bits.RotateLeft64(t, 61) - t = a[8] ^ d3 - bc0 = bits.RotateLeft64(t, 28) - t = a[9] ^ d4 - bc1 = bits.RotateLeft64(t, 20) - a[5] = bc0 ^ (bc2 &^ bc1) - a[6] = bc1 ^ (bc3 &^ bc2) - a[7] = bc2 ^ (bc4 &^ bc3) - a[8] = bc3 ^ (bc0 &^ bc4) - a[9] = bc4 ^ (bc1 &^ bc0) - - t = a[10] ^ d0 - bc4 = bits.RotateLeft64(t, 18) - t = a[11] ^ d1 - bc0 = bits.RotateLeft64(t, 1) - t = a[12] ^ d2 - bc1 = bits.RotateLeft64(t, 6) - t = a[13] ^ d3 - bc2 = bits.RotateLeft64(t, 25) - t = a[14] ^ d4 - bc3 = bits.RotateLeft64(t, 8) - a[10] = bc0 ^ (bc2 &^ bc1) - a[11] = bc1 ^ (bc3 &^ bc2) - a[12] = bc2 ^ (bc4 &^ bc3) - a[13] = bc3 ^ (bc0 &^ bc4) - a[14] = bc4 ^ (bc1 &^ bc0) - - t = a[15] ^ d0 - bc1 = bits.RotateLeft64(t, 36) - t = a[16] ^ d1 - bc2 = bits.RotateLeft64(t, 10) - t = a[17] ^ d2 - bc3 = bits.RotateLeft64(t, 15) - t = a[18] ^ d3 - bc4 = bits.RotateLeft64(t, 56) - t = a[19] ^ d4 - bc0 = bits.RotateLeft64(t, 27) - a[15] = bc0 ^ (bc2 &^ bc1) - a[16] = bc1 ^ (bc3 &^ bc2) - a[17] = bc2 ^ (bc4 &^ bc3) - a[18] = bc3 ^ (bc0 &^ bc4) - a[19] = bc4 ^ (bc1 &^ bc0) - - t = a[20] ^ d0 - bc3 = bits.RotateLeft64(t, 41) - t = a[21] ^ d1 - bc4 = bits.RotateLeft64(t, 2) - t = a[22] ^ d2 - bc0 = bits.RotateLeft64(t, 62) - t = a[23] ^ d3 - bc1 = bits.RotateLeft64(t, 55) - t = a[24] ^ d4 - bc2 = bits.RotateLeft64(t, 39) - a[20] = bc0 ^ (bc2 &^ bc1) - a[21] = bc1 ^ (bc3 &^ bc2) - a[22] = bc2 ^ (bc4 &^ bc3) - a[23] = bc3 ^ (bc0 &^ bc4) - a[24] = bc4 ^ (bc1 &^ bc0) - } -} diff --git a/src/vendor/golang.org/x/crypto/sha3/keccakf_amd64.go b/src/vendor/golang.org/x/crypto/sha3/keccakf_amd64.go deleted file mode 100644 index b908696be5..0000000000 --- a/src/vendor/golang.org/x/crypto/sha3/keccakf_amd64.go +++ /dev/null @@ -1,13 +0,0 @@ -// Copyright 2015 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -//go:build amd64 && !purego && gc - -package sha3 - -// This function is implemented in keccakf_amd64.s. - -//go:noescape - -func keccakF1600(a *[25]uint64) diff --git a/src/vendor/golang.org/x/crypto/sha3/keccakf_amd64.s b/src/vendor/golang.org/x/crypto/sha3/keccakf_amd64.s deleted file mode 100644 index 99e2f16e97..0000000000 --- a/src/vendor/golang.org/x/crypto/sha3/keccakf_amd64.s +++ /dev/null @@ -1,5419 +0,0 @@ -// Code generated by command: go run keccakf_amd64_asm.go -out ../keccakf_amd64.s -pkg sha3. DO NOT EDIT. - -//go:build amd64 && !purego && gc - -// func keccakF1600(a *[25]uint64) -TEXT ·keccakF1600(SB), $200-8 - MOVQ a+0(FP), DI - - // Convert the user state into an internal state - NOTQ 8(DI) - NOTQ 16(DI) - NOTQ 64(DI) - NOTQ 96(DI) - NOTQ 136(DI) - NOTQ 160(DI) - - // Execute the KeccakF permutation - MOVQ (DI), SI - MOVQ 8(DI), BP - MOVQ 32(DI), R15 - XORQ 40(DI), SI - XORQ 48(DI), BP - XORQ 72(DI), R15 - XORQ 80(DI), SI - XORQ 88(DI), BP - XORQ 112(DI), R15 - XORQ 120(DI), SI - XORQ 128(DI), BP - XORQ 152(DI), R15 - XORQ 160(DI), SI - XORQ 168(DI), BP - MOVQ 176(DI), DX - MOVQ 184(DI), R8 - XORQ 192(DI), R15 - - // Prepare round - MOVQ BP, BX - ROLQ $0x01, BX - MOVQ 16(DI), R12 - XORQ 56(DI), DX - XORQ R15, BX - XORQ 96(DI), R12 - XORQ 136(DI), DX - XORQ DX, R12 - MOVQ R12, CX - ROLQ $0x01, CX - MOVQ 24(DI), R13 - XORQ 64(DI), R8 - XORQ SI, CX - XORQ 104(DI), R13 - XORQ 144(DI), R8 - XORQ R8, R13 - MOVQ R13, DX - ROLQ $0x01, DX - MOVQ R15, R8 - XORQ BP, DX - ROLQ $0x01, R8 - MOVQ SI, R9 - XORQ R12, R8 - ROLQ $0x01, R9 - - // Result b - MOVQ (DI), R10 - MOVQ 48(DI), R11 - XORQ R13, R9 - MOVQ 96(DI), R12 - MOVQ 144(DI), R13 - MOVQ 192(DI), R14 - XORQ CX, R11 - ROLQ $0x2c, R11 - XORQ DX, R12 - XORQ BX, R10 - ROLQ $0x2b, R12 - MOVQ R11, SI - MOVQ $0x0000000000000001, AX - ORQ R12, SI - XORQ R10, AX - XORQ AX, SI - MOVQ SI, (SP) - XORQ R9, R14 - ROLQ $0x0e, R14 - MOVQ R10, R15 - ANDQ R11, R15 - XORQ R14, R15 - MOVQ R15, 32(SP) - XORQ R8, R13 - ROLQ $0x15, R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 16(SP) - NOTQ R12 - ORQ R10, R14 - ORQ R13, R12 - XORQ R13, R14 - XORQ R11, R12 - MOVQ R14, 24(SP) - MOVQ R12, 8(SP) - MOVQ R12, BP - - // Result g - MOVQ 72(DI), R11 - XORQ R9, R11 - MOVQ 80(DI), R12 - ROLQ $0x14, R11 - XORQ BX, R12 - ROLQ $0x03, R12 - MOVQ 24(DI), R10 - MOVQ R11, AX - ORQ R12, AX - XORQ R8, R10 - MOVQ 128(DI), R13 - MOVQ 176(DI), R14 - ROLQ $0x1c, R10 - XORQ R10, AX - MOVQ AX, 40(SP) - XORQ AX, SI - XORQ CX, R13 - ROLQ $0x2d, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 48(SP) - XORQ AX, BP - XORQ DX, R14 - ROLQ $0x3d, R14 - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 64(SP) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 72(SP) - NOTQ R14 - XORQ R10, R15 - ORQ R14, R13 - XORQ R12, R13 - MOVQ R13, 56(SP) - - // Result k - MOVQ 8(DI), R10 - MOVQ 56(DI), R11 - MOVQ 104(DI), R12 - MOVQ 152(DI), R13 - MOVQ 160(DI), R14 - XORQ DX, R11 - ROLQ $0x06, R11 - XORQ R8, R12 - ROLQ $0x19, R12 - MOVQ R11, AX - ORQ R12, AX - XORQ CX, R10 - ROLQ $0x01, R10 - XORQ R10, AX - MOVQ AX, 80(SP) - XORQ AX, SI - XORQ R9, R13 - ROLQ $0x08, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 88(SP) - XORQ AX, BP - XORQ BX, R14 - ROLQ $0x12, R14 - NOTQ R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 96(SP) - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 104(SP) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 112(SP) - XORQ R10, R15 - - // Result m - MOVQ 40(DI), R11 - XORQ BX, R11 - MOVQ 88(DI), R12 - ROLQ $0x24, R11 - XORQ CX, R12 - MOVQ 32(DI), R10 - ROLQ $0x0a, R12 - MOVQ R11, AX - MOVQ 136(DI), R13 - ANDQ R12, AX - XORQ R9, R10 - MOVQ 184(DI), R14 - ROLQ $0x1b, R10 - XORQ R10, AX - MOVQ AX, 120(SP) - XORQ AX, SI - XORQ DX, R13 - ROLQ $0x0f, R13 - MOVQ R12, AX - ORQ R13, AX - XORQ R11, AX - MOVQ AX, 128(SP) - XORQ AX, BP - XORQ R8, R14 - ROLQ $0x38, R14 - NOTQ R13 - MOVQ R13, AX - ORQ R14, AX - XORQ R12, AX - MOVQ AX, 136(SP) - ORQ R10, R11 - XORQ R14, R11 - MOVQ R11, 152(SP) - ANDQ R10, R14 - XORQ R13, R14 - MOVQ R14, 144(SP) - XORQ R11, R15 - - // Result s - MOVQ 16(DI), R10 - MOVQ 64(DI), R11 - MOVQ 112(DI), R12 - XORQ DX, R10 - MOVQ 120(DI), R13 - ROLQ $0x3e, R10 - XORQ R8, R11 - MOVQ 168(DI), R14 - ROLQ $0x37, R11 - XORQ R9, R12 - MOVQ R10, R9 - XORQ CX, R14 - ROLQ $0x02, R14 - ANDQ R11, R9 - XORQ R14, R9 - MOVQ R9, 192(SP) - ROLQ $0x27, R12 - XORQ R9, R15 - NOTQ R11 - XORQ BX, R13 - MOVQ R11, BX - ANDQ R12, BX - XORQ R10, BX - MOVQ BX, 160(SP) - XORQ BX, SI - ROLQ $0x29, R13 - MOVQ R12, CX - ORQ R13, CX - XORQ R11, CX - MOVQ CX, 168(SP) - XORQ CX, BP - MOVQ R13, DX - MOVQ R14, R8 - ANDQ R14, DX - ORQ R10, R8 - XORQ R12, DX - XORQ R13, R8 - MOVQ DX, 176(SP) - MOVQ R8, 184(SP) - - // Prepare round - MOVQ BP, BX - ROLQ $0x01, BX - MOVQ 16(SP), R12 - XORQ 56(SP), DX - XORQ R15, BX - XORQ 96(SP), R12 - XORQ 136(SP), DX - XORQ DX, R12 - MOVQ R12, CX - ROLQ $0x01, CX - MOVQ 24(SP), R13 - XORQ 64(SP), R8 - XORQ SI, CX - XORQ 104(SP), R13 - XORQ 144(SP), R8 - XORQ R8, R13 - MOVQ R13, DX - ROLQ $0x01, DX - MOVQ R15, R8 - XORQ BP, DX - ROLQ $0x01, R8 - MOVQ SI, R9 - XORQ R12, R8 - ROLQ $0x01, R9 - - // Result b - MOVQ (SP), R10 - MOVQ 48(SP), R11 - XORQ R13, R9 - MOVQ 96(SP), R12 - MOVQ 144(SP), R13 - MOVQ 192(SP), R14 - XORQ CX, R11 - ROLQ $0x2c, R11 - XORQ DX, R12 - XORQ BX, R10 - ROLQ $0x2b, R12 - MOVQ R11, SI - MOVQ $0x0000000000008082, AX - ORQ R12, SI - XORQ R10, AX - XORQ AX, SI - MOVQ SI, (DI) - XORQ R9, R14 - ROLQ $0x0e, R14 - MOVQ R10, R15 - ANDQ R11, R15 - XORQ R14, R15 - MOVQ R15, 32(DI) - XORQ R8, R13 - ROLQ $0x15, R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 16(DI) - NOTQ R12 - ORQ R10, R14 - ORQ R13, R12 - XORQ R13, R14 - XORQ R11, R12 - MOVQ R14, 24(DI) - MOVQ R12, 8(DI) - MOVQ R12, BP - - // Result g - MOVQ 72(SP), R11 - XORQ R9, R11 - MOVQ 80(SP), R12 - ROLQ $0x14, R11 - XORQ BX, R12 - ROLQ $0x03, R12 - MOVQ 24(SP), R10 - MOVQ R11, AX - ORQ R12, AX - XORQ R8, R10 - MOVQ 128(SP), R13 - MOVQ 176(SP), R14 - ROLQ $0x1c, R10 - XORQ R10, AX - MOVQ AX, 40(DI) - XORQ AX, SI - XORQ CX, R13 - ROLQ $0x2d, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 48(DI) - XORQ AX, BP - XORQ DX, R14 - ROLQ $0x3d, R14 - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 64(DI) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 72(DI) - NOTQ R14 - XORQ R10, R15 - ORQ R14, R13 - XORQ R12, R13 - MOVQ R13, 56(DI) - - // Result k - MOVQ 8(SP), R10 - MOVQ 56(SP), R11 - MOVQ 104(SP), R12 - MOVQ 152(SP), R13 - MOVQ 160(SP), R14 - XORQ DX, R11 - ROLQ $0x06, R11 - XORQ R8, R12 - ROLQ $0x19, R12 - MOVQ R11, AX - ORQ R12, AX - XORQ CX, R10 - ROLQ $0x01, R10 - XORQ R10, AX - MOVQ AX, 80(DI) - XORQ AX, SI - XORQ R9, R13 - ROLQ $0x08, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 88(DI) - XORQ AX, BP - XORQ BX, R14 - ROLQ $0x12, R14 - NOTQ R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 96(DI) - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 104(DI) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 112(DI) - XORQ R10, R15 - - // Result m - MOVQ 40(SP), R11 - XORQ BX, R11 - MOVQ 88(SP), R12 - ROLQ $0x24, R11 - XORQ CX, R12 - MOVQ 32(SP), R10 - ROLQ $0x0a, R12 - MOVQ R11, AX - MOVQ 136(SP), R13 - ANDQ R12, AX - XORQ R9, R10 - MOVQ 184(SP), R14 - ROLQ $0x1b, R10 - XORQ R10, AX - MOVQ AX, 120(DI) - XORQ AX, SI - XORQ DX, R13 - ROLQ $0x0f, R13 - MOVQ R12, AX - ORQ R13, AX - XORQ R11, AX - MOVQ AX, 128(DI) - XORQ AX, BP - XORQ R8, R14 - ROLQ $0x38, R14 - NOTQ R13 - MOVQ R13, AX - ORQ R14, AX - XORQ R12, AX - MOVQ AX, 136(DI) - ORQ R10, R11 - XORQ R14, R11 - MOVQ R11, 152(DI) - ANDQ R10, R14 - XORQ R13, R14 - MOVQ R14, 144(DI) - XORQ R11, R15 - - // Result s - MOVQ 16(SP), R10 - MOVQ 64(SP), R11 - MOVQ 112(SP), R12 - XORQ DX, R10 - MOVQ 120(SP), R13 - ROLQ $0x3e, R10 - XORQ R8, R11 - MOVQ 168(SP), R14 - ROLQ $0x37, R11 - XORQ R9, R12 - MOVQ R10, R9 - XORQ CX, R14 - ROLQ $0x02, R14 - ANDQ R11, R9 - XORQ R14, R9 - MOVQ R9, 192(DI) - ROLQ $0x27, R12 - XORQ R9, R15 - NOTQ R11 - XORQ BX, R13 - MOVQ R11, BX - ANDQ R12, BX - XORQ R10, BX - MOVQ BX, 160(DI) - XORQ BX, SI - ROLQ $0x29, R13 - MOVQ R12, CX - ORQ R13, CX - XORQ R11, CX - MOVQ CX, 168(DI) - XORQ CX, BP - MOVQ R13, DX - MOVQ R14, R8 - ANDQ R14, DX - ORQ R10, R8 - XORQ R12, DX - XORQ R13, R8 - MOVQ DX, 176(DI) - MOVQ R8, 184(DI) - - // Prepare round - MOVQ BP, BX - ROLQ $0x01, BX - MOVQ 16(DI), R12 - XORQ 56(DI), DX - XORQ R15, BX - XORQ 96(DI), R12 - XORQ 136(DI), DX - XORQ DX, R12 - MOVQ R12, CX - ROLQ $0x01, CX - MOVQ 24(DI), R13 - XORQ 64(DI), R8 - XORQ SI, CX - XORQ 104(DI), R13 - XORQ 144(DI), R8 - XORQ R8, R13 - MOVQ R13, DX - ROLQ $0x01, DX - MOVQ R15, R8 - XORQ BP, DX - ROLQ $0x01, R8 - MOVQ SI, R9 - XORQ R12, R8 - ROLQ $0x01, R9 - - // Result b - MOVQ (DI), R10 - MOVQ 48(DI), R11 - XORQ R13, R9 - MOVQ 96(DI), R12 - MOVQ 144(DI), R13 - MOVQ 192(DI), R14 - XORQ CX, R11 - ROLQ $0x2c, R11 - XORQ DX, R12 - XORQ BX, R10 - ROLQ $0x2b, R12 - MOVQ R11, SI - MOVQ $0x800000000000808a, AX - ORQ R12, SI - XORQ R10, AX - XORQ AX, SI - MOVQ SI, (SP) - XORQ R9, R14 - ROLQ $0x0e, R14 - MOVQ R10, R15 - ANDQ R11, R15 - XORQ R14, R15 - MOVQ R15, 32(SP) - XORQ R8, R13 - ROLQ $0x15, R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 16(SP) - NOTQ R12 - ORQ R10, R14 - ORQ R13, R12 - XORQ R13, R14 - XORQ R11, R12 - MOVQ R14, 24(SP) - MOVQ R12, 8(SP) - MOVQ R12, BP - - // Result g - MOVQ 72(DI), R11 - XORQ R9, R11 - MOVQ 80(DI), R12 - ROLQ $0x14, R11 - XORQ BX, R12 - ROLQ $0x03, R12 - MOVQ 24(DI), R10 - MOVQ R11, AX - ORQ R12, AX - XORQ R8, R10 - MOVQ 128(DI), R13 - MOVQ 176(DI), R14 - ROLQ $0x1c, R10 - XORQ R10, AX - MOVQ AX, 40(SP) - XORQ AX, SI - XORQ CX, R13 - ROLQ $0x2d, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 48(SP) - XORQ AX, BP - XORQ DX, R14 - ROLQ $0x3d, R14 - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 64(SP) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 72(SP) - NOTQ R14 - XORQ R10, R15 - ORQ R14, R13 - XORQ R12, R13 - MOVQ R13, 56(SP) - - // Result k - MOVQ 8(DI), R10 - MOVQ 56(DI), R11 - MOVQ 104(DI), R12 - MOVQ 152(DI), R13 - MOVQ 160(DI), R14 - XORQ DX, R11 - ROLQ $0x06, R11 - XORQ R8, R12 - ROLQ $0x19, R12 - MOVQ R11, AX - ORQ R12, AX - XORQ CX, R10 - ROLQ $0x01, R10 - XORQ R10, AX - MOVQ AX, 80(SP) - XORQ AX, SI - XORQ R9, R13 - ROLQ $0x08, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 88(SP) - XORQ AX, BP - XORQ BX, R14 - ROLQ $0x12, R14 - NOTQ R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 96(SP) - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 104(SP) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 112(SP) - XORQ R10, R15 - - // Result m - MOVQ 40(DI), R11 - XORQ BX, R11 - MOVQ 88(DI), R12 - ROLQ $0x24, R11 - XORQ CX, R12 - MOVQ 32(DI), R10 - ROLQ $0x0a, R12 - MOVQ R11, AX - MOVQ 136(DI), R13 - ANDQ R12, AX - XORQ R9, R10 - MOVQ 184(DI), R14 - ROLQ $0x1b, R10 - XORQ R10, AX - MOVQ AX, 120(SP) - XORQ AX, SI - XORQ DX, R13 - ROLQ $0x0f, R13 - MOVQ R12, AX - ORQ R13, AX - XORQ R11, AX - MOVQ AX, 128(SP) - XORQ AX, BP - XORQ R8, R14 - ROLQ $0x38, R14 - NOTQ R13 - MOVQ R13, AX - ORQ R14, AX - XORQ R12, AX - MOVQ AX, 136(SP) - ORQ R10, R11 - XORQ R14, R11 - MOVQ R11, 152(SP) - ANDQ R10, R14 - XORQ R13, R14 - MOVQ R14, 144(SP) - XORQ R11, R15 - - // Result s - MOVQ 16(DI), R10 - MOVQ 64(DI), R11 - MOVQ 112(DI), R12 - XORQ DX, R10 - MOVQ 120(DI), R13 - ROLQ $0x3e, R10 - XORQ R8, R11 - MOVQ 168(DI), R14 - ROLQ $0x37, R11 - XORQ R9, R12 - MOVQ R10, R9 - XORQ CX, R14 - ROLQ $0x02, R14 - ANDQ R11, R9 - XORQ R14, R9 - MOVQ R9, 192(SP) - ROLQ $0x27, R12 - XORQ R9, R15 - NOTQ R11 - XORQ BX, R13 - MOVQ R11, BX - ANDQ R12, BX - XORQ R10, BX - MOVQ BX, 160(SP) - XORQ BX, SI - ROLQ $0x29, R13 - MOVQ R12, CX - ORQ R13, CX - XORQ R11, CX - MOVQ CX, 168(SP) - XORQ CX, BP - MOVQ R13, DX - MOVQ R14, R8 - ANDQ R14, DX - ORQ R10, R8 - XORQ R12, DX - XORQ R13, R8 - MOVQ DX, 176(SP) - MOVQ R8, 184(SP) - - // Prepare round - MOVQ BP, BX - ROLQ $0x01, BX - MOVQ 16(SP), R12 - XORQ 56(SP), DX - XORQ R15, BX - XORQ 96(SP), R12 - XORQ 136(SP), DX - XORQ DX, R12 - MOVQ R12, CX - ROLQ $0x01, CX - MOVQ 24(SP), R13 - XORQ 64(SP), R8 - XORQ SI, CX - XORQ 104(SP), R13 - XORQ 144(SP), R8 - XORQ R8, R13 - MOVQ R13, DX - ROLQ $0x01, DX - MOVQ R15, R8 - XORQ BP, DX - ROLQ $0x01, R8 - MOVQ SI, R9 - XORQ R12, R8 - ROLQ $0x01, R9 - - // Result b - MOVQ (SP), R10 - MOVQ 48(SP), R11 - XORQ R13, R9 - MOVQ 96(SP), R12 - MOVQ 144(SP), R13 - MOVQ 192(SP), R14 - XORQ CX, R11 - ROLQ $0x2c, R11 - XORQ DX, R12 - XORQ BX, R10 - ROLQ $0x2b, R12 - MOVQ R11, SI - MOVQ $0x8000000080008000, AX - ORQ R12, SI - XORQ R10, AX - XORQ AX, SI - MOVQ SI, (DI) - XORQ R9, R14 - ROLQ $0x0e, R14 - MOVQ R10, R15 - ANDQ R11, R15 - XORQ R14, R15 - MOVQ R15, 32(DI) - XORQ R8, R13 - ROLQ $0x15, R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 16(DI) - NOTQ R12 - ORQ R10, R14 - ORQ R13, R12 - XORQ R13, R14 - XORQ R11, R12 - MOVQ R14, 24(DI) - MOVQ R12, 8(DI) - MOVQ R12, BP - - // Result g - MOVQ 72(SP), R11 - XORQ R9, R11 - MOVQ 80(SP), R12 - ROLQ $0x14, R11 - XORQ BX, R12 - ROLQ $0x03, R12 - MOVQ 24(SP), R10 - MOVQ R11, AX - ORQ R12, AX - XORQ R8, R10 - MOVQ 128(SP), R13 - MOVQ 176(SP), R14 - ROLQ $0x1c, R10 - XORQ R10, AX - MOVQ AX, 40(DI) - XORQ AX, SI - XORQ CX, R13 - ROLQ $0x2d, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 48(DI) - XORQ AX, BP - XORQ DX, R14 - ROLQ $0x3d, R14 - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 64(DI) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 72(DI) - NOTQ R14 - XORQ R10, R15 - ORQ R14, R13 - XORQ R12, R13 - MOVQ R13, 56(DI) - - // Result k - MOVQ 8(SP), R10 - MOVQ 56(SP), R11 - MOVQ 104(SP), R12 - MOVQ 152(SP), R13 - MOVQ 160(SP), R14 - XORQ DX, R11 - ROLQ $0x06, R11 - XORQ R8, R12 - ROLQ $0x19, R12 - MOVQ R11, AX - ORQ R12, AX - XORQ CX, R10 - ROLQ $0x01, R10 - XORQ R10, AX - MOVQ AX, 80(DI) - XORQ AX, SI - XORQ R9, R13 - ROLQ $0x08, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 88(DI) - XORQ AX, BP - XORQ BX, R14 - ROLQ $0x12, R14 - NOTQ R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 96(DI) - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 104(DI) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 112(DI) - XORQ R10, R15 - - // Result m - MOVQ 40(SP), R11 - XORQ BX, R11 - MOVQ 88(SP), R12 - ROLQ $0x24, R11 - XORQ CX, R12 - MOVQ 32(SP), R10 - ROLQ $0x0a, R12 - MOVQ R11, AX - MOVQ 136(SP), R13 - ANDQ R12, AX - XORQ R9, R10 - MOVQ 184(SP), R14 - ROLQ $0x1b, R10 - XORQ R10, AX - MOVQ AX, 120(DI) - XORQ AX, SI - XORQ DX, R13 - ROLQ $0x0f, R13 - MOVQ R12, AX - ORQ R13, AX - XORQ R11, AX - MOVQ AX, 128(DI) - XORQ AX, BP - XORQ R8, R14 - ROLQ $0x38, R14 - NOTQ R13 - MOVQ R13, AX - ORQ R14, AX - XORQ R12, AX - MOVQ AX, 136(DI) - ORQ R10, R11 - XORQ R14, R11 - MOVQ R11, 152(DI) - ANDQ R10, R14 - XORQ R13, R14 - MOVQ R14, 144(DI) - XORQ R11, R15 - - // Result s - MOVQ 16(SP), R10 - MOVQ 64(SP), R11 - MOVQ 112(SP), R12 - XORQ DX, R10 - MOVQ 120(SP), R13 - ROLQ $0x3e, R10 - XORQ R8, R11 - MOVQ 168(SP), R14 - ROLQ $0x37, R11 - XORQ R9, R12 - MOVQ R10, R9 - XORQ CX, R14 - ROLQ $0x02, R14 - ANDQ R11, R9 - XORQ R14, R9 - MOVQ R9, 192(DI) - ROLQ $0x27, R12 - XORQ R9, R15 - NOTQ R11 - XORQ BX, R13 - MOVQ R11, BX - ANDQ R12, BX - XORQ R10, BX - MOVQ BX, 160(DI) - XORQ BX, SI - ROLQ $0x29, R13 - MOVQ R12, CX - ORQ R13, CX - XORQ R11, CX - MOVQ CX, 168(DI) - XORQ CX, BP - MOVQ R13, DX - MOVQ R14, R8 - ANDQ R14, DX - ORQ R10, R8 - XORQ R12, DX - XORQ R13, R8 - MOVQ DX, 176(DI) - MOVQ R8, 184(DI) - - // Prepare round - MOVQ BP, BX - ROLQ $0x01, BX - MOVQ 16(DI), R12 - XORQ 56(DI), DX - XORQ R15, BX - XORQ 96(DI), R12 - XORQ 136(DI), DX - XORQ DX, R12 - MOVQ R12, CX - ROLQ $0x01, CX - MOVQ 24(DI), R13 - XORQ 64(DI), R8 - XORQ SI, CX - XORQ 104(DI), R13 - XORQ 144(DI), R8 - XORQ R8, R13 - MOVQ R13, DX - ROLQ $0x01, DX - MOVQ R15, R8 - XORQ BP, DX - ROLQ $0x01, R8 - MOVQ SI, R9 - XORQ R12, R8 - ROLQ $0x01, R9 - - // Result b - MOVQ (DI), R10 - MOVQ 48(DI), R11 - XORQ R13, R9 - MOVQ 96(DI), R12 - MOVQ 144(DI), R13 - MOVQ 192(DI), R14 - XORQ CX, R11 - ROLQ $0x2c, R11 - XORQ DX, R12 - XORQ BX, R10 - ROLQ $0x2b, R12 - MOVQ R11, SI - MOVQ $0x000000000000808b, AX - ORQ R12, SI - XORQ R10, AX - XORQ AX, SI - MOVQ SI, (SP) - XORQ R9, R14 - ROLQ $0x0e, R14 - MOVQ R10, R15 - ANDQ R11, R15 - XORQ R14, R15 - MOVQ R15, 32(SP) - XORQ R8, R13 - ROLQ $0x15, R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 16(SP) - NOTQ R12 - ORQ R10, R14 - ORQ R13, R12 - XORQ R13, R14 - XORQ R11, R12 - MOVQ R14, 24(SP) - MOVQ R12, 8(SP) - MOVQ R12, BP - - // Result g - MOVQ 72(DI), R11 - XORQ R9, R11 - MOVQ 80(DI), R12 - ROLQ $0x14, R11 - XORQ BX, R12 - ROLQ $0x03, R12 - MOVQ 24(DI), R10 - MOVQ R11, AX - ORQ R12, AX - XORQ R8, R10 - MOVQ 128(DI), R13 - MOVQ 176(DI), R14 - ROLQ $0x1c, R10 - XORQ R10, AX - MOVQ AX, 40(SP) - XORQ AX, SI - XORQ CX, R13 - ROLQ $0x2d, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 48(SP) - XORQ AX, BP - XORQ DX, R14 - ROLQ $0x3d, R14 - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 64(SP) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 72(SP) - NOTQ R14 - XORQ R10, R15 - ORQ R14, R13 - XORQ R12, R13 - MOVQ R13, 56(SP) - - // Result k - MOVQ 8(DI), R10 - MOVQ 56(DI), R11 - MOVQ 104(DI), R12 - MOVQ 152(DI), R13 - MOVQ 160(DI), R14 - XORQ DX, R11 - ROLQ $0x06, R11 - XORQ R8, R12 - ROLQ $0x19, R12 - MOVQ R11, AX - ORQ R12, AX - XORQ CX, R10 - ROLQ $0x01, R10 - XORQ R10, AX - MOVQ AX, 80(SP) - XORQ AX, SI - XORQ R9, R13 - ROLQ $0x08, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 88(SP) - XORQ AX, BP - XORQ BX, R14 - ROLQ $0x12, R14 - NOTQ R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 96(SP) - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 104(SP) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 112(SP) - XORQ R10, R15 - - // Result m - MOVQ 40(DI), R11 - XORQ BX, R11 - MOVQ 88(DI), R12 - ROLQ $0x24, R11 - XORQ CX, R12 - MOVQ 32(DI), R10 - ROLQ $0x0a, R12 - MOVQ R11, AX - MOVQ 136(DI), R13 - ANDQ R12, AX - XORQ R9, R10 - MOVQ 184(DI), R14 - ROLQ $0x1b, R10 - XORQ R10, AX - MOVQ AX, 120(SP) - XORQ AX, SI - XORQ DX, R13 - ROLQ $0x0f, R13 - MOVQ R12, AX - ORQ R13, AX - XORQ R11, AX - MOVQ AX, 128(SP) - XORQ AX, BP - XORQ R8, R14 - ROLQ $0x38, R14 - NOTQ R13 - MOVQ R13, AX - ORQ R14, AX - XORQ R12, AX - MOVQ AX, 136(SP) - ORQ R10, R11 - XORQ R14, R11 - MOVQ R11, 152(SP) - ANDQ R10, R14 - XORQ R13, R14 - MOVQ R14, 144(SP) - XORQ R11, R15 - - // Result s - MOVQ 16(DI), R10 - MOVQ 64(DI), R11 - MOVQ 112(DI), R12 - XORQ DX, R10 - MOVQ 120(DI), R13 - ROLQ $0x3e, R10 - XORQ R8, R11 - MOVQ 168(DI), R14 - ROLQ $0x37, R11 - XORQ R9, R12 - MOVQ R10, R9 - XORQ CX, R14 - ROLQ $0x02, R14 - ANDQ R11, R9 - XORQ R14, R9 - MOVQ R9, 192(SP) - ROLQ $0x27, R12 - XORQ R9, R15 - NOTQ R11 - XORQ BX, R13 - MOVQ R11, BX - ANDQ R12, BX - XORQ R10, BX - MOVQ BX, 160(SP) - XORQ BX, SI - ROLQ $0x29, R13 - MOVQ R12, CX - ORQ R13, CX - XORQ R11, CX - MOVQ CX, 168(SP) - XORQ CX, BP - MOVQ R13, DX - MOVQ R14, R8 - ANDQ R14, DX - ORQ R10, R8 - XORQ R12, DX - XORQ R13, R8 - MOVQ DX, 176(SP) - MOVQ R8, 184(SP) - - // Prepare round - MOVQ BP, BX - ROLQ $0x01, BX - MOVQ 16(SP), R12 - XORQ 56(SP), DX - XORQ R15, BX - XORQ 96(SP), R12 - XORQ 136(SP), DX - XORQ DX, R12 - MOVQ R12, CX - ROLQ $0x01, CX - MOVQ 24(SP), R13 - XORQ 64(SP), R8 - XORQ SI, CX - XORQ 104(SP), R13 - XORQ 144(SP), R8 - XORQ R8, R13 - MOVQ R13, DX - ROLQ $0x01, DX - MOVQ R15, R8 - XORQ BP, DX - ROLQ $0x01, R8 - MOVQ SI, R9 - XORQ R12, R8 - ROLQ $0x01, R9 - - // Result b - MOVQ (SP), R10 - MOVQ 48(SP), R11 - XORQ R13, R9 - MOVQ 96(SP), R12 - MOVQ 144(SP), R13 - MOVQ 192(SP), R14 - XORQ CX, R11 - ROLQ $0x2c, R11 - XORQ DX, R12 - XORQ BX, R10 - ROLQ $0x2b, R12 - MOVQ R11, SI - MOVQ $0x0000000080000001, AX - ORQ R12, SI - XORQ R10, AX - XORQ AX, SI - MOVQ SI, (DI) - XORQ R9, R14 - ROLQ $0x0e, R14 - MOVQ R10, R15 - ANDQ R11, R15 - XORQ R14, R15 - MOVQ R15, 32(DI) - XORQ R8, R13 - ROLQ $0x15, R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 16(DI) - NOTQ R12 - ORQ R10, R14 - ORQ R13, R12 - XORQ R13, R14 - XORQ R11, R12 - MOVQ R14, 24(DI) - MOVQ R12, 8(DI) - MOVQ R12, BP - - // Result g - MOVQ 72(SP), R11 - XORQ R9, R11 - MOVQ 80(SP), R12 - ROLQ $0x14, R11 - XORQ BX, R12 - ROLQ $0x03, R12 - MOVQ 24(SP), R10 - MOVQ R11, AX - ORQ R12, AX - XORQ R8, R10 - MOVQ 128(SP), R13 - MOVQ 176(SP), R14 - ROLQ $0x1c, R10 - XORQ R10, AX - MOVQ AX, 40(DI) - XORQ AX, SI - XORQ CX, R13 - ROLQ $0x2d, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 48(DI) - XORQ AX, BP - XORQ DX, R14 - ROLQ $0x3d, R14 - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 64(DI) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 72(DI) - NOTQ R14 - XORQ R10, R15 - ORQ R14, R13 - XORQ R12, R13 - MOVQ R13, 56(DI) - - // Result k - MOVQ 8(SP), R10 - MOVQ 56(SP), R11 - MOVQ 104(SP), R12 - MOVQ 152(SP), R13 - MOVQ 160(SP), R14 - XORQ DX, R11 - ROLQ $0x06, R11 - XORQ R8, R12 - ROLQ $0x19, R12 - MOVQ R11, AX - ORQ R12, AX - XORQ CX, R10 - ROLQ $0x01, R10 - XORQ R10, AX - MOVQ AX, 80(DI) - XORQ AX, SI - XORQ R9, R13 - ROLQ $0x08, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 88(DI) - XORQ AX, BP - XORQ BX, R14 - ROLQ $0x12, R14 - NOTQ R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 96(DI) - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 104(DI) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 112(DI) - XORQ R10, R15 - - // Result m - MOVQ 40(SP), R11 - XORQ BX, R11 - MOVQ 88(SP), R12 - ROLQ $0x24, R11 - XORQ CX, R12 - MOVQ 32(SP), R10 - ROLQ $0x0a, R12 - MOVQ R11, AX - MOVQ 136(SP), R13 - ANDQ R12, AX - XORQ R9, R10 - MOVQ 184(SP), R14 - ROLQ $0x1b, R10 - XORQ R10, AX - MOVQ AX, 120(DI) - XORQ AX, SI - XORQ DX, R13 - ROLQ $0x0f, R13 - MOVQ R12, AX - ORQ R13, AX - XORQ R11, AX - MOVQ AX, 128(DI) - XORQ AX, BP - XORQ R8, R14 - ROLQ $0x38, R14 - NOTQ R13 - MOVQ R13, AX - ORQ R14, AX - XORQ R12, AX - MOVQ AX, 136(DI) - ORQ R10, R11 - XORQ R14, R11 - MOVQ R11, 152(DI) - ANDQ R10, R14 - XORQ R13, R14 - MOVQ R14, 144(DI) - XORQ R11, R15 - - // Result s - MOVQ 16(SP), R10 - MOVQ 64(SP), R11 - MOVQ 112(SP), R12 - XORQ DX, R10 - MOVQ 120(SP), R13 - ROLQ $0x3e, R10 - XORQ R8, R11 - MOVQ 168(SP), R14 - ROLQ $0x37, R11 - XORQ R9, R12 - MOVQ R10, R9 - XORQ CX, R14 - ROLQ $0x02, R14 - ANDQ R11, R9 - XORQ R14, R9 - MOVQ R9, 192(DI) - ROLQ $0x27, R12 - XORQ R9, R15 - NOTQ R11 - XORQ BX, R13 - MOVQ R11, BX - ANDQ R12, BX - XORQ R10, BX - MOVQ BX, 160(DI) - XORQ BX, SI - ROLQ $0x29, R13 - MOVQ R12, CX - ORQ R13, CX - XORQ R11, CX - MOVQ CX, 168(DI) - XORQ CX, BP - MOVQ R13, DX - MOVQ R14, R8 - ANDQ R14, DX - ORQ R10, R8 - XORQ R12, DX - XORQ R13, R8 - MOVQ DX, 176(DI) - MOVQ R8, 184(DI) - - // Prepare round - MOVQ BP, BX - ROLQ $0x01, BX - MOVQ 16(DI), R12 - XORQ 56(DI), DX - XORQ R15, BX - XORQ 96(DI), R12 - XORQ 136(DI), DX - XORQ DX, R12 - MOVQ R12, CX - ROLQ $0x01, CX - MOVQ 24(DI), R13 - XORQ 64(DI), R8 - XORQ SI, CX - XORQ 104(DI), R13 - XORQ 144(DI), R8 - XORQ R8, R13 - MOVQ R13, DX - ROLQ $0x01, DX - MOVQ R15, R8 - XORQ BP, DX - ROLQ $0x01, R8 - MOVQ SI, R9 - XORQ R12, R8 - ROLQ $0x01, R9 - - // Result b - MOVQ (DI), R10 - MOVQ 48(DI), R11 - XORQ R13, R9 - MOVQ 96(DI), R12 - MOVQ 144(DI), R13 - MOVQ 192(DI), R14 - XORQ CX, R11 - ROLQ $0x2c, R11 - XORQ DX, R12 - XORQ BX, R10 - ROLQ $0x2b, R12 - MOVQ R11, SI - MOVQ $0x8000000080008081, AX - ORQ R12, SI - XORQ R10, AX - XORQ AX, SI - MOVQ SI, (SP) - XORQ R9, R14 - ROLQ $0x0e, R14 - MOVQ R10, R15 - ANDQ R11, R15 - XORQ R14, R15 - MOVQ R15, 32(SP) - XORQ R8, R13 - ROLQ $0x15, R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 16(SP) - NOTQ R12 - ORQ R10, R14 - ORQ R13, R12 - XORQ R13, R14 - XORQ R11, R12 - MOVQ R14, 24(SP) - MOVQ R12, 8(SP) - MOVQ R12, BP - - // Result g - MOVQ 72(DI), R11 - XORQ R9, R11 - MOVQ 80(DI), R12 - ROLQ $0x14, R11 - XORQ BX, R12 - ROLQ $0x03, R12 - MOVQ 24(DI), R10 - MOVQ R11, AX - ORQ R12, AX - XORQ R8, R10 - MOVQ 128(DI), R13 - MOVQ 176(DI), R14 - ROLQ $0x1c, R10 - XORQ R10, AX - MOVQ AX, 40(SP) - XORQ AX, SI - XORQ CX, R13 - ROLQ $0x2d, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 48(SP) - XORQ AX, BP - XORQ DX, R14 - ROLQ $0x3d, R14 - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 64(SP) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 72(SP) - NOTQ R14 - XORQ R10, R15 - ORQ R14, R13 - XORQ R12, R13 - MOVQ R13, 56(SP) - - // Result k - MOVQ 8(DI), R10 - MOVQ 56(DI), R11 - MOVQ 104(DI), R12 - MOVQ 152(DI), R13 - MOVQ 160(DI), R14 - XORQ DX, R11 - ROLQ $0x06, R11 - XORQ R8, R12 - ROLQ $0x19, R12 - MOVQ R11, AX - ORQ R12, AX - XORQ CX, R10 - ROLQ $0x01, R10 - XORQ R10, AX - MOVQ AX, 80(SP) - XORQ AX, SI - XORQ R9, R13 - ROLQ $0x08, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 88(SP) - XORQ AX, BP - XORQ BX, R14 - ROLQ $0x12, R14 - NOTQ R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 96(SP) - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 104(SP) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 112(SP) - XORQ R10, R15 - - // Result m - MOVQ 40(DI), R11 - XORQ BX, R11 - MOVQ 88(DI), R12 - ROLQ $0x24, R11 - XORQ CX, R12 - MOVQ 32(DI), R10 - ROLQ $0x0a, R12 - MOVQ R11, AX - MOVQ 136(DI), R13 - ANDQ R12, AX - XORQ R9, R10 - MOVQ 184(DI), R14 - ROLQ $0x1b, R10 - XORQ R10, AX - MOVQ AX, 120(SP) - XORQ AX, SI - XORQ DX, R13 - ROLQ $0x0f, R13 - MOVQ R12, AX - ORQ R13, AX - XORQ R11, AX - MOVQ AX, 128(SP) - XORQ AX, BP - XORQ R8, R14 - ROLQ $0x38, R14 - NOTQ R13 - MOVQ R13, AX - ORQ R14, AX - XORQ R12, AX - MOVQ AX, 136(SP) - ORQ R10, R11 - XORQ R14, R11 - MOVQ R11, 152(SP) - ANDQ R10, R14 - XORQ R13, R14 - MOVQ R14, 144(SP) - XORQ R11, R15 - - // Result s - MOVQ 16(DI), R10 - MOVQ 64(DI), R11 - MOVQ 112(DI), R12 - XORQ DX, R10 - MOVQ 120(DI), R13 - ROLQ $0x3e, R10 - XORQ R8, R11 - MOVQ 168(DI), R14 - ROLQ $0x37, R11 - XORQ R9, R12 - MOVQ R10, R9 - XORQ CX, R14 - ROLQ $0x02, R14 - ANDQ R11, R9 - XORQ R14, R9 - MOVQ R9, 192(SP) - ROLQ $0x27, R12 - XORQ R9, R15 - NOTQ R11 - XORQ BX, R13 - MOVQ R11, BX - ANDQ R12, BX - XORQ R10, BX - MOVQ BX, 160(SP) - XORQ BX, SI - ROLQ $0x29, R13 - MOVQ R12, CX - ORQ R13, CX - XORQ R11, CX - MOVQ CX, 168(SP) - XORQ CX, BP - MOVQ R13, DX - MOVQ R14, R8 - ANDQ R14, DX - ORQ R10, R8 - XORQ R12, DX - XORQ R13, R8 - MOVQ DX, 176(SP) - MOVQ R8, 184(SP) - - // Prepare round - MOVQ BP, BX - ROLQ $0x01, BX - MOVQ 16(SP), R12 - XORQ 56(SP), DX - XORQ R15, BX - XORQ 96(SP), R12 - XORQ 136(SP), DX - XORQ DX, R12 - MOVQ R12, CX - ROLQ $0x01, CX - MOVQ 24(SP), R13 - XORQ 64(SP), R8 - XORQ SI, CX - XORQ 104(SP), R13 - XORQ 144(SP), R8 - XORQ R8, R13 - MOVQ R13, DX - ROLQ $0x01, DX - MOVQ R15, R8 - XORQ BP, DX - ROLQ $0x01, R8 - MOVQ SI, R9 - XORQ R12, R8 - ROLQ $0x01, R9 - - // Result b - MOVQ (SP), R10 - MOVQ 48(SP), R11 - XORQ R13, R9 - MOVQ 96(SP), R12 - MOVQ 144(SP), R13 - MOVQ 192(SP), R14 - XORQ CX, R11 - ROLQ $0x2c, R11 - XORQ DX, R12 - XORQ BX, R10 - ROLQ $0x2b, R12 - MOVQ R11, SI - MOVQ $0x8000000000008009, AX - ORQ R12, SI - XORQ R10, AX - XORQ AX, SI - MOVQ SI, (DI) - XORQ R9, R14 - ROLQ $0x0e, R14 - MOVQ R10, R15 - ANDQ R11, R15 - XORQ R14, R15 - MOVQ R15, 32(DI) - XORQ R8, R13 - ROLQ $0x15, R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 16(DI) - NOTQ R12 - ORQ R10, R14 - ORQ R13, R12 - XORQ R13, R14 - XORQ R11, R12 - MOVQ R14, 24(DI) - MOVQ R12, 8(DI) - MOVQ R12, BP - - // Result g - MOVQ 72(SP), R11 - XORQ R9, R11 - MOVQ 80(SP), R12 - ROLQ $0x14, R11 - XORQ BX, R12 - ROLQ $0x03, R12 - MOVQ 24(SP), R10 - MOVQ R11, AX - ORQ R12, AX - XORQ R8, R10 - MOVQ 128(SP), R13 - MOVQ 176(SP), R14 - ROLQ $0x1c, R10 - XORQ R10, AX - MOVQ AX, 40(DI) - XORQ AX, SI - XORQ CX, R13 - ROLQ $0x2d, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 48(DI) - XORQ AX, BP - XORQ DX, R14 - ROLQ $0x3d, R14 - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 64(DI) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 72(DI) - NOTQ R14 - XORQ R10, R15 - ORQ R14, R13 - XORQ R12, R13 - MOVQ R13, 56(DI) - - // Result k - MOVQ 8(SP), R10 - MOVQ 56(SP), R11 - MOVQ 104(SP), R12 - MOVQ 152(SP), R13 - MOVQ 160(SP), R14 - XORQ DX, R11 - ROLQ $0x06, R11 - XORQ R8, R12 - ROLQ $0x19, R12 - MOVQ R11, AX - ORQ R12, AX - XORQ CX, R10 - ROLQ $0x01, R10 - XORQ R10, AX - MOVQ AX, 80(DI) - XORQ AX, SI - XORQ R9, R13 - ROLQ $0x08, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 88(DI) - XORQ AX, BP - XORQ BX, R14 - ROLQ $0x12, R14 - NOTQ R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 96(DI) - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 104(DI) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 112(DI) - XORQ R10, R15 - - // Result m - MOVQ 40(SP), R11 - XORQ BX, R11 - MOVQ 88(SP), R12 - ROLQ $0x24, R11 - XORQ CX, R12 - MOVQ 32(SP), R10 - ROLQ $0x0a, R12 - MOVQ R11, AX - MOVQ 136(SP), R13 - ANDQ R12, AX - XORQ R9, R10 - MOVQ 184(SP), R14 - ROLQ $0x1b, R10 - XORQ R10, AX - MOVQ AX, 120(DI) - XORQ AX, SI - XORQ DX, R13 - ROLQ $0x0f, R13 - MOVQ R12, AX - ORQ R13, AX - XORQ R11, AX - MOVQ AX, 128(DI) - XORQ AX, BP - XORQ R8, R14 - ROLQ $0x38, R14 - NOTQ R13 - MOVQ R13, AX - ORQ R14, AX - XORQ R12, AX - MOVQ AX, 136(DI) - ORQ R10, R11 - XORQ R14, R11 - MOVQ R11, 152(DI) - ANDQ R10, R14 - XORQ R13, R14 - MOVQ R14, 144(DI) - XORQ R11, R15 - - // Result s - MOVQ 16(SP), R10 - MOVQ 64(SP), R11 - MOVQ 112(SP), R12 - XORQ DX, R10 - MOVQ 120(SP), R13 - ROLQ $0x3e, R10 - XORQ R8, R11 - MOVQ 168(SP), R14 - ROLQ $0x37, R11 - XORQ R9, R12 - MOVQ R10, R9 - XORQ CX, R14 - ROLQ $0x02, R14 - ANDQ R11, R9 - XORQ R14, R9 - MOVQ R9, 192(DI) - ROLQ $0x27, R12 - XORQ R9, R15 - NOTQ R11 - XORQ BX, R13 - MOVQ R11, BX - ANDQ R12, BX - XORQ R10, BX - MOVQ BX, 160(DI) - XORQ BX, SI - ROLQ $0x29, R13 - MOVQ R12, CX - ORQ R13, CX - XORQ R11, CX - MOVQ CX, 168(DI) - XORQ CX, BP - MOVQ R13, DX - MOVQ R14, R8 - ANDQ R14, DX - ORQ R10, R8 - XORQ R12, DX - XORQ R13, R8 - MOVQ DX, 176(DI) - MOVQ R8, 184(DI) - - // Prepare round - MOVQ BP, BX - ROLQ $0x01, BX - MOVQ 16(DI), R12 - XORQ 56(DI), DX - XORQ R15, BX - XORQ 96(DI), R12 - XORQ 136(DI), DX - XORQ DX, R12 - MOVQ R12, CX - ROLQ $0x01, CX - MOVQ 24(DI), R13 - XORQ 64(DI), R8 - XORQ SI, CX - XORQ 104(DI), R13 - XORQ 144(DI), R8 - XORQ R8, R13 - MOVQ R13, DX - ROLQ $0x01, DX - MOVQ R15, R8 - XORQ BP, DX - ROLQ $0x01, R8 - MOVQ SI, R9 - XORQ R12, R8 - ROLQ $0x01, R9 - - // Result b - MOVQ (DI), R10 - MOVQ 48(DI), R11 - XORQ R13, R9 - MOVQ 96(DI), R12 - MOVQ 144(DI), R13 - MOVQ 192(DI), R14 - XORQ CX, R11 - ROLQ $0x2c, R11 - XORQ DX, R12 - XORQ BX, R10 - ROLQ $0x2b, R12 - MOVQ R11, SI - MOVQ $0x000000000000008a, AX - ORQ R12, SI - XORQ R10, AX - XORQ AX, SI - MOVQ SI, (SP) - XORQ R9, R14 - ROLQ $0x0e, R14 - MOVQ R10, R15 - ANDQ R11, R15 - XORQ R14, R15 - MOVQ R15, 32(SP) - XORQ R8, R13 - ROLQ $0x15, R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 16(SP) - NOTQ R12 - ORQ R10, R14 - ORQ R13, R12 - XORQ R13, R14 - XORQ R11, R12 - MOVQ R14, 24(SP) - MOVQ R12, 8(SP) - MOVQ R12, BP - - // Result g - MOVQ 72(DI), R11 - XORQ R9, R11 - MOVQ 80(DI), R12 - ROLQ $0x14, R11 - XORQ BX, R12 - ROLQ $0x03, R12 - MOVQ 24(DI), R10 - MOVQ R11, AX - ORQ R12, AX - XORQ R8, R10 - MOVQ 128(DI), R13 - MOVQ 176(DI), R14 - ROLQ $0x1c, R10 - XORQ R10, AX - MOVQ AX, 40(SP) - XORQ AX, SI - XORQ CX, R13 - ROLQ $0x2d, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 48(SP) - XORQ AX, BP - XORQ DX, R14 - ROLQ $0x3d, R14 - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 64(SP) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 72(SP) - NOTQ R14 - XORQ R10, R15 - ORQ R14, R13 - XORQ R12, R13 - MOVQ R13, 56(SP) - - // Result k - MOVQ 8(DI), R10 - MOVQ 56(DI), R11 - MOVQ 104(DI), R12 - MOVQ 152(DI), R13 - MOVQ 160(DI), R14 - XORQ DX, R11 - ROLQ $0x06, R11 - XORQ R8, R12 - ROLQ $0x19, R12 - MOVQ R11, AX - ORQ R12, AX - XORQ CX, R10 - ROLQ $0x01, R10 - XORQ R10, AX - MOVQ AX, 80(SP) - XORQ AX, SI - XORQ R9, R13 - ROLQ $0x08, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 88(SP) - XORQ AX, BP - XORQ BX, R14 - ROLQ $0x12, R14 - NOTQ R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 96(SP) - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 104(SP) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 112(SP) - XORQ R10, R15 - - // Result m - MOVQ 40(DI), R11 - XORQ BX, R11 - MOVQ 88(DI), R12 - ROLQ $0x24, R11 - XORQ CX, R12 - MOVQ 32(DI), R10 - ROLQ $0x0a, R12 - MOVQ R11, AX - MOVQ 136(DI), R13 - ANDQ R12, AX - XORQ R9, R10 - MOVQ 184(DI), R14 - ROLQ $0x1b, R10 - XORQ R10, AX - MOVQ AX, 120(SP) - XORQ AX, SI - XORQ DX, R13 - ROLQ $0x0f, R13 - MOVQ R12, AX - ORQ R13, AX - XORQ R11, AX - MOVQ AX, 128(SP) - XORQ AX, BP - XORQ R8, R14 - ROLQ $0x38, R14 - NOTQ R13 - MOVQ R13, AX - ORQ R14, AX - XORQ R12, AX - MOVQ AX, 136(SP) - ORQ R10, R11 - XORQ R14, R11 - MOVQ R11, 152(SP) - ANDQ R10, R14 - XORQ R13, R14 - MOVQ R14, 144(SP) - XORQ R11, R15 - - // Result s - MOVQ 16(DI), R10 - MOVQ 64(DI), R11 - MOVQ 112(DI), R12 - XORQ DX, R10 - MOVQ 120(DI), R13 - ROLQ $0x3e, R10 - XORQ R8, R11 - MOVQ 168(DI), R14 - ROLQ $0x37, R11 - XORQ R9, R12 - MOVQ R10, R9 - XORQ CX, R14 - ROLQ $0x02, R14 - ANDQ R11, R9 - XORQ R14, R9 - MOVQ R9, 192(SP) - ROLQ $0x27, R12 - XORQ R9, R15 - NOTQ R11 - XORQ BX, R13 - MOVQ R11, BX - ANDQ R12, BX - XORQ R10, BX - MOVQ BX, 160(SP) - XORQ BX, SI - ROLQ $0x29, R13 - MOVQ R12, CX - ORQ R13, CX - XORQ R11, CX - MOVQ CX, 168(SP) - XORQ CX, BP - MOVQ R13, DX - MOVQ R14, R8 - ANDQ R14, DX - ORQ R10, R8 - XORQ R12, DX - XORQ R13, R8 - MOVQ DX, 176(SP) - MOVQ R8, 184(SP) - - // Prepare round - MOVQ BP, BX - ROLQ $0x01, BX - MOVQ 16(SP), R12 - XORQ 56(SP), DX - XORQ R15, BX - XORQ 96(SP), R12 - XORQ 136(SP), DX - XORQ DX, R12 - MOVQ R12, CX - ROLQ $0x01, CX - MOVQ 24(SP), R13 - XORQ 64(SP), R8 - XORQ SI, CX - XORQ 104(SP), R13 - XORQ 144(SP), R8 - XORQ R8, R13 - MOVQ R13, DX - ROLQ $0x01, DX - MOVQ R15, R8 - XORQ BP, DX - ROLQ $0x01, R8 - MOVQ SI, R9 - XORQ R12, R8 - ROLQ $0x01, R9 - - // Result b - MOVQ (SP), R10 - MOVQ 48(SP), R11 - XORQ R13, R9 - MOVQ 96(SP), R12 - MOVQ 144(SP), R13 - MOVQ 192(SP), R14 - XORQ CX, R11 - ROLQ $0x2c, R11 - XORQ DX, R12 - XORQ BX, R10 - ROLQ $0x2b, R12 - MOVQ R11, SI - MOVQ $0x0000000000000088, AX - ORQ R12, SI - XORQ R10, AX - XORQ AX, SI - MOVQ SI, (DI) - XORQ R9, R14 - ROLQ $0x0e, R14 - MOVQ R10, R15 - ANDQ R11, R15 - XORQ R14, R15 - MOVQ R15, 32(DI) - XORQ R8, R13 - ROLQ $0x15, R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 16(DI) - NOTQ R12 - ORQ R10, R14 - ORQ R13, R12 - XORQ R13, R14 - XORQ R11, R12 - MOVQ R14, 24(DI) - MOVQ R12, 8(DI) - MOVQ R12, BP - - // Result g - MOVQ 72(SP), R11 - XORQ R9, R11 - MOVQ 80(SP), R12 - ROLQ $0x14, R11 - XORQ BX, R12 - ROLQ $0x03, R12 - MOVQ 24(SP), R10 - MOVQ R11, AX - ORQ R12, AX - XORQ R8, R10 - MOVQ 128(SP), R13 - MOVQ 176(SP), R14 - ROLQ $0x1c, R10 - XORQ R10, AX - MOVQ AX, 40(DI) - XORQ AX, SI - XORQ CX, R13 - ROLQ $0x2d, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 48(DI) - XORQ AX, BP - XORQ DX, R14 - ROLQ $0x3d, R14 - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 64(DI) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 72(DI) - NOTQ R14 - XORQ R10, R15 - ORQ R14, R13 - XORQ R12, R13 - MOVQ R13, 56(DI) - - // Result k - MOVQ 8(SP), R10 - MOVQ 56(SP), R11 - MOVQ 104(SP), R12 - MOVQ 152(SP), R13 - MOVQ 160(SP), R14 - XORQ DX, R11 - ROLQ $0x06, R11 - XORQ R8, R12 - ROLQ $0x19, R12 - MOVQ R11, AX - ORQ R12, AX - XORQ CX, R10 - ROLQ $0x01, R10 - XORQ R10, AX - MOVQ AX, 80(DI) - XORQ AX, SI - XORQ R9, R13 - ROLQ $0x08, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 88(DI) - XORQ AX, BP - XORQ BX, R14 - ROLQ $0x12, R14 - NOTQ R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 96(DI) - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 104(DI) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 112(DI) - XORQ R10, R15 - - // Result m - MOVQ 40(SP), R11 - XORQ BX, R11 - MOVQ 88(SP), R12 - ROLQ $0x24, R11 - XORQ CX, R12 - MOVQ 32(SP), R10 - ROLQ $0x0a, R12 - MOVQ R11, AX - MOVQ 136(SP), R13 - ANDQ R12, AX - XORQ R9, R10 - MOVQ 184(SP), R14 - ROLQ $0x1b, R10 - XORQ R10, AX - MOVQ AX, 120(DI) - XORQ AX, SI - XORQ DX, R13 - ROLQ $0x0f, R13 - MOVQ R12, AX - ORQ R13, AX - XORQ R11, AX - MOVQ AX, 128(DI) - XORQ AX, BP - XORQ R8, R14 - ROLQ $0x38, R14 - NOTQ R13 - MOVQ R13, AX - ORQ R14, AX - XORQ R12, AX - MOVQ AX, 136(DI) - ORQ R10, R11 - XORQ R14, R11 - MOVQ R11, 152(DI) - ANDQ R10, R14 - XORQ R13, R14 - MOVQ R14, 144(DI) - XORQ R11, R15 - - // Result s - MOVQ 16(SP), R10 - MOVQ 64(SP), R11 - MOVQ 112(SP), R12 - XORQ DX, R10 - MOVQ 120(SP), R13 - ROLQ $0x3e, R10 - XORQ R8, R11 - MOVQ 168(SP), R14 - ROLQ $0x37, R11 - XORQ R9, R12 - MOVQ R10, R9 - XORQ CX, R14 - ROLQ $0x02, R14 - ANDQ R11, R9 - XORQ R14, R9 - MOVQ R9, 192(DI) - ROLQ $0x27, R12 - XORQ R9, R15 - NOTQ R11 - XORQ BX, R13 - MOVQ R11, BX - ANDQ R12, BX - XORQ R10, BX - MOVQ BX, 160(DI) - XORQ BX, SI - ROLQ $0x29, R13 - MOVQ R12, CX - ORQ R13, CX - XORQ R11, CX - MOVQ CX, 168(DI) - XORQ CX, BP - MOVQ R13, DX - MOVQ R14, R8 - ANDQ R14, DX - ORQ R10, R8 - XORQ R12, DX - XORQ R13, R8 - MOVQ DX, 176(DI) - MOVQ R8, 184(DI) - - // Prepare round - MOVQ BP, BX - ROLQ $0x01, BX - MOVQ 16(DI), R12 - XORQ 56(DI), DX - XORQ R15, BX - XORQ 96(DI), R12 - XORQ 136(DI), DX - XORQ DX, R12 - MOVQ R12, CX - ROLQ $0x01, CX - MOVQ 24(DI), R13 - XORQ 64(DI), R8 - XORQ SI, CX - XORQ 104(DI), R13 - XORQ 144(DI), R8 - XORQ R8, R13 - MOVQ R13, DX - ROLQ $0x01, DX - MOVQ R15, R8 - XORQ BP, DX - ROLQ $0x01, R8 - MOVQ SI, R9 - XORQ R12, R8 - ROLQ $0x01, R9 - - // Result b - MOVQ (DI), R10 - MOVQ 48(DI), R11 - XORQ R13, R9 - MOVQ 96(DI), R12 - MOVQ 144(DI), R13 - MOVQ 192(DI), R14 - XORQ CX, R11 - ROLQ $0x2c, R11 - XORQ DX, R12 - XORQ BX, R10 - ROLQ $0x2b, R12 - MOVQ R11, SI - MOVQ $0x0000000080008009, AX - ORQ R12, SI - XORQ R10, AX - XORQ AX, SI - MOVQ SI, (SP) - XORQ R9, R14 - ROLQ $0x0e, R14 - MOVQ R10, R15 - ANDQ R11, R15 - XORQ R14, R15 - MOVQ R15, 32(SP) - XORQ R8, R13 - ROLQ $0x15, R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 16(SP) - NOTQ R12 - ORQ R10, R14 - ORQ R13, R12 - XORQ R13, R14 - XORQ R11, R12 - MOVQ R14, 24(SP) - MOVQ R12, 8(SP) - MOVQ R12, BP - - // Result g - MOVQ 72(DI), R11 - XORQ R9, R11 - MOVQ 80(DI), R12 - ROLQ $0x14, R11 - XORQ BX, R12 - ROLQ $0x03, R12 - MOVQ 24(DI), R10 - MOVQ R11, AX - ORQ R12, AX - XORQ R8, R10 - MOVQ 128(DI), R13 - MOVQ 176(DI), R14 - ROLQ $0x1c, R10 - XORQ R10, AX - MOVQ AX, 40(SP) - XORQ AX, SI - XORQ CX, R13 - ROLQ $0x2d, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 48(SP) - XORQ AX, BP - XORQ DX, R14 - ROLQ $0x3d, R14 - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 64(SP) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 72(SP) - NOTQ R14 - XORQ R10, R15 - ORQ R14, R13 - XORQ R12, R13 - MOVQ R13, 56(SP) - - // Result k - MOVQ 8(DI), R10 - MOVQ 56(DI), R11 - MOVQ 104(DI), R12 - MOVQ 152(DI), R13 - MOVQ 160(DI), R14 - XORQ DX, R11 - ROLQ $0x06, R11 - XORQ R8, R12 - ROLQ $0x19, R12 - MOVQ R11, AX - ORQ R12, AX - XORQ CX, R10 - ROLQ $0x01, R10 - XORQ R10, AX - MOVQ AX, 80(SP) - XORQ AX, SI - XORQ R9, R13 - ROLQ $0x08, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 88(SP) - XORQ AX, BP - XORQ BX, R14 - ROLQ $0x12, R14 - NOTQ R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 96(SP) - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 104(SP) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 112(SP) - XORQ R10, R15 - - // Result m - MOVQ 40(DI), R11 - XORQ BX, R11 - MOVQ 88(DI), R12 - ROLQ $0x24, R11 - XORQ CX, R12 - MOVQ 32(DI), R10 - ROLQ $0x0a, R12 - MOVQ R11, AX - MOVQ 136(DI), R13 - ANDQ R12, AX - XORQ R9, R10 - MOVQ 184(DI), R14 - ROLQ $0x1b, R10 - XORQ R10, AX - MOVQ AX, 120(SP) - XORQ AX, SI - XORQ DX, R13 - ROLQ $0x0f, R13 - MOVQ R12, AX - ORQ R13, AX - XORQ R11, AX - MOVQ AX, 128(SP) - XORQ AX, BP - XORQ R8, R14 - ROLQ $0x38, R14 - NOTQ R13 - MOVQ R13, AX - ORQ R14, AX - XORQ R12, AX - MOVQ AX, 136(SP) - ORQ R10, R11 - XORQ R14, R11 - MOVQ R11, 152(SP) - ANDQ R10, R14 - XORQ R13, R14 - MOVQ R14, 144(SP) - XORQ R11, R15 - - // Result s - MOVQ 16(DI), R10 - MOVQ 64(DI), R11 - MOVQ 112(DI), R12 - XORQ DX, R10 - MOVQ 120(DI), R13 - ROLQ $0x3e, R10 - XORQ R8, R11 - MOVQ 168(DI), R14 - ROLQ $0x37, R11 - XORQ R9, R12 - MOVQ R10, R9 - XORQ CX, R14 - ROLQ $0x02, R14 - ANDQ R11, R9 - XORQ R14, R9 - MOVQ R9, 192(SP) - ROLQ $0x27, R12 - XORQ R9, R15 - NOTQ R11 - XORQ BX, R13 - MOVQ R11, BX - ANDQ R12, BX - XORQ R10, BX - MOVQ BX, 160(SP) - XORQ BX, SI - ROLQ $0x29, R13 - MOVQ R12, CX - ORQ R13, CX - XORQ R11, CX - MOVQ CX, 168(SP) - XORQ CX, BP - MOVQ R13, DX - MOVQ R14, R8 - ANDQ R14, DX - ORQ R10, R8 - XORQ R12, DX - XORQ R13, R8 - MOVQ DX, 176(SP) - MOVQ R8, 184(SP) - - // Prepare round - MOVQ BP, BX - ROLQ $0x01, BX - MOVQ 16(SP), R12 - XORQ 56(SP), DX - XORQ R15, BX - XORQ 96(SP), R12 - XORQ 136(SP), DX - XORQ DX, R12 - MOVQ R12, CX - ROLQ $0x01, CX - MOVQ 24(SP), R13 - XORQ 64(SP), R8 - XORQ SI, CX - XORQ 104(SP), R13 - XORQ 144(SP), R8 - XORQ R8, R13 - MOVQ R13, DX - ROLQ $0x01, DX - MOVQ R15, R8 - XORQ BP, DX - ROLQ $0x01, R8 - MOVQ SI, R9 - XORQ R12, R8 - ROLQ $0x01, R9 - - // Result b - MOVQ (SP), R10 - MOVQ 48(SP), R11 - XORQ R13, R9 - MOVQ 96(SP), R12 - MOVQ 144(SP), R13 - MOVQ 192(SP), R14 - XORQ CX, R11 - ROLQ $0x2c, R11 - XORQ DX, R12 - XORQ BX, R10 - ROLQ $0x2b, R12 - MOVQ R11, SI - MOVQ $0x000000008000000a, AX - ORQ R12, SI - XORQ R10, AX - XORQ AX, SI - MOVQ SI, (DI) - XORQ R9, R14 - ROLQ $0x0e, R14 - MOVQ R10, R15 - ANDQ R11, R15 - XORQ R14, R15 - MOVQ R15, 32(DI) - XORQ R8, R13 - ROLQ $0x15, R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 16(DI) - NOTQ R12 - ORQ R10, R14 - ORQ R13, R12 - XORQ R13, R14 - XORQ R11, R12 - MOVQ R14, 24(DI) - MOVQ R12, 8(DI) - MOVQ R12, BP - - // Result g - MOVQ 72(SP), R11 - XORQ R9, R11 - MOVQ 80(SP), R12 - ROLQ $0x14, R11 - XORQ BX, R12 - ROLQ $0x03, R12 - MOVQ 24(SP), R10 - MOVQ R11, AX - ORQ R12, AX - XORQ R8, R10 - MOVQ 128(SP), R13 - MOVQ 176(SP), R14 - ROLQ $0x1c, R10 - XORQ R10, AX - MOVQ AX, 40(DI) - XORQ AX, SI - XORQ CX, R13 - ROLQ $0x2d, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 48(DI) - XORQ AX, BP - XORQ DX, R14 - ROLQ $0x3d, R14 - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 64(DI) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 72(DI) - NOTQ R14 - XORQ R10, R15 - ORQ R14, R13 - XORQ R12, R13 - MOVQ R13, 56(DI) - - // Result k - MOVQ 8(SP), R10 - MOVQ 56(SP), R11 - MOVQ 104(SP), R12 - MOVQ 152(SP), R13 - MOVQ 160(SP), R14 - XORQ DX, R11 - ROLQ $0x06, R11 - XORQ R8, R12 - ROLQ $0x19, R12 - MOVQ R11, AX - ORQ R12, AX - XORQ CX, R10 - ROLQ $0x01, R10 - XORQ R10, AX - MOVQ AX, 80(DI) - XORQ AX, SI - XORQ R9, R13 - ROLQ $0x08, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 88(DI) - XORQ AX, BP - XORQ BX, R14 - ROLQ $0x12, R14 - NOTQ R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 96(DI) - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 104(DI) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 112(DI) - XORQ R10, R15 - - // Result m - MOVQ 40(SP), R11 - XORQ BX, R11 - MOVQ 88(SP), R12 - ROLQ $0x24, R11 - XORQ CX, R12 - MOVQ 32(SP), R10 - ROLQ $0x0a, R12 - MOVQ R11, AX - MOVQ 136(SP), R13 - ANDQ R12, AX - XORQ R9, R10 - MOVQ 184(SP), R14 - ROLQ $0x1b, R10 - XORQ R10, AX - MOVQ AX, 120(DI) - XORQ AX, SI - XORQ DX, R13 - ROLQ $0x0f, R13 - MOVQ R12, AX - ORQ R13, AX - XORQ R11, AX - MOVQ AX, 128(DI) - XORQ AX, BP - XORQ R8, R14 - ROLQ $0x38, R14 - NOTQ R13 - MOVQ R13, AX - ORQ R14, AX - XORQ R12, AX - MOVQ AX, 136(DI) - ORQ R10, R11 - XORQ R14, R11 - MOVQ R11, 152(DI) - ANDQ R10, R14 - XORQ R13, R14 - MOVQ R14, 144(DI) - XORQ R11, R15 - - // Result s - MOVQ 16(SP), R10 - MOVQ 64(SP), R11 - MOVQ 112(SP), R12 - XORQ DX, R10 - MOVQ 120(SP), R13 - ROLQ $0x3e, R10 - XORQ R8, R11 - MOVQ 168(SP), R14 - ROLQ $0x37, R11 - XORQ R9, R12 - MOVQ R10, R9 - XORQ CX, R14 - ROLQ $0x02, R14 - ANDQ R11, R9 - XORQ R14, R9 - MOVQ R9, 192(DI) - ROLQ $0x27, R12 - XORQ R9, R15 - NOTQ R11 - XORQ BX, R13 - MOVQ R11, BX - ANDQ R12, BX - XORQ R10, BX - MOVQ BX, 160(DI) - XORQ BX, SI - ROLQ $0x29, R13 - MOVQ R12, CX - ORQ R13, CX - XORQ R11, CX - MOVQ CX, 168(DI) - XORQ CX, BP - MOVQ R13, DX - MOVQ R14, R8 - ANDQ R14, DX - ORQ R10, R8 - XORQ R12, DX - XORQ R13, R8 - MOVQ DX, 176(DI) - MOVQ R8, 184(DI) - - // Prepare round - MOVQ BP, BX - ROLQ $0x01, BX - MOVQ 16(DI), R12 - XORQ 56(DI), DX - XORQ R15, BX - XORQ 96(DI), R12 - XORQ 136(DI), DX - XORQ DX, R12 - MOVQ R12, CX - ROLQ $0x01, CX - MOVQ 24(DI), R13 - XORQ 64(DI), R8 - XORQ SI, CX - XORQ 104(DI), R13 - XORQ 144(DI), R8 - XORQ R8, R13 - MOVQ R13, DX - ROLQ $0x01, DX - MOVQ R15, R8 - XORQ BP, DX - ROLQ $0x01, R8 - MOVQ SI, R9 - XORQ R12, R8 - ROLQ $0x01, R9 - - // Result b - MOVQ (DI), R10 - MOVQ 48(DI), R11 - XORQ R13, R9 - MOVQ 96(DI), R12 - MOVQ 144(DI), R13 - MOVQ 192(DI), R14 - XORQ CX, R11 - ROLQ $0x2c, R11 - XORQ DX, R12 - XORQ BX, R10 - ROLQ $0x2b, R12 - MOVQ R11, SI - MOVQ $0x000000008000808b, AX - ORQ R12, SI - XORQ R10, AX - XORQ AX, SI - MOVQ SI, (SP) - XORQ R9, R14 - ROLQ $0x0e, R14 - MOVQ R10, R15 - ANDQ R11, R15 - XORQ R14, R15 - MOVQ R15, 32(SP) - XORQ R8, R13 - ROLQ $0x15, R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 16(SP) - NOTQ R12 - ORQ R10, R14 - ORQ R13, R12 - XORQ R13, R14 - XORQ R11, R12 - MOVQ R14, 24(SP) - MOVQ R12, 8(SP) - MOVQ R12, BP - - // Result g - MOVQ 72(DI), R11 - XORQ R9, R11 - MOVQ 80(DI), R12 - ROLQ $0x14, R11 - XORQ BX, R12 - ROLQ $0x03, R12 - MOVQ 24(DI), R10 - MOVQ R11, AX - ORQ R12, AX - XORQ R8, R10 - MOVQ 128(DI), R13 - MOVQ 176(DI), R14 - ROLQ $0x1c, R10 - XORQ R10, AX - MOVQ AX, 40(SP) - XORQ AX, SI - XORQ CX, R13 - ROLQ $0x2d, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 48(SP) - XORQ AX, BP - XORQ DX, R14 - ROLQ $0x3d, R14 - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 64(SP) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 72(SP) - NOTQ R14 - XORQ R10, R15 - ORQ R14, R13 - XORQ R12, R13 - MOVQ R13, 56(SP) - - // Result k - MOVQ 8(DI), R10 - MOVQ 56(DI), R11 - MOVQ 104(DI), R12 - MOVQ 152(DI), R13 - MOVQ 160(DI), R14 - XORQ DX, R11 - ROLQ $0x06, R11 - XORQ R8, R12 - ROLQ $0x19, R12 - MOVQ R11, AX - ORQ R12, AX - XORQ CX, R10 - ROLQ $0x01, R10 - XORQ R10, AX - MOVQ AX, 80(SP) - XORQ AX, SI - XORQ R9, R13 - ROLQ $0x08, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 88(SP) - XORQ AX, BP - XORQ BX, R14 - ROLQ $0x12, R14 - NOTQ R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 96(SP) - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 104(SP) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 112(SP) - XORQ R10, R15 - - // Result m - MOVQ 40(DI), R11 - XORQ BX, R11 - MOVQ 88(DI), R12 - ROLQ $0x24, R11 - XORQ CX, R12 - MOVQ 32(DI), R10 - ROLQ $0x0a, R12 - MOVQ R11, AX - MOVQ 136(DI), R13 - ANDQ R12, AX - XORQ R9, R10 - MOVQ 184(DI), R14 - ROLQ $0x1b, R10 - XORQ R10, AX - MOVQ AX, 120(SP) - XORQ AX, SI - XORQ DX, R13 - ROLQ $0x0f, R13 - MOVQ R12, AX - ORQ R13, AX - XORQ R11, AX - MOVQ AX, 128(SP) - XORQ AX, BP - XORQ R8, R14 - ROLQ $0x38, R14 - NOTQ R13 - MOVQ R13, AX - ORQ R14, AX - XORQ R12, AX - MOVQ AX, 136(SP) - ORQ R10, R11 - XORQ R14, R11 - MOVQ R11, 152(SP) - ANDQ R10, R14 - XORQ R13, R14 - MOVQ R14, 144(SP) - XORQ R11, R15 - - // Result s - MOVQ 16(DI), R10 - MOVQ 64(DI), R11 - MOVQ 112(DI), R12 - XORQ DX, R10 - MOVQ 120(DI), R13 - ROLQ $0x3e, R10 - XORQ R8, R11 - MOVQ 168(DI), R14 - ROLQ $0x37, R11 - XORQ R9, R12 - MOVQ R10, R9 - XORQ CX, R14 - ROLQ $0x02, R14 - ANDQ R11, R9 - XORQ R14, R9 - MOVQ R9, 192(SP) - ROLQ $0x27, R12 - XORQ R9, R15 - NOTQ R11 - XORQ BX, R13 - MOVQ R11, BX - ANDQ R12, BX - XORQ R10, BX - MOVQ BX, 160(SP) - XORQ BX, SI - ROLQ $0x29, R13 - MOVQ R12, CX - ORQ R13, CX - XORQ R11, CX - MOVQ CX, 168(SP) - XORQ CX, BP - MOVQ R13, DX - MOVQ R14, R8 - ANDQ R14, DX - ORQ R10, R8 - XORQ R12, DX - XORQ R13, R8 - MOVQ DX, 176(SP) - MOVQ R8, 184(SP) - - // Prepare round - MOVQ BP, BX - ROLQ $0x01, BX - MOVQ 16(SP), R12 - XORQ 56(SP), DX - XORQ R15, BX - XORQ 96(SP), R12 - XORQ 136(SP), DX - XORQ DX, R12 - MOVQ R12, CX - ROLQ $0x01, CX - MOVQ 24(SP), R13 - XORQ 64(SP), R8 - XORQ SI, CX - XORQ 104(SP), R13 - XORQ 144(SP), R8 - XORQ R8, R13 - MOVQ R13, DX - ROLQ $0x01, DX - MOVQ R15, R8 - XORQ BP, DX - ROLQ $0x01, R8 - MOVQ SI, R9 - XORQ R12, R8 - ROLQ $0x01, R9 - - // Result b - MOVQ (SP), R10 - MOVQ 48(SP), R11 - XORQ R13, R9 - MOVQ 96(SP), R12 - MOVQ 144(SP), R13 - MOVQ 192(SP), R14 - XORQ CX, R11 - ROLQ $0x2c, R11 - XORQ DX, R12 - XORQ BX, R10 - ROLQ $0x2b, R12 - MOVQ R11, SI - MOVQ $0x800000000000008b, AX - ORQ R12, SI - XORQ R10, AX - XORQ AX, SI - MOVQ SI, (DI) - XORQ R9, R14 - ROLQ $0x0e, R14 - MOVQ R10, R15 - ANDQ R11, R15 - XORQ R14, R15 - MOVQ R15, 32(DI) - XORQ R8, R13 - ROLQ $0x15, R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 16(DI) - NOTQ R12 - ORQ R10, R14 - ORQ R13, R12 - XORQ R13, R14 - XORQ R11, R12 - MOVQ R14, 24(DI) - MOVQ R12, 8(DI) - MOVQ R12, BP - - // Result g - MOVQ 72(SP), R11 - XORQ R9, R11 - MOVQ 80(SP), R12 - ROLQ $0x14, R11 - XORQ BX, R12 - ROLQ $0x03, R12 - MOVQ 24(SP), R10 - MOVQ R11, AX - ORQ R12, AX - XORQ R8, R10 - MOVQ 128(SP), R13 - MOVQ 176(SP), R14 - ROLQ $0x1c, R10 - XORQ R10, AX - MOVQ AX, 40(DI) - XORQ AX, SI - XORQ CX, R13 - ROLQ $0x2d, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 48(DI) - XORQ AX, BP - XORQ DX, R14 - ROLQ $0x3d, R14 - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 64(DI) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 72(DI) - NOTQ R14 - XORQ R10, R15 - ORQ R14, R13 - XORQ R12, R13 - MOVQ R13, 56(DI) - - // Result k - MOVQ 8(SP), R10 - MOVQ 56(SP), R11 - MOVQ 104(SP), R12 - MOVQ 152(SP), R13 - MOVQ 160(SP), R14 - XORQ DX, R11 - ROLQ $0x06, R11 - XORQ R8, R12 - ROLQ $0x19, R12 - MOVQ R11, AX - ORQ R12, AX - XORQ CX, R10 - ROLQ $0x01, R10 - XORQ R10, AX - MOVQ AX, 80(DI) - XORQ AX, SI - XORQ R9, R13 - ROLQ $0x08, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 88(DI) - XORQ AX, BP - XORQ BX, R14 - ROLQ $0x12, R14 - NOTQ R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 96(DI) - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 104(DI) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 112(DI) - XORQ R10, R15 - - // Result m - MOVQ 40(SP), R11 - XORQ BX, R11 - MOVQ 88(SP), R12 - ROLQ $0x24, R11 - XORQ CX, R12 - MOVQ 32(SP), R10 - ROLQ $0x0a, R12 - MOVQ R11, AX - MOVQ 136(SP), R13 - ANDQ R12, AX - XORQ R9, R10 - MOVQ 184(SP), R14 - ROLQ $0x1b, R10 - XORQ R10, AX - MOVQ AX, 120(DI) - XORQ AX, SI - XORQ DX, R13 - ROLQ $0x0f, R13 - MOVQ R12, AX - ORQ R13, AX - XORQ R11, AX - MOVQ AX, 128(DI) - XORQ AX, BP - XORQ R8, R14 - ROLQ $0x38, R14 - NOTQ R13 - MOVQ R13, AX - ORQ R14, AX - XORQ R12, AX - MOVQ AX, 136(DI) - ORQ R10, R11 - XORQ R14, R11 - MOVQ R11, 152(DI) - ANDQ R10, R14 - XORQ R13, R14 - MOVQ R14, 144(DI) - XORQ R11, R15 - - // Result s - MOVQ 16(SP), R10 - MOVQ 64(SP), R11 - MOVQ 112(SP), R12 - XORQ DX, R10 - MOVQ 120(SP), R13 - ROLQ $0x3e, R10 - XORQ R8, R11 - MOVQ 168(SP), R14 - ROLQ $0x37, R11 - XORQ R9, R12 - MOVQ R10, R9 - XORQ CX, R14 - ROLQ $0x02, R14 - ANDQ R11, R9 - XORQ R14, R9 - MOVQ R9, 192(DI) - ROLQ $0x27, R12 - XORQ R9, R15 - NOTQ R11 - XORQ BX, R13 - MOVQ R11, BX - ANDQ R12, BX - XORQ R10, BX - MOVQ BX, 160(DI) - XORQ BX, SI - ROLQ $0x29, R13 - MOVQ R12, CX - ORQ R13, CX - XORQ R11, CX - MOVQ CX, 168(DI) - XORQ CX, BP - MOVQ R13, DX - MOVQ R14, R8 - ANDQ R14, DX - ORQ R10, R8 - XORQ R12, DX - XORQ R13, R8 - MOVQ DX, 176(DI) - MOVQ R8, 184(DI) - - // Prepare round - MOVQ BP, BX - ROLQ $0x01, BX - MOVQ 16(DI), R12 - XORQ 56(DI), DX - XORQ R15, BX - XORQ 96(DI), R12 - XORQ 136(DI), DX - XORQ DX, R12 - MOVQ R12, CX - ROLQ $0x01, CX - MOVQ 24(DI), R13 - XORQ 64(DI), R8 - XORQ SI, CX - XORQ 104(DI), R13 - XORQ 144(DI), R8 - XORQ R8, R13 - MOVQ R13, DX - ROLQ $0x01, DX - MOVQ R15, R8 - XORQ BP, DX - ROLQ $0x01, R8 - MOVQ SI, R9 - XORQ R12, R8 - ROLQ $0x01, R9 - - // Result b - MOVQ (DI), R10 - MOVQ 48(DI), R11 - XORQ R13, R9 - MOVQ 96(DI), R12 - MOVQ 144(DI), R13 - MOVQ 192(DI), R14 - XORQ CX, R11 - ROLQ $0x2c, R11 - XORQ DX, R12 - XORQ BX, R10 - ROLQ $0x2b, R12 - MOVQ R11, SI - MOVQ $0x8000000000008089, AX - ORQ R12, SI - XORQ R10, AX - XORQ AX, SI - MOVQ SI, (SP) - XORQ R9, R14 - ROLQ $0x0e, R14 - MOVQ R10, R15 - ANDQ R11, R15 - XORQ R14, R15 - MOVQ R15, 32(SP) - XORQ R8, R13 - ROLQ $0x15, R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 16(SP) - NOTQ R12 - ORQ R10, R14 - ORQ R13, R12 - XORQ R13, R14 - XORQ R11, R12 - MOVQ R14, 24(SP) - MOVQ R12, 8(SP) - MOVQ R12, BP - - // Result g - MOVQ 72(DI), R11 - XORQ R9, R11 - MOVQ 80(DI), R12 - ROLQ $0x14, R11 - XORQ BX, R12 - ROLQ $0x03, R12 - MOVQ 24(DI), R10 - MOVQ R11, AX - ORQ R12, AX - XORQ R8, R10 - MOVQ 128(DI), R13 - MOVQ 176(DI), R14 - ROLQ $0x1c, R10 - XORQ R10, AX - MOVQ AX, 40(SP) - XORQ AX, SI - XORQ CX, R13 - ROLQ $0x2d, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 48(SP) - XORQ AX, BP - XORQ DX, R14 - ROLQ $0x3d, R14 - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 64(SP) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 72(SP) - NOTQ R14 - XORQ R10, R15 - ORQ R14, R13 - XORQ R12, R13 - MOVQ R13, 56(SP) - - // Result k - MOVQ 8(DI), R10 - MOVQ 56(DI), R11 - MOVQ 104(DI), R12 - MOVQ 152(DI), R13 - MOVQ 160(DI), R14 - XORQ DX, R11 - ROLQ $0x06, R11 - XORQ R8, R12 - ROLQ $0x19, R12 - MOVQ R11, AX - ORQ R12, AX - XORQ CX, R10 - ROLQ $0x01, R10 - XORQ R10, AX - MOVQ AX, 80(SP) - XORQ AX, SI - XORQ R9, R13 - ROLQ $0x08, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 88(SP) - XORQ AX, BP - XORQ BX, R14 - ROLQ $0x12, R14 - NOTQ R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 96(SP) - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 104(SP) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 112(SP) - XORQ R10, R15 - - // Result m - MOVQ 40(DI), R11 - XORQ BX, R11 - MOVQ 88(DI), R12 - ROLQ $0x24, R11 - XORQ CX, R12 - MOVQ 32(DI), R10 - ROLQ $0x0a, R12 - MOVQ R11, AX - MOVQ 136(DI), R13 - ANDQ R12, AX - XORQ R9, R10 - MOVQ 184(DI), R14 - ROLQ $0x1b, R10 - XORQ R10, AX - MOVQ AX, 120(SP) - XORQ AX, SI - XORQ DX, R13 - ROLQ $0x0f, R13 - MOVQ R12, AX - ORQ R13, AX - XORQ R11, AX - MOVQ AX, 128(SP) - XORQ AX, BP - XORQ R8, R14 - ROLQ $0x38, R14 - NOTQ R13 - MOVQ R13, AX - ORQ R14, AX - XORQ R12, AX - MOVQ AX, 136(SP) - ORQ R10, R11 - XORQ R14, R11 - MOVQ R11, 152(SP) - ANDQ R10, R14 - XORQ R13, R14 - MOVQ R14, 144(SP) - XORQ R11, R15 - - // Result s - MOVQ 16(DI), R10 - MOVQ 64(DI), R11 - MOVQ 112(DI), R12 - XORQ DX, R10 - MOVQ 120(DI), R13 - ROLQ $0x3e, R10 - XORQ R8, R11 - MOVQ 168(DI), R14 - ROLQ $0x37, R11 - XORQ R9, R12 - MOVQ R10, R9 - XORQ CX, R14 - ROLQ $0x02, R14 - ANDQ R11, R9 - XORQ R14, R9 - MOVQ R9, 192(SP) - ROLQ $0x27, R12 - XORQ R9, R15 - NOTQ R11 - XORQ BX, R13 - MOVQ R11, BX - ANDQ R12, BX - XORQ R10, BX - MOVQ BX, 160(SP) - XORQ BX, SI - ROLQ $0x29, R13 - MOVQ R12, CX - ORQ R13, CX - XORQ R11, CX - MOVQ CX, 168(SP) - XORQ CX, BP - MOVQ R13, DX - MOVQ R14, R8 - ANDQ R14, DX - ORQ R10, R8 - XORQ R12, DX - XORQ R13, R8 - MOVQ DX, 176(SP) - MOVQ R8, 184(SP) - - // Prepare round - MOVQ BP, BX - ROLQ $0x01, BX - MOVQ 16(SP), R12 - XORQ 56(SP), DX - XORQ R15, BX - XORQ 96(SP), R12 - XORQ 136(SP), DX - XORQ DX, R12 - MOVQ R12, CX - ROLQ $0x01, CX - MOVQ 24(SP), R13 - XORQ 64(SP), R8 - XORQ SI, CX - XORQ 104(SP), R13 - XORQ 144(SP), R8 - XORQ R8, R13 - MOVQ R13, DX - ROLQ $0x01, DX - MOVQ R15, R8 - XORQ BP, DX - ROLQ $0x01, R8 - MOVQ SI, R9 - XORQ R12, R8 - ROLQ $0x01, R9 - - // Result b - MOVQ (SP), R10 - MOVQ 48(SP), R11 - XORQ R13, R9 - MOVQ 96(SP), R12 - MOVQ 144(SP), R13 - MOVQ 192(SP), R14 - XORQ CX, R11 - ROLQ $0x2c, R11 - XORQ DX, R12 - XORQ BX, R10 - ROLQ $0x2b, R12 - MOVQ R11, SI - MOVQ $0x8000000000008003, AX - ORQ R12, SI - XORQ R10, AX - XORQ AX, SI - MOVQ SI, (DI) - XORQ R9, R14 - ROLQ $0x0e, R14 - MOVQ R10, R15 - ANDQ R11, R15 - XORQ R14, R15 - MOVQ R15, 32(DI) - XORQ R8, R13 - ROLQ $0x15, R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 16(DI) - NOTQ R12 - ORQ R10, R14 - ORQ R13, R12 - XORQ R13, R14 - XORQ R11, R12 - MOVQ R14, 24(DI) - MOVQ R12, 8(DI) - MOVQ R12, BP - - // Result g - MOVQ 72(SP), R11 - XORQ R9, R11 - MOVQ 80(SP), R12 - ROLQ $0x14, R11 - XORQ BX, R12 - ROLQ $0x03, R12 - MOVQ 24(SP), R10 - MOVQ R11, AX - ORQ R12, AX - XORQ R8, R10 - MOVQ 128(SP), R13 - MOVQ 176(SP), R14 - ROLQ $0x1c, R10 - XORQ R10, AX - MOVQ AX, 40(DI) - XORQ AX, SI - XORQ CX, R13 - ROLQ $0x2d, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 48(DI) - XORQ AX, BP - XORQ DX, R14 - ROLQ $0x3d, R14 - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 64(DI) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 72(DI) - NOTQ R14 - XORQ R10, R15 - ORQ R14, R13 - XORQ R12, R13 - MOVQ R13, 56(DI) - - // Result k - MOVQ 8(SP), R10 - MOVQ 56(SP), R11 - MOVQ 104(SP), R12 - MOVQ 152(SP), R13 - MOVQ 160(SP), R14 - XORQ DX, R11 - ROLQ $0x06, R11 - XORQ R8, R12 - ROLQ $0x19, R12 - MOVQ R11, AX - ORQ R12, AX - XORQ CX, R10 - ROLQ $0x01, R10 - XORQ R10, AX - MOVQ AX, 80(DI) - XORQ AX, SI - XORQ R9, R13 - ROLQ $0x08, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 88(DI) - XORQ AX, BP - XORQ BX, R14 - ROLQ $0x12, R14 - NOTQ R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 96(DI) - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 104(DI) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 112(DI) - XORQ R10, R15 - - // Result m - MOVQ 40(SP), R11 - XORQ BX, R11 - MOVQ 88(SP), R12 - ROLQ $0x24, R11 - XORQ CX, R12 - MOVQ 32(SP), R10 - ROLQ $0x0a, R12 - MOVQ R11, AX - MOVQ 136(SP), R13 - ANDQ R12, AX - XORQ R9, R10 - MOVQ 184(SP), R14 - ROLQ $0x1b, R10 - XORQ R10, AX - MOVQ AX, 120(DI) - XORQ AX, SI - XORQ DX, R13 - ROLQ $0x0f, R13 - MOVQ R12, AX - ORQ R13, AX - XORQ R11, AX - MOVQ AX, 128(DI) - XORQ AX, BP - XORQ R8, R14 - ROLQ $0x38, R14 - NOTQ R13 - MOVQ R13, AX - ORQ R14, AX - XORQ R12, AX - MOVQ AX, 136(DI) - ORQ R10, R11 - XORQ R14, R11 - MOVQ R11, 152(DI) - ANDQ R10, R14 - XORQ R13, R14 - MOVQ R14, 144(DI) - XORQ R11, R15 - - // Result s - MOVQ 16(SP), R10 - MOVQ 64(SP), R11 - MOVQ 112(SP), R12 - XORQ DX, R10 - MOVQ 120(SP), R13 - ROLQ $0x3e, R10 - XORQ R8, R11 - MOVQ 168(SP), R14 - ROLQ $0x37, R11 - XORQ R9, R12 - MOVQ R10, R9 - XORQ CX, R14 - ROLQ $0x02, R14 - ANDQ R11, R9 - XORQ R14, R9 - MOVQ R9, 192(DI) - ROLQ $0x27, R12 - XORQ R9, R15 - NOTQ R11 - XORQ BX, R13 - MOVQ R11, BX - ANDQ R12, BX - XORQ R10, BX - MOVQ BX, 160(DI) - XORQ BX, SI - ROLQ $0x29, R13 - MOVQ R12, CX - ORQ R13, CX - XORQ R11, CX - MOVQ CX, 168(DI) - XORQ CX, BP - MOVQ R13, DX - MOVQ R14, R8 - ANDQ R14, DX - ORQ R10, R8 - XORQ R12, DX - XORQ R13, R8 - MOVQ DX, 176(DI) - MOVQ R8, 184(DI) - - // Prepare round - MOVQ BP, BX - ROLQ $0x01, BX - MOVQ 16(DI), R12 - XORQ 56(DI), DX - XORQ R15, BX - XORQ 96(DI), R12 - XORQ 136(DI), DX - XORQ DX, R12 - MOVQ R12, CX - ROLQ $0x01, CX - MOVQ 24(DI), R13 - XORQ 64(DI), R8 - XORQ SI, CX - XORQ 104(DI), R13 - XORQ 144(DI), R8 - XORQ R8, R13 - MOVQ R13, DX - ROLQ $0x01, DX - MOVQ R15, R8 - XORQ BP, DX - ROLQ $0x01, R8 - MOVQ SI, R9 - XORQ R12, R8 - ROLQ $0x01, R9 - - // Result b - MOVQ (DI), R10 - MOVQ 48(DI), R11 - XORQ R13, R9 - MOVQ 96(DI), R12 - MOVQ 144(DI), R13 - MOVQ 192(DI), R14 - XORQ CX, R11 - ROLQ $0x2c, R11 - XORQ DX, R12 - XORQ BX, R10 - ROLQ $0x2b, R12 - MOVQ R11, SI - MOVQ $0x8000000000008002, AX - ORQ R12, SI - XORQ R10, AX - XORQ AX, SI - MOVQ SI, (SP) - XORQ R9, R14 - ROLQ $0x0e, R14 - MOVQ R10, R15 - ANDQ R11, R15 - XORQ R14, R15 - MOVQ R15, 32(SP) - XORQ R8, R13 - ROLQ $0x15, R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 16(SP) - NOTQ R12 - ORQ R10, R14 - ORQ R13, R12 - XORQ R13, R14 - XORQ R11, R12 - MOVQ R14, 24(SP) - MOVQ R12, 8(SP) - MOVQ R12, BP - - // Result g - MOVQ 72(DI), R11 - XORQ R9, R11 - MOVQ 80(DI), R12 - ROLQ $0x14, R11 - XORQ BX, R12 - ROLQ $0x03, R12 - MOVQ 24(DI), R10 - MOVQ R11, AX - ORQ R12, AX - XORQ R8, R10 - MOVQ 128(DI), R13 - MOVQ 176(DI), R14 - ROLQ $0x1c, R10 - XORQ R10, AX - MOVQ AX, 40(SP) - XORQ AX, SI - XORQ CX, R13 - ROLQ $0x2d, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 48(SP) - XORQ AX, BP - XORQ DX, R14 - ROLQ $0x3d, R14 - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 64(SP) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 72(SP) - NOTQ R14 - XORQ R10, R15 - ORQ R14, R13 - XORQ R12, R13 - MOVQ R13, 56(SP) - - // Result k - MOVQ 8(DI), R10 - MOVQ 56(DI), R11 - MOVQ 104(DI), R12 - MOVQ 152(DI), R13 - MOVQ 160(DI), R14 - XORQ DX, R11 - ROLQ $0x06, R11 - XORQ R8, R12 - ROLQ $0x19, R12 - MOVQ R11, AX - ORQ R12, AX - XORQ CX, R10 - ROLQ $0x01, R10 - XORQ R10, AX - MOVQ AX, 80(SP) - XORQ AX, SI - XORQ R9, R13 - ROLQ $0x08, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 88(SP) - XORQ AX, BP - XORQ BX, R14 - ROLQ $0x12, R14 - NOTQ R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 96(SP) - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 104(SP) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 112(SP) - XORQ R10, R15 - - // Result m - MOVQ 40(DI), R11 - XORQ BX, R11 - MOVQ 88(DI), R12 - ROLQ $0x24, R11 - XORQ CX, R12 - MOVQ 32(DI), R10 - ROLQ $0x0a, R12 - MOVQ R11, AX - MOVQ 136(DI), R13 - ANDQ R12, AX - XORQ R9, R10 - MOVQ 184(DI), R14 - ROLQ $0x1b, R10 - XORQ R10, AX - MOVQ AX, 120(SP) - XORQ AX, SI - XORQ DX, R13 - ROLQ $0x0f, R13 - MOVQ R12, AX - ORQ R13, AX - XORQ R11, AX - MOVQ AX, 128(SP) - XORQ AX, BP - XORQ R8, R14 - ROLQ $0x38, R14 - NOTQ R13 - MOVQ R13, AX - ORQ R14, AX - XORQ R12, AX - MOVQ AX, 136(SP) - ORQ R10, R11 - XORQ R14, R11 - MOVQ R11, 152(SP) - ANDQ R10, R14 - XORQ R13, R14 - MOVQ R14, 144(SP) - XORQ R11, R15 - - // Result s - MOVQ 16(DI), R10 - MOVQ 64(DI), R11 - MOVQ 112(DI), R12 - XORQ DX, R10 - MOVQ 120(DI), R13 - ROLQ $0x3e, R10 - XORQ R8, R11 - MOVQ 168(DI), R14 - ROLQ $0x37, R11 - XORQ R9, R12 - MOVQ R10, R9 - XORQ CX, R14 - ROLQ $0x02, R14 - ANDQ R11, R9 - XORQ R14, R9 - MOVQ R9, 192(SP) - ROLQ $0x27, R12 - XORQ R9, R15 - NOTQ R11 - XORQ BX, R13 - MOVQ R11, BX - ANDQ R12, BX - XORQ R10, BX - MOVQ BX, 160(SP) - XORQ BX, SI - ROLQ $0x29, R13 - MOVQ R12, CX - ORQ R13, CX - XORQ R11, CX - MOVQ CX, 168(SP) - XORQ CX, BP - MOVQ R13, DX - MOVQ R14, R8 - ANDQ R14, DX - ORQ R10, R8 - XORQ R12, DX - XORQ R13, R8 - MOVQ DX, 176(SP) - MOVQ R8, 184(SP) - - // Prepare round - MOVQ BP, BX - ROLQ $0x01, BX - MOVQ 16(SP), R12 - XORQ 56(SP), DX - XORQ R15, BX - XORQ 96(SP), R12 - XORQ 136(SP), DX - XORQ DX, R12 - MOVQ R12, CX - ROLQ $0x01, CX - MOVQ 24(SP), R13 - XORQ 64(SP), R8 - XORQ SI, CX - XORQ 104(SP), R13 - XORQ 144(SP), R8 - XORQ R8, R13 - MOVQ R13, DX - ROLQ $0x01, DX - MOVQ R15, R8 - XORQ BP, DX - ROLQ $0x01, R8 - MOVQ SI, R9 - XORQ R12, R8 - ROLQ $0x01, R9 - - // Result b - MOVQ (SP), R10 - MOVQ 48(SP), R11 - XORQ R13, R9 - MOVQ 96(SP), R12 - MOVQ 144(SP), R13 - MOVQ 192(SP), R14 - XORQ CX, R11 - ROLQ $0x2c, R11 - XORQ DX, R12 - XORQ BX, R10 - ROLQ $0x2b, R12 - MOVQ R11, SI - MOVQ $0x8000000000000080, AX - ORQ R12, SI - XORQ R10, AX - XORQ AX, SI - MOVQ SI, (DI) - XORQ R9, R14 - ROLQ $0x0e, R14 - MOVQ R10, R15 - ANDQ R11, R15 - XORQ R14, R15 - MOVQ R15, 32(DI) - XORQ R8, R13 - ROLQ $0x15, R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 16(DI) - NOTQ R12 - ORQ R10, R14 - ORQ R13, R12 - XORQ R13, R14 - XORQ R11, R12 - MOVQ R14, 24(DI) - MOVQ R12, 8(DI) - MOVQ R12, BP - - // Result g - MOVQ 72(SP), R11 - XORQ R9, R11 - MOVQ 80(SP), R12 - ROLQ $0x14, R11 - XORQ BX, R12 - ROLQ $0x03, R12 - MOVQ 24(SP), R10 - MOVQ R11, AX - ORQ R12, AX - XORQ R8, R10 - MOVQ 128(SP), R13 - MOVQ 176(SP), R14 - ROLQ $0x1c, R10 - XORQ R10, AX - MOVQ AX, 40(DI) - XORQ AX, SI - XORQ CX, R13 - ROLQ $0x2d, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 48(DI) - XORQ AX, BP - XORQ DX, R14 - ROLQ $0x3d, R14 - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 64(DI) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 72(DI) - NOTQ R14 - XORQ R10, R15 - ORQ R14, R13 - XORQ R12, R13 - MOVQ R13, 56(DI) - - // Result k - MOVQ 8(SP), R10 - MOVQ 56(SP), R11 - MOVQ 104(SP), R12 - MOVQ 152(SP), R13 - MOVQ 160(SP), R14 - XORQ DX, R11 - ROLQ $0x06, R11 - XORQ R8, R12 - ROLQ $0x19, R12 - MOVQ R11, AX - ORQ R12, AX - XORQ CX, R10 - ROLQ $0x01, R10 - XORQ R10, AX - MOVQ AX, 80(DI) - XORQ AX, SI - XORQ R9, R13 - ROLQ $0x08, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 88(DI) - XORQ AX, BP - XORQ BX, R14 - ROLQ $0x12, R14 - NOTQ R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 96(DI) - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 104(DI) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 112(DI) - XORQ R10, R15 - - // Result m - MOVQ 40(SP), R11 - XORQ BX, R11 - MOVQ 88(SP), R12 - ROLQ $0x24, R11 - XORQ CX, R12 - MOVQ 32(SP), R10 - ROLQ $0x0a, R12 - MOVQ R11, AX - MOVQ 136(SP), R13 - ANDQ R12, AX - XORQ R9, R10 - MOVQ 184(SP), R14 - ROLQ $0x1b, R10 - XORQ R10, AX - MOVQ AX, 120(DI) - XORQ AX, SI - XORQ DX, R13 - ROLQ $0x0f, R13 - MOVQ R12, AX - ORQ R13, AX - XORQ R11, AX - MOVQ AX, 128(DI) - XORQ AX, BP - XORQ R8, R14 - ROLQ $0x38, R14 - NOTQ R13 - MOVQ R13, AX - ORQ R14, AX - XORQ R12, AX - MOVQ AX, 136(DI) - ORQ R10, R11 - XORQ R14, R11 - MOVQ R11, 152(DI) - ANDQ R10, R14 - XORQ R13, R14 - MOVQ R14, 144(DI) - XORQ R11, R15 - - // Result s - MOVQ 16(SP), R10 - MOVQ 64(SP), R11 - MOVQ 112(SP), R12 - XORQ DX, R10 - MOVQ 120(SP), R13 - ROLQ $0x3e, R10 - XORQ R8, R11 - MOVQ 168(SP), R14 - ROLQ $0x37, R11 - XORQ R9, R12 - MOVQ R10, R9 - XORQ CX, R14 - ROLQ $0x02, R14 - ANDQ R11, R9 - XORQ R14, R9 - MOVQ R9, 192(DI) - ROLQ $0x27, R12 - XORQ R9, R15 - NOTQ R11 - XORQ BX, R13 - MOVQ R11, BX - ANDQ R12, BX - XORQ R10, BX - MOVQ BX, 160(DI) - XORQ BX, SI - ROLQ $0x29, R13 - MOVQ R12, CX - ORQ R13, CX - XORQ R11, CX - MOVQ CX, 168(DI) - XORQ CX, BP - MOVQ R13, DX - MOVQ R14, R8 - ANDQ R14, DX - ORQ R10, R8 - XORQ R12, DX - XORQ R13, R8 - MOVQ DX, 176(DI) - MOVQ R8, 184(DI) - - // Prepare round - MOVQ BP, BX - ROLQ $0x01, BX - MOVQ 16(DI), R12 - XORQ 56(DI), DX - XORQ R15, BX - XORQ 96(DI), R12 - XORQ 136(DI), DX - XORQ DX, R12 - MOVQ R12, CX - ROLQ $0x01, CX - MOVQ 24(DI), R13 - XORQ 64(DI), R8 - XORQ SI, CX - XORQ 104(DI), R13 - XORQ 144(DI), R8 - XORQ R8, R13 - MOVQ R13, DX - ROLQ $0x01, DX - MOVQ R15, R8 - XORQ BP, DX - ROLQ $0x01, R8 - MOVQ SI, R9 - XORQ R12, R8 - ROLQ $0x01, R9 - - // Result b - MOVQ (DI), R10 - MOVQ 48(DI), R11 - XORQ R13, R9 - MOVQ 96(DI), R12 - MOVQ 144(DI), R13 - MOVQ 192(DI), R14 - XORQ CX, R11 - ROLQ $0x2c, R11 - XORQ DX, R12 - XORQ BX, R10 - ROLQ $0x2b, R12 - MOVQ R11, SI - MOVQ $0x000000000000800a, AX - ORQ R12, SI - XORQ R10, AX - XORQ AX, SI - MOVQ SI, (SP) - XORQ R9, R14 - ROLQ $0x0e, R14 - MOVQ R10, R15 - ANDQ R11, R15 - XORQ R14, R15 - MOVQ R15, 32(SP) - XORQ R8, R13 - ROLQ $0x15, R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 16(SP) - NOTQ R12 - ORQ R10, R14 - ORQ R13, R12 - XORQ R13, R14 - XORQ R11, R12 - MOVQ R14, 24(SP) - MOVQ R12, 8(SP) - MOVQ R12, BP - - // Result g - MOVQ 72(DI), R11 - XORQ R9, R11 - MOVQ 80(DI), R12 - ROLQ $0x14, R11 - XORQ BX, R12 - ROLQ $0x03, R12 - MOVQ 24(DI), R10 - MOVQ R11, AX - ORQ R12, AX - XORQ R8, R10 - MOVQ 128(DI), R13 - MOVQ 176(DI), R14 - ROLQ $0x1c, R10 - XORQ R10, AX - MOVQ AX, 40(SP) - XORQ AX, SI - XORQ CX, R13 - ROLQ $0x2d, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 48(SP) - XORQ AX, BP - XORQ DX, R14 - ROLQ $0x3d, R14 - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 64(SP) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 72(SP) - NOTQ R14 - XORQ R10, R15 - ORQ R14, R13 - XORQ R12, R13 - MOVQ R13, 56(SP) - - // Result k - MOVQ 8(DI), R10 - MOVQ 56(DI), R11 - MOVQ 104(DI), R12 - MOVQ 152(DI), R13 - MOVQ 160(DI), R14 - XORQ DX, R11 - ROLQ $0x06, R11 - XORQ R8, R12 - ROLQ $0x19, R12 - MOVQ R11, AX - ORQ R12, AX - XORQ CX, R10 - ROLQ $0x01, R10 - XORQ R10, AX - MOVQ AX, 80(SP) - XORQ AX, SI - XORQ R9, R13 - ROLQ $0x08, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 88(SP) - XORQ AX, BP - XORQ BX, R14 - ROLQ $0x12, R14 - NOTQ R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 96(SP) - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 104(SP) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 112(SP) - XORQ R10, R15 - - // Result m - MOVQ 40(DI), R11 - XORQ BX, R11 - MOVQ 88(DI), R12 - ROLQ $0x24, R11 - XORQ CX, R12 - MOVQ 32(DI), R10 - ROLQ $0x0a, R12 - MOVQ R11, AX - MOVQ 136(DI), R13 - ANDQ R12, AX - XORQ R9, R10 - MOVQ 184(DI), R14 - ROLQ $0x1b, R10 - XORQ R10, AX - MOVQ AX, 120(SP) - XORQ AX, SI - XORQ DX, R13 - ROLQ $0x0f, R13 - MOVQ R12, AX - ORQ R13, AX - XORQ R11, AX - MOVQ AX, 128(SP) - XORQ AX, BP - XORQ R8, R14 - ROLQ $0x38, R14 - NOTQ R13 - MOVQ R13, AX - ORQ R14, AX - XORQ R12, AX - MOVQ AX, 136(SP) - ORQ R10, R11 - XORQ R14, R11 - MOVQ R11, 152(SP) - ANDQ R10, R14 - XORQ R13, R14 - MOVQ R14, 144(SP) - XORQ R11, R15 - - // Result s - MOVQ 16(DI), R10 - MOVQ 64(DI), R11 - MOVQ 112(DI), R12 - XORQ DX, R10 - MOVQ 120(DI), R13 - ROLQ $0x3e, R10 - XORQ R8, R11 - MOVQ 168(DI), R14 - ROLQ $0x37, R11 - XORQ R9, R12 - MOVQ R10, R9 - XORQ CX, R14 - ROLQ $0x02, R14 - ANDQ R11, R9 - XORQ R14, R9 - MOVQ R9, 192(SP) - ROLQ $0x27, R12 - XORQ R9, R15 - NOTQ R11 - XORQ BX, R13 - MOVQ R11, BX - ANDQ R12, BX - XORQ R10, BX - MOVQ BX, 160(SP) - XORQ BX, SI - ROLQ $0x29, R13 - MOVQ R12, CX - ORQ R13, CX - XORQ R11, CX - MOVQ CX, 168(SP) - XORQ CX, BP - MOVQ R13, DX - MOVQ R14, R8 - ANDQ R14, DX - ORQ R10, R8 - XORQ R12, DX - XORQ R13, R8 - MOVQ DX, 176(SP) - MOVQ R8, 184(SP) - - // Prepare round - MOVQ BP, BX - ROLQ $0x01, BX - MOVQ 16(SP), R12 - XORQ 56(SP), DX - XORQ R15, BX - XORQ 96(SP), R12 - XORQ 136(SP), DX - XORQ DX, R12 - MOVQ R12, CX - ROLQ $0x01, CX - MOVQ 24(SP), R13 - XORQ 64(SP), R8 - XORQ SI, CX - XORQ 104(SP), R13 - XORQ 144(SP), R8 - XORQ R8, R13 - MOVQ R13, DX - ROLQ $0x01, DX - MOVQ R15, R8 - XORQ BP, DX - ROLQ $0x01, R8 - MOVQ SI, R9 - XORQ R12, R8 - ROLQ $0x01, R9 - - // Result b - MOVQ (SP), R10 - MOVQ 48(SP), R11 - XORQ R13, R9 - MOVQ 96(SP), R12 - MOVQ 144(SP), R13 - MOVQ 192(SP), R14 - XORQ CX, R11 - ROLQ $0x2c, R11 - XORQ DX, R12 - XORQ BX, R10 - ROLQ $0x2b, R12 - MOVQ R11, SI - MOVQ $0x800000008000000a, AX - ORQ R12, SI - XORQ R10, AX - XORQ AX, SI - MOVQ SI, (DI) - XORQ R9, R14 - ROLQ $0x0e, R14 - MOVQ R10, R15 - ANDQ R11, R15 - XORQ R14, R15 - MOVQ R15, 32(DI) - XORQ R8, R13 - ROLQ $0x15, R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 16(DI) - NOTQ R12 - ORQ R10, R14 - ORQ R13, R12 - XORQ R13, R14 - XORQ R11, R12 - MOVQ R14, 24(DI) - MOVQ R12, 8(DI) - MOVQ R12, BP - - // Result g - MOVQ 72(SP), R11 - XORQ R9, R11 - MOVQ 80(SP), R12 - ROLQ $0x14, R11 - XORQ BX, R12 - ROLQ $0x03, R12 - MOVQ 24(SP), R10 - MOVQ R11, AX - ORQ R12, AX - XORQ R8, R10 - MOVQ 128(SP), R13 - MOVQ 176(SP), R14 - ROLQ $0x1c, R10 - XORQ R10, AX - MOVQ AX, 40(DI) - XORQ AX, SI - XORQ CX, R13 - ROLQ $0x2d, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 48(DI) - XORQ AX, BP - XORQ DX, R14 - ROLQ $0x3d, R14 - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 64(DI) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 72(DI) - NOTQ R14 - XORQ R10, R15 - ORQ R14, R13 - XORQ R12, R13 - MOVQ R13, 56(DI) - - // Result k - MOVQ 8(SP), R10 - MOVQ 56(SP), R11 - MOVQ 104(SP), R12 - MOVQ 152(SP), R13 - MOVQ 160(SP), R14 - XORQ DX, R11 - ROLQ $0x06, R11 - XORQ R8, R12 - ROLQ $0x19, R12 - MOVQ R11, AX - ORQ R12, AX - XORQ CX, R10 - ROLQ $0x01, R10 - XORQ R10, AX - MOVQ AX, 80(DI) - XORQ AX, SI - XORQ R9, R13 - ROLQ $0x08, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 88(DI) - XORQ AX, BP - XORQ BX, R14 - ROLQ $0x12, R14 - NOTQ R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 96(DI) - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 104(DI) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 112(DI) - XORQ R10, R15 - - // Result m - MOVQ 40(SP), R11 - XORQ BX, R11 - MOVQ 88(SP), R12 - ROLQ $0x24, R11 - XORQ CX, R12 - MOVQ 32(SP), R10 - ROLQ $0x0a, R12 - MOVQ R11, AX - MOVQ 136(SP), R13 - ANDQ R12, AX - XORQ R9, R10 - MOVQ 184(SP), R14 - ROLQ $0x1b, R10 - XORQ R10, AX - MOVQ AX, 120(DI) - XORQ AX, SI - XORQ DX, R13 - ROLQ $0x0f, R13 - MOVQ R12, AX - ORQ R13, AX - XORQ R11, AX - MOVQ AX, 128(DI) - XORQ AX, BP - XORQ R8, R14 - ROLQ $0x38, R14 - NOTQ R13 - MOVQ R13, AX - ORQ R14, AX - XORQ R12, AX - MOVQ AX, 136(DI) - ORQ R10, R11 - XORQ R14, R11 - MOVQ R11, 152(DI) - ANDQ R10, R14 - XORQ R13, R14 - MOVQ R14, 144(DI) - XORQ R11, R15 - - // Result s - MOVQ 16(SP), R10 - MOVQ 64(SP), R11 - MOVQ 112(SP), R12 - XORQ DX, R10 - MOVQ 120(SP), R13 - ROLQ $0x3e, R10 - XORQ R8, R11 - MOVQ 168(SP), R14 - ROLQ $0x37, R11 - XORQ R9, R12 - MOVQ R10, R9 - XORQ CX, R14 - ROLQ $0x02, R14 - ANDQ R11, R9 - XORQ R14, R9 - MOVQ R9, 192(DI) - ROLQ $0x27, R12 - XORQ R9, R15 - NOTQ R11 - XORQ BX, R13 - MOVQ R11, BX - ANDQ R12, BX - XORQ R10, BX - MOVQ BX, 160(DI) - XORQ BX, SI - ROLQ $0x29, R13 - MOVQ R12, CX - ORQ R13, CX - XORQ R11, CX - MOVQ CX, 168(DI) - XORQ CX, BP - MOVQ R13, DX - MOVQ R14, R8 - ANDQ R14, DX - ORQ R10, R8 - XORQ R12, DX - XORQ R13, R8 - MOVQ DX, 176(DI) - MOVQ R8, 184(DI) - - // Prepare round - MOVQ BP, BX - ROLQ $0x01, BX - MOVQ 16(DI), R12 - XORQ 56(DI), DX - XORQ R15, BX - XORQ 96(DI), R12 - XORQ 136(DI), DX - XORQ DX, R12 - MOVQ R12, CX - ROLQ $0x01, CX - MOVQ 24(DI), R13 - XORQ 64(DI), R8 - XORQ SI, CX - XORQ 104(DI), R13 - XORQ 144(DI), R8 - XORQ R8, R13 - MOVQ R13, DX - ROLQ $0x01, DX - MOVQ R15, R8 - XORQ BP, DX - ROLQ $0x01, R8 - MOVQ SI, R9 - XORQ R12, R8 - ROLQ $0x01, R9 - - // Result b - MOVQ (DI), R10 - MOVQ 48(DI), R11 - XORQ R13, R9 - MOVQ 96(DI), R12 - MOVQ 144(DI), R13 - MOVQ 192(DI), R14 - XORQ CX, R11 - ROLQ $0x2c, R11 - XORQ DX, R12 - XORQ BX, R10 - ROLQ $0x2b, R12 - MOVQ R11, SI - MOVQ $0x8000000080008081, AX - ORQ R12, SI - XORQ R10, AX - XORQ AX, SI - MOVQ SI, (SP) - XORQ R9, R14 - ROLQ $0x0e, R14 - MOVQ R10, R15 - ANDQ R11, R15 - XORQ R14, R15 - MOVQ R15, 32(SP) - XORQ R8, R13 - ROLQ $0x15, R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 16(SP) - NOTQ R12 - ORQ R10, R14 - ORQ R13, R12 - XORQ R13, R14 - XORQ R11, R12 - MOVQ R14, 24(SP) - MOVQ R12, 8(SP) - MOVQ R12, BP - - // Result g - MOVQ 72(DI), R11 - XORQ R9, R11 - MOVQ 80(DI), R12 - ROLQ $0x14, R11 - XORQ BX, R12 - ROLQ $0x03, R12 - MOVQ 24(DI), R10 - MOVQ R11, AX - ORQ R12, AX - XORQ R8, R10 - MOVQ 128(DI), R13 - MOVQ 176(DI), R14 - ROLQ $0x1c, R10 - XORQ R10, AX - MOVQ AX, 40(SP) - XORQ AX, SI - XORQ CX, R13 - ROLQ $0x2d, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 48(SP) - XORQ AX, BP - XORQ DX, R14 - ROLQ $0x3d, R14 - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 64(SP) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 72(SP) - NOTQ R14 - XORQ R10, R15 - ORQ R14, R13 - XORQ R12, R13 - MOVQ R13, 56(SP) - - // Result k - MOVQ 8(DI), R10 - MOVQ 56(DI), R11 - MOVQ 104(DI), R12 - MOVQ 152(DI), R13 - MOVQ 160(DI), R14 - XORQ DX, R11 - ROLQ $0x06, R11 - XORQ R8, R12 - ROLQ $0x19, R12 - MOVQ R11, AX - ORQ R12, AX - XORQ CX, R10 - ROLQ $0x01, R10 - XORQ R10, AX - MOVQ AX, 80(SP) - XORQ AX, SI - XORQ R9, R13 - ROLQ $0x08, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 88(SP) - XORQ AX, BP - XORQ BX, R14 - ROLQ $0x12, R14 - NOTQ R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 96(SP) - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 104(SP) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 112(SP) - XORQ R10, R15 - - // Result m - MOVQ 40(DI), R11 - XORQ BX, R11 - MOVQ 88(DI), R12 - ROLQ $0x24, R11 - XORQ CX, R12 - MOVQ 32(DI), R10 - ROLQ $0x0a, R12 - MOVQ R11, AX - MOVQ 136(DI), R13 - ANDQ R12, AX - XORQ R9, R10 - MOVQ 184(DI), R14 - ROLQ $0x1b, R10 - XORQ R10, AX - MOVQ AX, 120(SP) - XORQ AX, SI - XORQ DX, R13 - ROLQ $0x0f, R13 - MOVQ R12, AX - ORQ R13, AX - XORQ R11, AX - MOVQ AX, 128(SP) - XORQ AX, BP - XORQ R8, R14 - ROLQ $0x38, R14 - NOTQ R13 - MOVQ R13, AX - ORQ R14, AX - XORQ R12, AX - MOVQ AX, 136(SP) - ORQ R10, R11 - XORQ R14, R11 - MOVQ R11, 152(SP) - ANDQ R10, R14 - XORQ R13, R14 - MOVQ R14, 144(SP) - XORQ R11, R15 - - // Result s - MOVQ 16(DI), R10 - MOVQ 64(DI), R11 - MOVQ 112(DI), R12 - XORQ DX, R10 - MOVQ 120(DI), R13 - ROLQ $0x3e, R10 - XORQ R8, R11 - MOVQ 168(DI), R14 - ROLQ $0x37, R11 - XORQ R9, R12 - MOVQ R10, R9 - XORQ CX, R14 - ROLQ $0x02, R14 - ANDQ R11, R9 - XORQ R14, R9 - MOVQ R9, 192(SP) - ROLQ $0x27, R12 - XORQ R9, R15 - NOTQ R11 - XORQ BX, R13 - MOVQ R11, BX - ANDQ R12, BX - XORQ R10, BX - MOVQ BX, 160(SP) - XORQ BX, SI - ROLQ $0x29, R13 - MOVQ R12, CX - ORQ R13, CX - XORQ R11, CX - MOVQ CX, 168(SP) - XORQ CX, BP - MOVQ R13, DX - MOVQ R14, R8 - ANDQ R14, DX - ORQ R10, R8 - XORQ R12, DX - XORQ R13, R8 - MOVQ DX, 176(SP) - MOVQ R8, 184(SP) - - // Prepare round - MOVQ BP, BX - ROLQ $0x01, BX - MOVQ 16(SP), R12 - XORQ 56(SP), DX - XORQ R15, BX - XORQ 96(SP), R12 - XORQ 136(SP), DX - XORQ DX, R12 - MOVQ R12, CX - ROLQ $0x01, CX - MOVQ 24(SP), R13 - XORQ 64(SP), R8 - XORQ SI, CX - XORQ 104(SP), R13 - XORQ 144(SP), R8 - XORQ R8, R13 - MOVQ R13, DX - ROLQ $0x01, DX - MOVQ R15, R8 - XORQ BP, DX - ROLQ $0x01, R8 - MOVQ SI, R9 - XORQ R12, R8 - ROLQ $0x01, R9 - - // Result b - MOVQ (SP), R10 - MOVQ 48(SP), R11 - XORQ R13, R9 - MOVQ 96(SP), R12 - MOVQ 144(SP), R13 - MOVQ 192(SP), R14 - XORQ CX, R11 - ROLQ $0x2c, R11 - XORQ DX, R12 - XORQ BX, R10 - ROLQ $0x2b, R12 - MOVQ R11, SI - MOVQ $0x8000000000008080, AX - ORQ R12, SI - XORQ R10, AX - XORQ AX, SI - MOVQ SI, (DI) - XORQ R9, R14 - ROLQ $0x0e, R14 - MOVQ R10, R15 - ANDQ R11, R15 - XORQ R14, R15 - MOVQ R15, 32(DI) - XORQ R8, R13 - ROLQ $0x15, R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 16(DI) - NOTQ R12 - ORQ R10, R14 - ORQ R13, R12 - XORQ R13, R14 - XORQ R11, R12 - MOVQ R14, 24(DI) - MOVQ R12, 8(DI) - MOVQ R12, BP - - // Result g - MOVQ 72(SP), R11 - XORQ R9, R11 - MOVQ 80(SP), R12 - ROLQ $0x14, R11 - XORQ BX, R12 - ROLQ $0x03, R12 - MOVQ 24(SP), R10 - MOVQ R11, AX - ORQ R12, AX - XORQ R8, R10 - MOVQ 128(SP), R13 - MOVQ 176(SP), R14 - ROLQ $0x1c, R10 - XORQ R10, AX - MOVQ AX, 40(DI) - XORQ AX, SI - XORQ CX, R13 - ROLQ $0x2d, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 48(DI) - XORQ AX, BP - XORQ DX, R14 - ROLQ $0x3d, R14 - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 64(DI) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 72(DI) - NOTQ R14 - XORQ R10, R15 - ORQ R14, R13 - XORQ R12, R13 - MOVQ R13, 56(DI) - - // Result k - MOVQ 8(SP), R10 - MOVQ 56(SP), R11 - MOVQ 104(SP), R12 - MOVQ 152(SP), R13 - MOVQ 160(SP), R14 - XORQ DX, R11 - ROLQ $0x06, R11 - XORQ R8, R12 - ROLQ $0x19, R12 - MOVQ R11, AX - ORQ R12, AX - XORQ CX, R10 - ROLQ $0x01, R10 - XORQ R10, AX - MOVQ AX, 80(DI) - XORQ AX, SI - XORQ R9, R13 - ROLQ $0x08, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 88(DI) - XORQ AX, BP - XORQ BX, R14 - ROLQ $0x12, R14 - NOTQ R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 96(DI) - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 104(DI) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 112(DI) - XORQ R10, R15 - - // Result m - MOVQ 40(SP), R11 - XORQ BX, R11 - MOVQ 88(SP), R12 - ROLQ $0x24, R11 - XORQ CX, R12 - MOVQ 32(SP), R10 - ROLQ $0x0a, R12 - MOVQ R11, AX - MOVQ 136(SP), R13 - ANDQ R12, AX - XORQ R9, R10 - MOVQ 184(SP), R14 - ROLQ $0x1b, R10 - XORQ R10, AX - MOVQ AX, 120(DI) - XORQ AX, SI - XORQ DX, R13 - ROLQ $0x0f, R13 - MOVQ R12, AX - ORQ R13, AX - XORQ R11, AX - MOVQ AX, 128(DI) - XORQ AX, BP - XORQ R8, R14 - ROLQ $0x38, R14 - NOTQ R13 - MOVQ R13, AX - ORQ R14, AX - XORQ R12, AX - MOVQ AX, 136(DI) - ORQ R10, R11 - XORQ R14, R11 - MOVQ R11, 152(DI) - ANDQ R10, R14 - XORQ R13, R14 - MOVQ R14, 144(DI) - XORQ R11, R15 - - // Result s - MOVQ 16(SP), R10 - MOVQ 64(SP), R11 - MOVQ 112(SP), R12 - XORQ DX, R10 - MOVQ 120(SP), R13 - ROLQ $0x3e, R10 - XORQ R8, R11 - MOVQ 168(SP), R14 - ROLQ $0x37, R11 - XORQ R9, R12 - MOVQ R10, R9 - XORQ CX, R14 - ROLQ $0x02, R14 - ANDQ R11, R9 - XORQ R14, R9 - MOVQ R9, 192(DI) - ROLQ $0x27, R12 - XORQ R9, R15 - NOTQ R11 - XORQ BX, R13 - MOVQ R11, BX - ANDQ R12, BX - XORQ R10, BX - MOVQ BX, 160(DI) - XORQ BX, SI - ROLQ $0x29, R13 - MOVQ R12, CX - ORQ R13, CX - XORQ R11, CX - MOVQ CX, 168(DI) - XORQ CX, BP - MOVQ R13, DX - MOVQ R14, R8 - ANDQ R14, DX - ORQ R10, R8 - XORQ R12, DX - XORQ R13, R8 - MOVQ DX, 176(DI) - MOVQ R8, 184(DI) - - // Prepare round - MOVQ BP, BX - ROLQ $0x01, BX - MOVQ 16(DI), R12 - XORQ 56(DI), DX - XORQ R15, BX - XORQ 96(DI), R12 - XORQ 136(DI), DX - XORQ DX, R12 - MOVQ R12, CX - ROLQ $0x01, CX - MOVQ 24(DI), R13 - XORQ 64(DI), R8 - XORQ SI, CX - XORQ 104(DI), R13 - XORQ 144(DI), R8 - XORQ R8, R13 - MOVQ R13, DX - ROLQ $0x01, DX - MOVQ R15, R8 - XORQ BP, DX - ROLQ $0x01, R8 - MOVQ SI, R9 - XORQ R12, R8 - ROLQ $0x01, R9 - - // Result b - MOVQ (DI), R10 - MOVQ 48(DI), R11 - XORQ R13, R9 - MOVQ 96(DI), R12 - MOVQ 144(DI), R13 - MOVQ 192(DI), R14 - XORQ CX, R11 - ROLQ $0x2c, R11 - XORQ DX, R12 - XORQ BX, R10 - ROLQ $0x2b, R12 - MOVQ R11, SI - MOVQ $0x0000000080000001, AX - ORQ R12, SI - XORQ R10, AX - XORQ AX, SI - MOVQ SI, (SP) - XORQ R9, R14 - ROLQ $0x0e, R14 - MOVQ R10, R15 - ANDQ R11, R15 - XORQ R14, R15 - MOVQ R15, 32(SP) - XORQ R8, R13 - ROLQ $0x15, R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 16(SP) - NOTQ R12 - ORQ R10, R14 - ORQ R13, R12 - XORQ R13, R14 - XORQ R11, R12 - MOVQ R14, 24(SP) - MOVQ R12, 8(SP) - MOVQ R12, BP - - // Result g - MOVQ 72(DI), R11 - XORQ R9, R11 - MOVQ 80(DI), R12 - ROLQ $0x14, R11 - XORQ BX, R12 - ROLQ $0x03, R12 - MOVQ 24(DI), R10 - MOVQ R11, AX - ORQ R12, AX - XORQ R8, R10 - MOVQ 128(DI), R13 - MOVQ 176(DI), R14 - ROLQ $0x1c, R10 - XORQ R10, AX - MOVQ AX, 40(SP) - XORQ AX, SI - XORQ CX, R13 - ROLQ $0x2d, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 48(SP) - XORQ AX, BP - XORQ DX, R14 - ROLQ $0x3d, R14 - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 64(SP) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 72(SP) - NOTQ R14 - XORQ R10, R15 - ORQ R14, R13 - XORQ R12, R13 - MOVQ R13, 56(SP) - - // Result k - MOVQ 8(DI), R10 - MOVQ 56(DI), R11 - MOVQ 104(DI), R12 - MOVQ 152(DI), R13 - MOVQ 160(DI), R14 - XORQ DX, R11 - ROLQ $0x06, R11 - XORQ R8, R12 - ROLQ $0x19, R12 - MOVQ R11, AX - ORQ R12, AX - XORQ CX, R10 - ROLQ $0x01, R10 - XORQ R10, AX - MOVQ AX, 80(SP) - XORQ AX, SI - XORQ R9, R13 - ROLQ $0x08, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 88(SP) - XORQ AX, BP - XORQ BX, R14 - ROLQ $0x12, R14 - NOTQ R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 96(SP) - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 104(SP) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 112(SP) - XORQ R10, R15 - - // Result m - MOVQ 40(DI), R11 - XORQ BX, R11 - MOVQ 88(DI), R12 - ROLQ $0x24, R11 - XORQ CX, R12 - MOVQ 32(DI), R10 - ROLQ $0x0a, R12 - MOVQ R11, AX - MOVQ 136(DI), R13 - ANDQ R12, AX - XORQ R9, R10 - MOVQ 184(DI), R14 - ROLQ $0x1b, R10 - XORQ R10, AX - MOVQ AX, 120(SP) - XORQ AX, SI - XORQ DX, R13 - ROLQ $0x0f, R13 - MOVQ R12, AX - ORQ R13, AX - XORQ R11, AX - MOVQ AX, 128(SP) - XORQ AX, BP - XORQ R8, R14 - ROLQ $0x38, R14 - NOTQ R13 - MOVQ R13, AX - ORQ R14, AX - XORQ R12, AX - MOVQ AX, 136(SP) - ORQ R10, R11 - XORQ R14, R11 - MOVQ R11, 152(SP) - ANDQ R10, R14 - XORQ R13, R14 - MOVQ R14, 144(SP) - XORQ R11, R15 - - // Result s - MOVQ 16(DI), R10 - MOVQ 64(DI), R11 - MOVQ 112(DI), R12 - XORQ DX, R10 - MOVQ 120(DI), R13 - ROLQ $0x3e, R10 - XORQ R8, R11 - MOVQ 168(DI), R14 - ROLQ $0x37, R11 - XORQ R9, R12 - MOVQ R10, R9 - XORQ CX, R14 - ROLQ $0x02, R14 - ANDQ R11, R9 - XORQ R14, R9 - MOVQ R9, 192(SP) - ROLQ $0x27, R12 - XORQ R9, R15 - NOTQ R11 - XORQ BX, R13 - MOVQ R11, BX - ANDQ R12, BX - XORQ R10, BX - MOVQ BX, 160(SP) - XORQ BX, SI - ROLQ $0x29, R13 - MOVQ R12, CX - ORQ R13, CX - XORQ R11, CX - MOVQ CX, 168(SP) - XORQ CX, BP - MOVQ R13, DX - MOVQ R14, R8 - ANDQ R14, DX - ORQ R10, R8 - XORQ R12, DX - XORQ R13, R8 - MOVQ DX, 176(SP) - MOVQ R8, 184(SP) - - // Prepare round - MOVQ BP, BX - ROLQ $0x01, BX - MOVQ 16(SP), R12 - XORQ 56(SP), DX - XORQ R15, BX - XORQ 96(SP), R12 - XORQ 136(SP), DX - XORQ DX, R12 - MOVQ R12, CX - ROLQ $0x01, CX - MOVQ 24(SP), R13 - XORQ 64(SP), R8 - XORQ SI, CX - XORQ 104(SP), R13 - XORQ 144(SP), R8 - XORQ R8, R13 - MOVQ R13, DX - ROLQ $0x01, DX - MOVQ R15, R8 - XORQ BP, DX - ROLQ $0x01, R8 - MOVQ SI, R9 - XORQ R12, R8 - ROLQ $0x01, R9 - - // Result b - MOVQ (SP), R10 - MOVQ 48(SP), R11 - XORQ R13, R9 - MOVQ 96(SP), R12 - MOVQ 144(SP), R13 - MOVQ 192(SP), R14 - XORQ CX, R11 - ROLQ $0x2c, R11 - XORQ DX, R12 - XORQ BX, R10 - ROLQ $0x2b, R12 - MOVQ R11, SI - MOVQ $0x8000000080008008, AX - ORQ R12, SI - XORQ R10, AX - XORQ AX, SI - MOVQ SI, (DI) - XORQ R9, R14 - ROLQ $0x0e, R14 - MOVQ R10, R15 - ANDQ R11, R15 - XORQ R14, R15 - MOVQ R15, 32(DI) - XORQ R8, R13 - ROLQ $0x15, R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 16(DI) - NOTQ R12 - ORQ R10, R14 - ORQ R13, R12 - XORQ R13, R14 - XORQ R11, R12 - MOVQ R14, 24(DI) - MOVQ R12, 8(DI) - NOP - - // Result g - MOVQ 72(SP), R11 - XORQ R9, R11 - MOVQ 80(SP), R12 - ROLQ $0x14, R11 - XORQ BX, R12 - ROLQ $0x03, R12 - MOVQ 24(SP), R10 - MOVQ R11, AX - ORQ R12, AX - XORQ R8, R10 - MOVQ 128(SP), R13 - MOVQ 176(SP), R14 - ROLQ $0x1c, R10 - XORQ R10, AX - MOVQ AX, 40(DI) - NOP - XORQ CX, R13 - ROLQ $0x2d, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 48(DI) - NOP - XORQ DX, R14 - ROLQ $0x3d, R14 - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 64(DI) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 72(DI) - NOTQ R14 - NOP - ORQ R14, R13 - XORQ R12, R13 - MOVQ R13, 56(DI) - - // Result k - MOVQ 8(SP), R10 - MOVQ 56(SP), R11 - MOVQ 104(SP), R12 - MOVQ 152(SP), R13 - MOVQ 160(SP), R14 - XORQ DX, R11 - ROLQ $0x06, R11 - XORQ R8, R12 - ROLQ $0x19, R12 - MOVQ R11, AX - ORQ R12, AX - XORQ CX, R10 - ROLQ $0x01, R10 - XORQ R10, AX - MOVQ AX, 80(DI) - NOP - XORQ R9, R13 - ROLQ $0x08, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 88(DI) - NOP - XORQ BX, R14 - ROLQ $0x12, R14 - NOTQ R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 96(DI) - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 104(DI) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 112(DI) - NOP - - // Result m - MOVQ 40(SP), R11 - XORQ BX, R11 - MOVQ 88(SP), R12 - ROLQ $0x24, R11 - XORQ CX, R12 - MOVQ 32(SP), R10 - ROLQ $0x0a, R12 - MOVQ R11, AX - MOVQ 136(SP), R13 - ANDQ R12, AX - XORQ R9, R10 - MOVQ 184(SP), R14 - ROLQ $0x1b, R10 - XORQ R10, AX - MOVQ AX, 120(DI) - NOP - XORQ DX, R13 - ROLQ $0x0f, R13 - MOVQ R12, AX - ORQ R13, AX - XORQ R11, AX - MOVQ AX, 128(DI) - NOP - XORQ R8, R14 - ROLQ $0x38, R14 - NOTQ R13 - MOVQ R13, AX - ORQ R14, AX - XORQ R12, AX - MOVQ AX, 136(DI) - ORQ R10, R11 - XORQ R14, R11 - MOVQ R11, 152(DI) - ANDQ R10, R14 - XORQ R13, R14 - MOVQ R14, 144(DI) - NOP - - // Result s - MOVQ 16(SP), R10 - MOVQ 64(SP), R11 - MOVQ 112(SP), R12 - XORQ DX, R10 - MOVQ 120(SP), R13 - ROLQ $0x3e, R10 - XORQ R8, R11 - MOVQ 168(SP), R14 - ROLQ $0x37, R11 - XORQ R9, R12 - MOVQ R10, R9 - XORQ CX, R14 - ROLQ $0x02, R14 - ANDQ R11, R9 - XORQ R14, R9 - MOVQ R9, 192(DI) - ROLQ $0x27, R12 - NOP - NOTQ R11 - XORQ BX, R13 - MOVQ R11, BX - ANDQ R12, BX - XORQ R10, BX - MOVQ BX, 160(DI) - NOP - ROLQ $0x29, R13 - MOVQ R12, CX - ORQ R13, CX - XORQ R11, CX - MOVQ CX, 168(DI) - NOP - MOVQ R13, DX - MOVQ R14, R8 - ANDQ R14, DX - ORQ R10, R8 - XORQ R12, DX - XORQ R13, R8 - MOVQ DX, 176(DI) - MOVQ R8, 184(DI) - - // Revert the internal state to the user state - NOTQ 8(DI) - NOTQ 16(DI) - NOTQ 64(DI) - NOTQ 96(DI) - NOTQ 136(DI) - NOTQ 160(DI) - RET diff --git a/src/vendor/golang.org/x/crypto/sha3/sha3.go b/src/vendor/golang.org/x/crypto/sha3/sha3.go deleted file mode 100644 index afedde5abf..0000000000 --- a/src/vendor/golang.org/x/crypto/sha3/sha3.go +++ /dev/null @@ -1,185 +0,0 @@ -// Copyright 2014 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package sha3 - -// spongeDirection indicates the direction bytes are flowing through the sponge. -type spongeDirection int - -const ( - // spongeAbsorbing indicates that the sponge is absorbing input. - spongeAbsorbing spongeDirection = iota - // spongeSqueezing indicates that the sponge is being squeezed. - spongeSqueezing -) - -const ( - // maxRate is the maximum size of the internal buffer. SHAKE-256 - // currently needs the largest buffer. - maxRate = 168 -) - -type state struct { - // Generic sponge components. - a [25]uint64 // main state of the hash - rate int // the number of bytes of state to use - - // dsbyte contains the "domain separation" bits and the first bit of - // the padding. Sections 6.1 and 6.2 of [1] separate the outputs of the - // SHA-3 and SHAKE functions by appending bitstrings to the message. - // Using a little-endian bit-ordering convention, these are "01" for SHA-3 - // and "1111" for SHAKE, or 00000010b and 00001111b, respectively. Then the - // padding rule from section 5.1 is applied to pad the message to a multiple - // of the rate, which involves adding a "1" bit, zero or more "0" bits, and - // a final "1" bit. We merge the first "1" bit from the padding into dsbyte, - // giving 00000110b (0x06) and 00011111b (0x1f). - // [1] http://csrc.nist.gov/publications/drafts/fips-202/fips_202_draft.pdf - // "Draft FIPS 202: SHA-3 Standard: Permutation-Based Hash and - // Extendable-Output Functions (May 2014)" - dsbyte byte - - i, n int // storage[i:n] is the buffer, i is only used while squeezing - storage [maxRate]byte - - // Specific to SHA-3 and SHAKE. - outputLen int // the default output size in bytes - state spongeDirection // whether the sponge is absorbing or squeezing -} - -// BlockSize returns the rate of sponge underlying this hash function. -func (d *state) BlockSize() int { return d.rate } - -// Size returns the output size of the hash function in bytes. -func (d *state) Size() int { return d.outputLen } - -// Reset clears the internal state by zeroing the sponge state and -// the buffer indexes, and setting Sponge.state to absorbing. -func (d *state) Reset() { - // Zero the permutation's state. - for i := range d.a { - d.a[i] = 0 - } - d.state = spongeAbsorbing - d.i, d.n = 0, 0 -} - -func (d *state) clone() *state { - ret := *d - return &ret -} - -// permute applies the KeccakF-1600 permutation. It handles -// any input-output buffering. -func (d *state) permute() { - switch d.state { - case spongeAbsorbing: - // If we're absorbing, we need to xor the input into the state - // before applying the permutation. - xorIn(d, d.storage[:d.rate]) - d.n = 0 - keccakF1600(&d.a) - case spongeSqueezing: - // If we're squeezing, we need to apply the permutation before - // copying more output. - keccakF1600(&d.a) - d.i = 0 - copyOut(d, d.storage[:d.rate]) - } -} - -// pads appends the domain separation bits in dsbyte, applies -// the multi-bitrate 10..1 padding rule, and permutes the state. -func (d *state) padAndPermute() { - // Pad with this instance's domain-separator bits. We know that there's - // at least one byte of space in d.buf because, if it were full, - // permute would have been called to empty it. dsbyte also contains the - // first one bit for the padding. See the comment in the state struct. - d.storage[d.n] = d.dsbyte - d.n++ - for d.n < d.rate { - d.storage[d.n] = 0 - d.n++ - } - // This adds the final one bit for the padding. Because of the way that - // bits are numbered from the LSB upwards, the final bit is the MSB of - // the last byte. - d.storage[d.rate-1] ^= 0x80 - // Apply the permutation - d.permute() - d.state = spongeSqueezing - d.n = d.rate - copyOut(d, d.storage[:d.rate]) -} - -// Write absorbs more data into the hash's state. It panics if any -// output has already been read. -func (d *state) Write(p []byte) (written int, err error) { - if d.state != spongeAbsorbing { - panic("sha3: Write after Read") - } - written = len(p) - - for len(p) > 0 { - if d.n == 0 && len(p) >= d.rate { - // The fast path; absorb a full "rate" bytes of input and apply the permutation. - xorIn(d, p[:d.rate]) - p = p[d.rate:] - keccakF1600(&d.a) - } else { - // The slow path; buffer the input until we can fill the sponge, and then xor it in. - todo := d.rate - d.n - if todo > len(p) { - todo = len(p) - } - d.n += copy(d.storage[d.n:], p[:todo]) - p = p[todo:] - - // If the sponge is full, apply the permutation. - if d.n == d.rate { - d.permute() - } - } - } - - return -} - -// Read squeezes an arbitrary number of bytes from the sponge. -func (d *state) Read(out []byte) (n int, err error) { - // If we're still absorbing, pad and apply the permutation. - if d.state == spongeAbsorbing { - d.padAndPermute() - } - - n = len(out) - - // Now, do the squeezing. - for len(out) > 0 { - n := copy(out, d.storage[d.i:d.n]) - d.i += n - out = out[n:] - - // Apply the permutation if we've squeezed the sponge dry. - if d.i == d.rate { - d.permute() - } - } - - return -} - -// Sum applies padding to the hash state and then squeezes out the desired -// number of output bytes. It panics if any output has already been read. -func (d *state) Sum(in []byte) []byte { - if d.state != spongeAbsorbing { - panic("sha3: Sum after Read") - } - - // Make a copy of the original hash so that caller can keep writing - // and summing. - dup := d.clone() - hash := make([]byte, dup.outputLen, 64) // explicit cap to allow stack allocation - dup.Read(hash) - return append(in, hash...) -} diff --git a/src/vendor/golang.org/x/crypto/sha3/sha3_s390x.go b/src/vendor/golang.org/x/crypto/sha3/sha3_s390x.go deleted file mode 100644 index 00d8034ae6..0000000000 --- a/src/vendor/golang.org/x/crypto/sha3/sha3_s390x.go +++ /dev/null @@ -1,303 +0,0 @@ -// Copyright 2017 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -//go:build gc && !purego - -package sha3 - -// This file contains code for using the 'compute intermediate -// message digest' (KIMD) and 'compute last message digest' (KLMD) -// instructions to compute SHA-3 and SHAKE hashes on IBM Z. - -import ( - "hash" - - "golang.org/x/sys/cpu" -) - -// codes represent 7-bit KIMD/KLMD function codes as defined in -// the Principles of Operation. -type code uint64 - -const ( - // function codes for KIMD/KLMD - sha3_224 code = 32 - sha3_256 = 33 - sha3_384 = 34 - sha3_512 = 35 - shake_128 = 36 - shake_256 = 37 - nopad = 0x100 -) - -// kimd is a wrapper for the 'compute intermediate message digest' instruction. -// src must be a multiple of the rate for the given function code. -// -//go:noescape -func kimd(function code, chain *[200]byte, src []byte) - -// klmd is a wrapper for the 'compute last message digest' instruction. -// src padding is handled by the instruction. -// -//go:noescape -func klmd(function code, chain *[200]byte, dst, src []byte) - -type asmState struct { - a [200]byte // 1600 bit state - buf []byte // care must be taken to ensure cap(buf) is a multiple of rate - rate int // equivalent to block size - storage [3072]byte // underlying storage for buf - outputLen int // output length for full security - function code // KIMD/KLMD function code - state spongeDirection // whether the sponge is absorbing or squeezing -} - -func newAsmState(function code) *asmState { - var s asmState - s.function = function - switch function { - case sha3_224: - s.rate = 144 - s.outputLen = 28 - case sha3_256: - s.rate = 136 - s.outputLen = 32 - case sha3_384: - s.rate = 104 - s.outputLen = 48 - case sha3_512: - s.rate = 72 - s.outputLen = 64 - case shake_128: - s.rate = 168 - s.outputLen = 32 - case shake_256: - s.rate = 136 - s.outputLen = 64 - default: - panic("sha3: unrecognized function code") - } - - // limit s.buf size to a multiple of s.rate - s.resetBuf() - return &s -} - -func (s *asmState) clone() *asmState { - c := *s - c.buf = c.storage[:len(s.buf):cap(s.buf)] - return &c -} - -// copyIntoBuf copies b into buf. It will panic if there is not enough space to -// store all of b. -func (s *asmState) copyIntoBuf(b []byte) { - bufLen := len(s.buf) - s.buf = s.buf[:len(s.buf)+len(b)] - copy(s.buf[bufLen:], b) -} - -// resetBuf points buf at storage, sets the length to 0 and sets cap to be a -// multiple of the rate. -func (s *asmState) resetBuf() { - max := (cap(s.storage) / s.rate) * s.rate - s.buf = s.storage[:0:max] -} - -// Write (via the embedded io.Writer interface) adds more data to the running hash. -// It never returns an error. -func (s *asmState) Write(b []byte) (int, error) { - if s.state != spongeAbsorbing { - panic("sha3: Write after Read") - } - length := len(b) - for len(b) > 0 { - if len(s.buf) == 0 && len(b) >= cap(s.buf) { - // Hash the data directly and push any remaining bytes - // into the buffer. - remainder := len(b) % s.rate - kimd(s.function, &s.a, b[:len(b)-remainder]) - if remainder != 0 { - s.copyIntoBuf(b[len(b)-remainder:]) - } - return length, nil - } - - if len(s.buf) == cap(s.buf) { - // flush the buffer - kimd(s.function, &s.a, s.buf) - s.buf = s.buf[:0] - } - - // copy as much as we can into the buffer - n := len(b) - if len(b) > cap(s.buf)-len(s.buf) { - n = cap(s.buf) - len(s.buf) - } - s.copyIntoBuf(b[:n]) - b = b[n:] - } - return length, nil -} - -// Read squeezes an arbitrary number of bytes from the sponge. -func (s *asmState) Read(out []byte) (n int, err error) { - // The 'compute last message digest' instruction only stores the digest - // at the first operand (dst) for SHAKE functions. - if s.function != shake_128 && s.function != shake_256 { - panic("sha3: can only call Read for SHAKE functions") - } - - n = len(out) - - // need to pad if we were absorbing - if s.state == spongeAbsorbing { - s.state = spongeSqueezing - - // write hash directly into out if possible - if len(out)%s.rate == 0 { - klmd(s.function, &s.a, out, s.buf) // len(out) may be 0 - s.buf = s.buf[:0] - return - } - - // write hash into buffer - max := cap(s.buf) - if max > len(out) { - max = (len(out)/s.rate)*s.rate + s.rate - } - klmd(s.function, &s.a, s.buf[:max], s.buf) - s.buf = s.buf[:max] - } - - for len(out) > 0 { - // flush the buffer - if len(s.buf) != 0 { - c := copy(out, s.buf) - out = out[c:] - s.buf = s.buf[c:] - continue - } - - // write hash directly into out if possible - if len(out)%s.rate == 0 { - klmd(s.function|nopad, &s.a, out, nil) - return - } - - // write hash into buffer - s.resetBuf() - if cap(s.buf) > len(out) { - s.buf = s.buf[:(len(out)/s.rate)*s.rate+s.rate] - } - klmd(s.function|nopad, &s.a, s.buf, nil) - } - return -} - -// Sum appends the current hash to b and returns the resulting slice. -// It does not change the underlying hash state. -func (s *asmState) Sum(b []byte) []byte { - if s.state != spongeAbsorbing { - panic("sha3: Sum after Read") - } - - // Copy the state to preserve the original. - a := s.a - - // Hash the buffer. Note that we don't clear it because we - // aren't updating the state. - switch s.function { - case sha3_224, sha3_256, sha3_384, sha3_512: - klmd(s.function, &a, nil, s.buf) - return append(b, a[:s.outputLen]...) - case shake_128, shake_256: - d := make([]byte, s.outputLen, 64) - klmd(s.function, &a, d, s.buf) - return append(b, d[:s.outputLen]...) - default: - panic("sha3: unknown function") - } -} - -// Reset resets the Hash to its initial state. -func (s *asmState) Reset() { - for i := range s.a { - s.a[i] = 0 - } - s.resetBuf() - s.state = spongeAbsorbing -} - -// Size returns the number of bytes Sum will return. -func (s *asmState) Size() int { - return s.outputLen -} - -// BlockSize returns the hash's underlying block size. -// The Write method must be able to accept any amount -// of data, but it may operate more efficiently if all writes -// are a multiple of the block size. -func (s *asmState) BlockSize() int { - return s.rate -} - -// Clone returns a copy of the ShakeHash in its current state. -func (s *asmState) Clone() ShakeHash { - return s.clone() -} - -// new224 returns an assembly implementation of SHA3-224 if available, -// otherwise it returns a generic implementation. -func new224() hash.Hash { - if cpu.S390X.HasSHA3 { - return newAsmState(sha3_224) - } - return new224Generic() -} - -// new256 returns an assembly implementation of SHA3-256 if available, -// otherwise it returns a generic implementation. -func new256() hash.Hash { - if cpu.S390X.HasSHA3 { - return newAsmState(sha3_256) - } - return new256Generic() -} - -// new384 returns an assembly implementation of SHA3-384 if available, -// otherwise it returns a generic implementation. -func new384() hash.Hash { - if cpu.S390X.HasSHA3 { - return newAsmState(sha3_384) - } - return new384Generic() -} - -// new512 returns an assembly implementation of SHA3-512 if available, -// otherwise it returns a generic implementation. -func new512() hash.Hash { - if cpu.S390X.HasSHA3 { - return newAsmState(sha3_512) - } - return new512Generic() -} - -// newShake128 returns an assembly implementation of SHAKE-128 if available, -// otherwise it returns a generic implementation. -func newShake128() ShakeHash { - if cpu.S390X.HasSHA3 { - return newAsmState(shake_128) - } - return newShake128Generic() -} - -// newShake256 returns an assembly implementation of SHAKE-256 if available, -// otherwise it returns a generic implementation. -func newShake256() ShakeHash { - if cpu.S390X.HasSHA3 { - return newAsmState(shake_256) - } - return newShake256Generic() -} diff --git a/src/vendor/golang.org/x/crypto/sha3/sha3_s390x.s b/src/vendor/golang.org/x/crypto/sha3/sha3_s390x.s deleted file mode 100644 index 826b862c77..0000000000 --- a/src/vendor/golang.org/x/crypto/sha3/sha3_s390x.s +++ /dev/null @@ -1,33 +0,0 @@ -// Copyright 2017 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -//go:build gc && !purego - -#include "textflag.h" - -// func kimd(function code, chain *[200]byte, src []byte) -TEXT ·kimd(SB), NOFRAME|NOSPLIT, $0-40 - MOVD function+0(FP), R0 - MOVD chain+8(FP), R1 - LMG src+16(FP), R2, R3 // R2=base, R3=len - -continue: - WORD $0xB93E0002 // KIMD --, R2 - BVS continue // continue if interrupted - MOVD $0, R0 // reset R0 for pre-go1.8 compilers - RET - -// func klmd(function code, chain *[200]byte, dst, src []byte) -TEXT ·klmd(SB), NOFRAME|NOSPLIT, $0-64 - // TODO: SHAKE support - MOVD function+0(FP), R0 - MOVD chain+8(FP), R1 - LMG dst+16(FP), R2, R3 // R2=base, R3=len - LMG src+40(FP), R4, R5 // R4=base, R5=len - -continue: - WORD $0xB93F0024 // KLMD R2, R4 - BVS continue // continue if interrupted - MOVD $0, R0 // reset R0 for pre-go1.8 compilers - RET diff --git a/src/vendor/golang.org/x/crypto/sha3/shake.go b/src/vendor/golang.org/x/crypto/sha3/shake.go deleted file mode 100644 index 1ea9275b8b..0000000000 --- a/src/vendor/golang.org/x/crypto/sha3/shake.go +++ /dev/null @@ -1,174 +0,0 @@ -// Copyright 2014 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package sha3 - -// This file defines the ShakeHash interface, and provides -// functions for creating SHAKE and cSHAKE instances, as well as utility -// functions for hashing bytes to arbitrary-length output. -// -// -// SHAKE implementation is based on FIPS PUB 202 [1] -// cSHAKE implementations is based on NIST SP 800-185 [2] -// -// [1] https://nvlpubs.nist.gov/nistpubs/FIPS/NIST.FIPS.202.pdf -// [2] https://doi.org/10.6028/NIST.SP.800-185 - -import ( - "encoding/binary" - "hash" - "io" -) - -// ShakeHash defines the interface to hash functions that support -// arbitrary-length output. When used as a plain [hash.Hash], it -// produces minimum-length outputs that provide full-strength generic -// security. -type ShakeHash interface { - hash.Hash - - // Read reads more output from the hash; reading affects the hash's - // state. (ShakeHash.Read is thus very different from Hash.Sum) - // It never returns an error, but subsequent calls to Write or Sum - // will panic. - io.Reader - - // Clone returns a copy of the ShakeHash in its current state. - Clone() ShakeHash -} - -// cSHAKE specific context -type cshakeState struct { - *state // SHA-3 state context and Read/Write operations - - // initBlock is the cSHAKE specific initialization set of bytes. It is initialized - // by newCShake function and stores concatenation of N followed by S, encoded - // by the method specified in 3.3 of [1]. - // It is stored here in order for Reset() to be able to put context into - // initial state. - initBlock []byte -} - -// Consts for configuring initial SHA-3 state -const ( - dsbyteShake = 0x1f - dsbyteCShake = 0x04 - rate128 = 168 - rate256 = 136 -) - -func bytepad(input []byte, w int) []byte { - // leftEncode always returns max 9 bytes - buf := make([]byte, 0, 9+len(input)+w) - buf = append(buf, leftEncode(uint64(w))...) - buf = append(buf, input...) - padlen := w - (len(buf) % w) - return append(buf, make([]byte, padlen)...) -} - -func leftEncode(value uint64) []byte { - var b [9]byte - binary.BigEndian.PutUint64(b[1:], value) - // Trim all but last leading zero bytes - i := byte(1) - for i < 8 && b[i] == 0 { - i++ - } - // Prepend number of encoded bytes - b[i-1] = 9 - i - return b[i-1:] -} - -func newCShake(N, S []byte, rate, outputLen int, dsbyte byte) ShakeHash { - c := cshakeState{state: &state{rate: rate, outputLen: outputLen, dsbyte: dsbyte}} - - // leftEncode returns max 9 bytes - c.initBlock = make([]byte, 0, 9*2+len(N)+len(S)) - c.initBlock = append(c.initBlock, leftEncode(uint64(len(N)*8))...) - c.initBlock = append(c.initBlock, N...) - c.initBlock = append(c.initBlock, leftEncode(uint64(len(S)*8))...) - c.initBlock = append(c.initBlock, S...) - c.Write(bytepad(c.initBlock, c.rate)) - return &c -} - -// Reset resets the hash to initial state. -func (c *cshakeState) Reset() { - c.state.Reset() - c.Write(bytepad(c.initBlock, c.rate)) -} - -// Clone returns copy of a cSHAKE context within its current state. -func (c *cshakeState) Clone() ShakeHash { - b := make([]byte, len(c.initBlock)) - copy(b, c.initBlock) - return &cshakeState{state: c.clone(), initBlock: b} -} - -// Clone returns copy of SHAKE context within its current state. -func (c *state) Clone() ShakeHash { - return c.clone() -} - -// NewShake128 creates a new SHAKE128 variable-output-length ShakeHash. -// Its generic security strength is 128 bits against all attacks if at -// least 32 bytes of its output are used. -func NewShake128() ShakeHash { - return newShake128() -} - -// NewShake256 creates a new SHAKE256 variable-output-length ShakeHash. -// Its generic security strength is 256 bits against all attacks if -// at least 64 bytes of its output are used. -func NewShake256() ShakeHash { - return newShake256() -} - -func newShake128Generic() *state { - return &state{rate: rate128, outputLen: 32, dsbyte: dsbyteShake} -} - -func newShake256Generic() *state { - return &state{rate: rate256, outputLen: 64, dsbyte: dsbyteShake} -} - -// NewCShake128 creates a new instance of cSHAKE128 variable-output-length ShakeHash, -// a customizable variant of SHAKE128. -// N is used to define functions based on cSHAKE, it can be empty when plain cSHAKE is -// desired. S is a customization byte string used for domain separation - two cSHAKE -// computations on same input with different S yield unrelated outputs. -// When N and S are both empty, this is equivalent to NewShake128. -func NewCShake128(N, S []byte) ShakeHash { - if len(N) == 0 && len(S) == 0 { - return NewShake128() - } - return newCShake(N, S, rate128, 32, dsbyteCShake) -} - -// NewCShake256 creates a new instance of cSHAKE256 variable-output-length ShakeHash, -// a customizable variant of SHAKE256. -// N is used to define functions based on cSHAKE, it can be empty when plain cSHAKE is -// desired. S is a customization byte string used for domain separation - two cSHAKE -// computations on same input with different S yield unrelated outputs. -// When N and S are both empty, this is equivalent to NewShake256. -func NewCShake256(N, S []byte) ShakeHash { - if len(N) == 0 && len(S) == 0 { - return NewShake256() - } - return newCShake(N, S, rate256, 64, dsbyteCShake) -} - -// ShakeSum128 writes an arbitrary-length digest of data into hash. -func ShakeSum128(hash, data []byte) { - h := NewShake128() - h.Write(data) - h.Read(hash) -} - -// ShakeSum256 writes an arbitrary-length digest of data into hash. -func ShakeSum256(hash, data []byte) { - h := NewShake256() - h.Write(data) - h.Read(hash) -} diff --git a/src/vendor/golang.org/x/crypto/sha3/shake_noasm.go b/src/vendor/golang.org/x/crypto/sha3/shake_noasm.go deleted file mode 100644 index 4276ba4ab2..0000000000 --- a/src/vendor/golang.org/x/crypto/sha3/shake_noasm.go +++ /dev/null @@ -1,15 +0,0 @@ -// Copyright 2023 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -//go:build !gc || purego || !s390x - -package sha3 - -func newShake128() *state { - return newShake128Generic() -} - -func newShake256() *state { - return newShake256Generic() -} diff --git a/src/vendor/golang.org/x/crypto/sha3/xor.go b/src/vendor/golang.org/x/crypto/sha3/xor.go deleted file mode 100644 index 6ada5c9574..0000000000 --- a/src/vendor/golang.org/x/crypto/sha3/xor.go +++ /dev/null @@ -1,40 +0,0 @@ -// Copyright 2015 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package sha3 - -import ( - "crypto/subtle" - "encoding/binary" - "unsafe" - - "golang.org/x/sys/cpu" -) - -// xorIn xors the bytes in buf into the state. -func xorIn(d *state, buf []byte) { - if cpu.IsBigEndian { - for i := 0; len(buf) >= 8; i++ { - a := binary.LittleEndian.Uint64(buf) - d.a[i] ^= a - buf = buf[8:] - } - } else { - ab := (*[25 * 64 / 8]byte)(unsafe.Pointer(&d.a)) - subtle.XORBytes(ab[:], ab[:], buf) - } -} - -// copyOut copies uint64s to a byte buffer. -func copyOut(d *state, b []byte) { - if cpu.IsBigEndian { - for i := 0; len(b) >= 8; i++ { - binary.LittleEndian.PutUint64(b, d.a[i]) - b = b[8:] - } - } else { - ab := (*[25 * 64 / 8]byte)(unsafe.Pointer(&d.a)) - copy(b, ab[:]) - } -} diff --git a/src/vendor/modules.txt b/src/vendor/modules.txt index 2e10edb2b0..2ce5d0cf0d 100644 --- a/src/vendor/modules.txt +++ b/src/vendor/modules.txt @@ -7,7 +7,6 @@ golang.org/x/crypto/cryptobyte/asn1 golang.org/x/crypto/hkdf golang.org/x/crypto/internal/alias golang.org/x/crypto/internal/poly1305 -golang.org/x/crypto/sha3 # golang.org/x/net v0.27.1-0.20240722181819-765c7e89b3bd ## explicit; go 1.18 golang.org/x/net/dns/dnsmessage -- cgit v1.3