diff options
| author | Junyang Shao <shaojunyang@google.com> | 2025-10-16 16:07:32 +0000 |
|---|---|---|
| committer | Junyang Shao <shaojunyang@google.com> | 2025-10-24 10:53:28 -0700 |
| commit | cf7c1a4cbb917b6c5d80d1d9443a40cb7720db75 (patch) | |
| tree | a1e74fb930fae0c22ad03c377c5bc4f4585ad6e0 /src/simd | |
| parent | 2b8eded4f4fd3d421d1fb9af68c774142abcf208 (diff) | |
| download | go-cf7c1a4cbb917b6c5d80d1d9443a40cb7720db75.tar.xz | |
[dev.simd] cmd/compile, simd: add SHA features
This CL also fixed some bugs left in CL 712181.
Change-Id: I9cb6cd9fbaef307f352809bf21b8fec3eb62721a
Reviewed-on: https://go-review.googlesource.com/c/go/+/712361
Reviewed-by: David Chase <drchase@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Diffstat (limited to 'src/simd')
| -rw-r--r-- | src/simd/_gen/simdgen/gen_simdIntrinsics.go | 2 | ||||
| -rw-r--r-- | src/simd/_gen/simdgen/gen_simdMachineOps.go | 4 | ||||
| -rw-r--r-- | src/simd/_gen/simdgen/gen_simdTypes.go | 9 | ||||
| -rw-r--r-- | src/simd/_gen/simdgen/gen_simdssa.go | 3 | ||||
| -rw-r--r-- | src/simd/_gen/simdgen/gen_utility.go | 13 | ||||
| -rw-r--r-- | src/simd/_gen/simdgen/godefs.go | 2 | ||||
| -rw-r--r-- | src/simd/_gen/simdgen/main.go | 21 | ||||
| -rw-r--r-- | src/simd/_gen/simdgen/ops/Others/categories.yaml | 61 | ||||
| -rw-r--r-- | src/simd/_gen/simdgen/ops/Others/go.yaml | 43 | ||||
| -rw-r--r-- | src/simd/_gen/simdgen/xed.go | 3 | ||||
| -rw-r--r-- | src/simd/cpu.go | 8 | ||||
| -rw-r--r-- | src/simd/ops_amd64.go | 150 |
12 files changed, 300 insertions, 19 deletions
diff --git a/src/simd/_gen/simdgen/gen_simdIntrinsics.go b/src/simd/_gen/simdgen/gen_simdIntrinsics.go index a59bd9d658..8827ce07c1 100644 --- a/src/simd/_gen/simdgen/gen_simdIntrinsics.go +++ b/src/simd/_gen/simdgen/gen_simdIntrinsics.go @@ -58,6 +58,8 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . {{end}} {{define "op2Imm8_II"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen2Imm8_II(ssa.Op{{.GenericName}}, {{.SSAType}}, {{(index .In 0).ImmOffset}}), sys.AMD64) {{end}} +{{define "op2Imm8_SHA1RNDS4"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen2Imm8_SHA1RNDS4(ssa.Op{{.GenericName}}, {{.SSAType}}, {{(index .In 0).ImmOffset}}), sys.AMD64) +{{end}} {{define "op3Imm8"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen3Imm8(ssa.Op{{.GenericName}}, {{.SSAType}}, {{(index .In 0).ImmOffset}}), sys.AMD64) {{end}} {{define "op3Imm8_2I"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen3Imm8_2I(ssa.Op{{.GenericName}}, {{.SSAType}}, {{(index .In 0).ImmOffset}}), sys.AMD64) diff --git a/src/simd/_gen/simdgen/gen_simdMachineOps.go b/src/simd/_gen/simdgen/gen_simdMachineOps.go index e65b36e95d..b1286ad604 100644 --- a/src/simd/_gen/simdgen/gen_simdMachineOps.go +++ b/src/simd/_gen/simdgen/gen_simdMachineOps.go @@ -16,7 +16,7 @@ const simdMachineOpsTmpl = ` package main func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vfpkv, w11, w21, w2k, wkw, w2kw, w2kk, w31, w3kw, wgpw, wgp, wfpw, wfpkw, - wkwload, v21load, v31load, v11load, w21load, w31load, w2kload, w2kwload, w11load, w3kwload, w2kkload regInfo) []opData { + wkwload, v21load, v31load, v11load, w21load, w31load, w2kload, w2kwload, w11load, w3kwload, w2kkload, v31x0AtIn2 regInfo) []opData { return []opData{ {{- range .OpsData }} {name: "{{.OpName}}", argLength: {{.OpInLen}}, reg: {{.RegInfo}}, asm: "{{.Asm}}", commutative: {{.Comm}}, typ: "{{.Type}}", resultInArg0: {{.ResultInArg0}}}, @@ -61,7 +61,7 @@ func 
writeSIMDMachineOps(ops []Operation) *bytes.Buffer { "v11": true, "v21": true, "v2k": true, "v2kv": true, "v2kk": true, "vkv": true, "v31": true, "v3kv": true, "vgpv": true, "vgp": true, "vfpv": true, "vfpkv": true, "w11": true, "w21": true, "w2k": true, "w2kw": true, "w2kk": true, "wkw": true, "w31": true, "w3kw": true, "wgpw": true, "wgp": true, "wfpw": true, "wfpkw": true, "wkwload": true, "v21load": true, "v31load": true, "v11load": true, "w21load": true, "w31load": true, "w2kload": true, "w2kwload": true, "w11load": true, - "w3kwload": true, "w2kkload": true} + "w3kwload": true, "w2kkload": true, "v31x0AtIn2": true} opsData := make([]opData, 0) opsDataImm := make([]opData, 0) opsDataLoad := make([]opData, 0) diff --git a/src/simd/_gen/simdgen/gen_simdTypes.go b/src/simd/_gen/simdgen/gen_simdTypes.go index 2d81231cda..a8998ec252 100644 --- a/src/simd/_gen/simdgen/gen_simdTypes.go +++ b/src/simd/_gen/simdgen/gen_simdTypes.go @@ -352,6 +352,15 @@ func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op2NameAndType "y"}}, {{.ImmName}} uin func ({{.Op1NameAndType "x"}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"}}) {{.GoType}} {{end}} +{{define "op2Imm8_SHA1RNDS4"}} +{{if .Documentation}}{{.Documentation}} +//{{end}} +// {{.ImmName}} results in better performance when it's a constant, a non-constant value will be translated into a jump table. 
+// +// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} +func ({{.Op1NameAndType "x"}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"}}) {{.GoType}} +{{end}} + {{define "op3Imm8"}} {{if .Documentation}}{{.Documentation}} //{{end}} diff --git a/src/simd/_gen/simdgen/gen_simdssa.go b/src/simd/_gen/simdgen/gen_simdssa.go index c1ce584549..8402376210 100644 --- a/src/simd/_gen/simdgen/gen_simdssa.go +++ b/src/simd/_gen/simdgen/gen_simdssa.go @@ -96,6 +96,9 @@ func writeSIMDSSA(ops []Operation) *bytes.Buffer { "v2kvloadImm8", "v31ResultInArg0Imm8", "v31loadResultInArg0Imm8", + "v21ResultInArg0", + "v21ResultInArg0Imm8", + "v31x0AtIn2ResultInArg0", } regInfoSet := map[string][]string{} for _, key := range regInfoKeys { diff --git a/src/simd/_gen/simdgen/gen_utility.go b/src/simd/_gen/simdgen/gen_utility.go index 70f07cf7a4..2fb05026c0 100644 --- a/src/simd/_gen/simdgen/gen_utility.go +++ b/src/simd/_gen/simdgen/gen_utility.go @@ -236,9 +236,9 @@ func (op *Operation) shape() (shapeIn inShape, shapeOut outShape, maskType maskS // regShape returns a string representation of the register shape. func (op *Operation) regShape(mem memShape) (string, error) { _, _, _, _, gOp := op.shape() - var regInfo string + var regInfo, fixedName string var vRegInCnt, gRegInCnt, kMaskInCnt, vRegOutCnt, gRegOutCnt, kMaskOutCnt, memInCnt, memOutCnt int - for _, in := range gOp.In { + for i, in := range gOp.In { switch in.Class { case "vreg": vRegInCnt++ @@ -253,8 +253,11 @@ func (op *Operation) regShape(mem memShape) (string, error) { memInCnt++ vRegInCnt++ } + if in.FixedReg != nil { + fixedName = fmt.Sprintf("%sAtIn%d", *in.FixedReg, i) + } } - for _, out := range gOp.Out { + for i, out := range gOp.Out { // If class overwrite is happening, that's not really a mask but a vreg. 
if out.Class == "vreg" || out.OverwriteClass != nil { vRegOutCnt++ @@ -269,6 +272,9 @@ func (op *Operation) regShape(mem memShape) (string, error) { vRegOutCnt++ memOutCnt++ } + if out.FixedReg != nil { + fixedName = fmt.Sprintf("%sAtIn%d", *out.FixedReg, i) + } } var inRegs, inMasks, outRegs, outMasks string @@ -309,6 +315,7 @@ func (op *Operation) regShape(mem memShape) (string, error) { if memOutCnt > 0 { panic("simdgen does not understand memory as output as of now") } + regInfo += fixedName return regInfo, nil } diff --git a/src/simd/_gen/simdgen/godefs.go b/src/simd/_gen/simdgen/godefs.go index bda1dfc8fe..244f67fe9d 100644 --- a/src/simd/_gen/simdgen/godefs.go +++ b/src/simd/_gen/simdgen/godefs.go @@ -256,6 +256,8 @@ type Operand struct { // because Intel's XED data is inconsistent. e.g. AVX512 VPMADDUBSW marks its operand // elemBits 16, which should be 8. OverwriteElementBits *int + // FixedReg is the name of the fixed registers + FixedReg *string } // isDigit returns true if the byte is an ASCII digit. diff --git a/src/simd/_gen/simdgen/main.go b/src/simd/_gen/simdgen/main.go index 537dde0c66..ca75cff55d 100644 --- a/src/simd/_gen/simdgen/main.go +++ b/src/simd/_gen/simdgen/main.go @@ -92,8 +92,9 @@ import ( "slices" "strings" - "gopkg.in/yaml.v3" "simd/_gen/unify" + + "gopkg.in/yaml.v3" ) var ( @@ -199,6 +200,15 @@ func main() { log.Fatal(err) } + // Validate results. + // + // Don't validate if this is a command-line query because that tends to + // eliminate lots of required defs and is used in cases where maybe defs + // aren't enumerable anyway. + if *flagQ == "" && len(must) > 0 { + validate(unified, must) + } + // Print results. switch *flagO { case "yaml": @@ -228,15 +238,6 @@ func main() { fmt.Fprintf(os.Stderr, "XED decoding generated %d \"errors\" which is not cause for alarm, use -v for details.\n", operandRemarks) } } - - // Validate results. 
- // - // Don't validate if this is a command-line query because that tends to - // eliminate lots of required defs and is used in cases where maybe defs - // aren't enumerable anyway. - if *flagQ == "" && len(must) > 0 { - validate(unified, must) - } } func validate(cl unify.Closure, required map[*unify.Value]struct{}) { diff --git a/src/simd/_gen/simdgen/ops/Others/categories.yaml b/src/simd/_gen/simdgen/ops/Others/categories.yaml index dd922fb14b..3c8befb826 100644 --- a/src/simd/_gen/simdgen/ops/Others/categories.yaml +++ b/src/simd/_gen/simdgen/ops/Others/categories.yaml @@ -46,4 +46,63 @@ documentation: !string |- // NAME performs the InvMixColumns operation in AES cipher algorithm defined in FIPS 197. // x is the chunk of w array in use. - // result = InvMixColumns(x)
\ No newline at end of file + // result = InvMixColumns(x) +- go: SHA1Round4 + commutative: false + documentation: !string |- + // NAME performs 4 rounds of B loop in SHA1 algorithm defined in FIPS 180-4. + // x contains the state variables a, b, c and d from upper to lower order. + // y contains the W array elements (with the state variable e added to the upper element) from upper to lower order. + // result = the state variables a', b', c', d' updated after 4 rounds. + // constant = 0 for the first 20 rounds of the loop, 1 for the next 20 rounds of the loop..., 3 for the last 20 rounds of the loop. +- go: SHA1NextE + commutative: false + documentation: !string |- + // NAME calculates the state variable e' updated after 4 rounds in SHA1 algorithm defined in FIPS 180-4. + // x contains the state variable a (before the 4 rounds), placed in the upper element. + // y is the elements of W array for next 4 rounds from upper to lower order. + // result = the elements of the W array for the next 4 rounds, with the updated state variable e' added to the upper element, + // from upper to lower order. + // For the last round of the loop, you can specify zero for y to obtain the e' value itself, or better off specifying H4:0:0:0 + // for y to get e' added to H4. (Note that the value of e' is computed only from x, and values of y don't affect the + // computation of the value of e'.) +- go: SHA1Msg1 + commutative: false + documentation: !string |- + // NAME does the XORing of 1 in SHA1 algorithm defined in FIPS 180-4. + // x = {W3, W2, W1, W0} + // y = {0, 0, W5, W4} + // result = {W3^W5, W2^W4, W1^W3, W0^W2}. +- go: SHA1Msg2 + commutative: false + documentation: !string |- + // NAME does the calculation of 3 and 4 in SHA1 algorithm defined in FIPS 180-4. + // x = result of 2. 
+ // y = {W15, W14, W13} + // result = {W19, W18, W17, W16} +- go: SHA256Rounds2 + commutative: false + documentation: !string |- + // NAME does 2 rounds of B loop to calculate updated state variables in SHA256 algorithm defined in FIPS 180-4. + // x = {h, g, d, c} + // y = {f, e, b, a} + // z = {W0+K0, W1+K1} + // result = {f', e', b', a'} + // The K array is a 64-DWORD constant array defined in page 11 of FIPS 180-4. Each element of the K array is to be added to + // the corresponding element of the W array to make the input data z. + // The updated state variables c', d', g', h' are not returned by this instruction, because they are equal to the input data + // y (the state variables a, b, e, f before the 2 rounds). +- go: SHA256Msg1 + commutative: false + documentation: !string |- + // NAME does the sigma and addition of 1 in SHA256 algorithm defined in FIPS 180-4. + // x = {W0, W1, W2, W3} + // y = {W4, 0, 0, 0} + // result = {W0+σ(W1), W1+σ(W2), W2+σ(W3), W3+σ(W4)} +- go: SHA256Msg2 + commutative: false + documentation: !string |- + // NAME does the sigma and addition of 3 in SHA256 algorithm defined in FIPS 180-4. + // x = result of 2 + // y = {0, 0, W14, W15} + // result = {W16, W17, W18, W19}
\ No newline at end of file diff --git a/src/simd/_gen/simdgen/ops/Others/go.yaml b/src/simd/_gen/simdgen/ops/Others/go.yaml index 0f8b7b43a2..77b9fc3783 100644 --- a/src/simd/_gen/simdgen/ops/Others/go.yaml +++ b/src/simd/_gen/simdgen/ops/Others/go.yaml @@ -52,4 +52,45 @@ in: - *uint32s out: - - *uint32s
\ No newline at end of file + - *uint32s +- go: SHA1Round4 + asm: SHA1RNDS4 + operandOrder: "SHA1RNDS4" + in: &2any1imm + - *any + - *any + - class: immediate + immOffset: 0 + out: &1any + - *any +- go: SHA1NextE + asm: SHA1NEXTE + in: &2any + - *any + - *any + out: *1any +- go: SHA1Msg1 + asm: SHA1MSG1 + in: *2any + out: *1any +- go: SHA1Msg2 + asm: SHA1MSG2 + in: *2any + out: *1any +- go: SHA256Rounds2 + asm: SHA256RNDS2 + in: + - base: $t + - base: $t + - base: $t + overwriteElementBits: 32 + out: + - base: $t +- go: SHA256Msg1 + asm: SHA256MSG1 + in: *2any + out: *1any +- go: SHA256Msg2 + asm: SHA256MSG2 + in: *2any + out: *1any
\ No newline at end of file diff --git a/src/simd/_gen/simdgen/xed.go b/src/simd/_gen/simdgen/xed.go index 76bd584b52..9e9b67e77d 100644 --- a/src/simd/_gen/simdgen/xed.go +++ b/src/simd/_gen/simdgen/xed.go @@ -25,7 +25,6 @@ const ( NOT_REG_CLASS = iota // not a register VREG_CLASS // classify as a vector register; see GREG_CLASS // classify as a general register - REG_FIXED // classify as a fixed register ) // instVariant is a bitmap indicating a variant of an instruction that has @@ -852,7 +851,7 @@ type fixedReg struct { } var fixedRegMap = map[string]fixedReg{ - "XED_REG_XMM0": {REG_FIXED, "XMM0", 128}, + "XED_REG_XMM0": {VREG_CLASS, "x0", 128}, } // decodeReg returns class (NOT_REG_CLASS, VREG_CLASS, GREG_CLASS, VREG_CLASS_FIXED, diff --git a/src/simd/cpu.go b/src/simd/cpu.go index 7d4fe25003..ca445072c0 100644 --- a/src/simd/cpu.go +++ b/src/simd/cpu.go @@ -106,3 +106,11 @@ func HasAVX512VPOPCNTDQ() bool { func HasAVXVNNI() bool { return cpu.X86.HasAVXVNNI } + +// HasSHA returns whether the CPU supports the SHA feature. +// +// HasSHA is defined on all GOARCHes, but will only return true on +// GOARCH amd64. +func HasSHA() bool { + return cpu.X86.HasSHA +} diff --git a/src/simd/ops_amd64.go b/src/simd/ops_amd64.go index 49c387aea9..e0c76099ba 100644 --- a/src/simd/ops_amd64.go +++ b/src/simd/ops_amd64.go @@ -5623,6 +5623,156 @@ func (x Float64x4) RoundToEvenScaledResidue(prec uint8) Float64x4 // Asm: VREDUCEPD, CPU Feature: AVX512 func (x Float64x8) RoundToEvenScaledResidue(prec uint8) Float64x8 +/* SHA1Msg1 */ + +// SHA1Msg1 does the XORing of 1 in SHA1 algorithm defined in FIPS 180-4. +// x = {W3, W2, W1, W0} +// y = {0, 0, W5, W4} +// result = {W3^W5, W2^W4, W1^W3, W0^W2}. +// +// Asm: SHA1MSG1, CPU Feature: SHA +func (x Int32x4) SHA1Msg1(y Int32x4) Int32x4 + +// SHA1Msg1 does the XORing of 1 in SHA1 algorithm defined in FIPS 180-4. +// x = {W3, W2, W1, W0} +// y = {0, 0, W5, W4} +// result = {W3^W5, W2^W4, W1^W3, W0^W2}. 
+// +// Asm: SHA1MSG1, CPU Feature: SHA +func (x Uint32x4) SHA1Msg1(y Uint32x4) Uint32x4 + +/* SHA1Msg2 */ + +// SHA1Msg2 does the calculation of 3 and 4 in SHA1 algorithm defined in FIPS 180-4. +// x = result of 2. +// y = {W15, W14, W13} +// result = {W19, W18, W17, W16} +// +// Asm: SHA1MSG2, CPU Feature: SHA +func (x Int32x4) SHA1Msg2(y Int32x4) Int32x4 + +// SHA1Msg2 does the calculation of 3 and 4 in SHA1 algorithm defined in FIPS 180-4. +// x = result of 2. +// y = {W15, W14, W13} +// result = {W19, W18, W17, W16} +// +// Asm: SHA1MSG2, CPU Feature: SHA +func (x Uint32x4) SHA1Msg2(y Uint32x4) Uint32x4 + +/* SHA1NextE */ + +// SHA1NextE calculates the state variable e' updated after 4 rounds in SHA1 algorithm defined in FIPS 180-4. +// x contains the state variable a (before the 4 rounds), placed in the upper element. +// y is the elements of W array for next 4 rounds from upper to lower order. +// result = the elements of the W array for the next 4 rounds, with the updated state variable e' added to the upper element, +// from upper to lower order. +// For the last round of the loop, you can specify zero for y to obtain the e' value itself, or better off specifying H4:0:0:0 +// for y to get e' added to H4. (Note that the value of e' is computed only from x, and values of y don't affect the +// computation of the value of e'.) +// +// Asm: SHA1NEXTE, CPU Feature: SHA +func (x Int32x4) SHA1NextE(y Int32x4) Int32x4 + +// SHA1NextE calculates the state variable e' updated after 4 rounds in SHA1 algorithm defined in FIPS 180-4. +// x contains the state variable a (before the 4 rounds), placed in the upper element. +// y is the elements of W array for next 4 rounds from upper to lower order. +// result = the elements of the W array for the next 4 rounds, with the updated state variable e' added to the upper element, +// from upper to lower order. 
+// For the last round of the loop, you can specify zero for y to obtain the e' value itself, or better off specifying H4:0:0:0 +// for y to get e' added to H4. (Note that the value of e' is computed only from x, and values of y don't affect the +// computation of the value of e'.) +// +// Asm: SHA1NEXTE, CPU Feature: SHA +func (x Uint32x4) SHA1NextE(y Uint32x4) Uint32x4 + +/* SHA1Round4 */ + +// SHA1Round4 performs 4 rounds of B loop in SHA1 algorithm defined in FIPS 180-4. +// x contains the state variables a, b, c and d from upper to lower order. +// y contains the W array elements (with the state variable e added to the upper element) from upper to lower order. +// result = the state variables a', b', c', d' updated after 4 rounds. +// constant = 0 for the first 20 rounds of the loop, 1 for the next 20 rounds of the loop..., 3 for the last 20 rounds of the loop. +// +// constant results in better performance when it's a constant, a non-constant value will be translated into a jump table. +// +// Asm: SHA1RNDS4, CPU Feature: SHA +func (x Int32x4) SHA1Round4(constant uint8, y Int32x4) Int32x4 + +// SHA1Round4 performs 4 rounds of B loop in SHA1 algorithm defined in FIPS 180-4. +// x contains the state variables a, b, c and d from upper to lower order. +// y contains the W array elements (with the state variable e added to the upper element) from upper to lower order. +// result = the state variables a', b', c', d' updated after 4 rounds. +// constant = 0 for the first 20 rounds of the loop, 1 for the next 20 rounds of the loop..., 3 for the last 20 rounds of the loop. +// +// constant results in better performance when it's a constant, a non-constant value will be translated into a jump table. +// +// Asm: SHA1RNDS4, CPU Feature: SHA +func (x Uint32x4) SHA1Round4(constant uint8, y Uint32x4) Uint32x4 + +/* SHA256Msg1 */ + +// SHA256Msg1 does the sigma and addition of 1 in SHA256 algorithm defined in FIPS 180-4.
+// x = {W0, W1, W2, W3} +// y = {W4, 0, 0, 0} +// result = {W0+σ(W1), W1+σ(W2), W2+σ(W3), W3+σ(W4)} +// +// Asm: SHA256MSG1, CPU Feature: SHA +func (x Int32x4) SHA256Msg1(y Int32x4) Int32x4 + +// SHA256Msg1 does the sigma and addition of 1 in SHA256 algorithm defined in FIPS 180-4. +// x = {W0, W1, W2, W3} +// y = {W4, 0, 0, 0} +// result = {W0+σ(W1), W1+σ(W2), W2+σ(W3), W3+σ(W4)} +// +// Asm: SHA256MSG1, CPU Feature: SHA +func (x Uint32x4) SHA256Msg1(y Uint32x4) Uint32x4 + +/* SHA256Msg2 */ + +// SHA256Msg2 does the sigma and addition of 3 in SHA256 algorithm defined in FIPS 180-4. +// x = result of 2 +// y = {0, 0, W14, W15} +// result = {W16, W17, W18, W19} +// +// Asm: SHA256MSG1, CPU Feature: SHA +func (x Int32x4) SHA256Msg2(y Int32x4) Int32x4 + +// SHA256Msg2 does the sigma and addition of 3 in SHA256 algorithm defined in FIPS 180-4. +// x = result of 2 +// y = {0, 0, W14, W15} +// result = {W16, W17, W18, W19} +// +// Asm: SHA256MSG1, CPU Feature: SHA +func (x Uint32x4) SHA256Msg2(y Uint32x4) Uint32x4 + +/* SHA256Rounds2 */ + +// SHA256Rounds2 does 2 rounds of B loop to calculate updated state variables in SHA256 algorithm defined in FIPS 180-4. +// x = {h, g, d, c} +// y = {f, e, b, a} +// z = {W0+K0, W1+K1} +// result = {f', e', b', a'} +// The K array is a 64-DWORD constant array defined in page 11 of FIPS 180-4. Each element of the K array is to be added to +// the corresponding element of the W array to make the input data z. +// The updated state variables c', d', g', h' are not returned by this instruction, because they are equal to the input data +// y (the state variables a, b, e, f before the 2 rounds). +// +// Asm: SHA256RNDS2, CPU Feature: SHA +func (x Int32x4) SHA256Rounds2(y Int32x4, z Int32x4) Int32x4 + +// SHA256Rounds2 does 2 rounds of B loop to calculate updated state variables in SHA256 algorithm defined in FIPS 180-4.
+// x = {h, g, d, c} +// y = {f, e, b, a} +// z = {W0+K0, W1+K1} +// result = {f', e', b', a'} +// The K array is a 64-DWORD constant array defined in page 11 of FIPS 180-4. Each element of the K array is to be added to +// the corresponding element of the W array to make the input data z. +// The updated state variables c', d', g', h' are not returned by this instruction, because they are equal to the input data +// y (the state variables a, b, e, f before the 2 rounds). +// +// Asm: SHA256RNDS2, CPU Feature: SHA +func (x Uint32x4) SHA256Rounds2(y Uint32x4, z Uint32x4) Uint32x4 + /* Scale */ // Scale multiplies elements by a power of 2. |
