diff options
| author | Junyang Shao <shaojunyang@google.com> | 2026-02-24 22:34:44 +0000 |
|---|---|---|
| committer | Junyang Shao <shaojunyang@google.com> | 2026-03-02 12:07:34 -0800 |
| commit | aa80d7a7e6bf97aa27a74cc5056ef270a2a0c2f4 (patch) | |
| tree | 66a8bdd949123efb45ad4f1b70519c10ff5c5109 /src/simd | |
| parent | ba057f7950f93a7cbd249ad79ed763a526a9cbbf (diff) | |
| download | go-aa80d7a7e6bf97aa27a74cc5056ef270a2a0c2f4.tar.xz | |
cmd/compile, simd/archsimd: add VPSRL immediate peepholes
Before this CL, simdgen contained a sign check to selectively enable
such rules for deduplication purposes. This left out `VPSRL`, as it's
only available in unsigned form. This CL fixes that.
It looks like the previous documentation fix to the SHA instruction might
not have run go generate, so this CL also contains the generated code for
that fix.
There is also a weird phantom import in
cmd/compile/internal/ssa/issue77582_test.go.
This CL also fixes that.
The trybot didn't complain?
Change-Id: Ibbf9f789c1a67af1474f0285ab376bc07f17667e
Reviewed-on: https://go-review.googlesource.com/c/go/+/748501
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: David Chase <drchase@google.com>
Diffstat (limited to 'src/simd')
| -rw-r--r-- | src/simd/archsimd/_gen/simdgen/gen_simdrules.go | 7 | ||||
| -rw-r--r-- | src/simd/archsimd/ops_amd64.go | 2 |
2 files changed, 5 insertions, 4 deletions
diff --git a/src/simd/archsimd/_gen/simdgen/gen_simdrules.go b/src/simd/archsimd/_gen/simdgen/gen_simdrules.go index 7a8823483a..01f99e54c1 100644 --- a/src/simd/archsimd/_gen/simdgen/gen_simdrules.go +++ b/src/simd/archsimd/_gen/simdgen/gen_simdrules.go @@ -129,7 +129,8 @@ func writeSIMDRules(ops []Operation) *bytes.Buffer { // asm -> masked merging rules maskedMergeOpts := make(map[string]string) s2n := map[int]string{8: "B", 16: "W", 32: "D", 64: "Q"} - asmCheck := map[string]bool{} + asmCheck := map[string]bool{} // for masked merge optimizations. + sftimmCheck := map[string]bool{} // deduplicate sftimm rules var allData []tplRuleData var optData []tplRuleData // for mask peephole optimizations, and other misc var memOptData []tplRuleData // for memory peephole optimizations @@ -229,8 +230,8 @@ func writeSIMDRules(ops []Operation) *bytes.Buffer { if gOp.SpecialLower != nil { if *gOp.SpecialLower == "sftimm" { - if data.GoType[0] == 'I' { - // only do these for signed types, it is a duplicate rewrite for unsigned + if !sftimmCheck[data.Asm] { + sftimmCheck[data.Asm] = true sftImmData := data if tplName == "maskIn" { sftImmData.tplName = "masksftimm" diff --git a/src/simd/archsimd/ops_amd64.go b/src/simd/archsimd/ops_amd64.go index 3489f34642..8eb06994ad 100644 --- a/src/simd/archsimd/ops_amd64.go +++ b/src/simd/archsimd/ops_amd64.go @@ -5359,7 +5359,7 @@ func (x Uint32x4) SHA1NextE(y Uint32x4) Uint32x4 /* SHA256Message1 */ -// SHA256Message1 does the sigma and addtion of 1 in SHA256 algorithm defined in FIPS 180-4. +// SHA256Message1 does the sigma and addition of 1 in SHA256 algorithm defined in FIPS 180-4. // x = {W0, W1, W2, W3} // y = {W4, 0, 0, 0} // result = {W0+σ(W1), W1+σ(W2), W2+σ(W3), W3+σ(W4)} |
