From e84983fa40a6e97d3e169f1f3549af889b1b1f22 Mon Sep 17 00:00:00 2001 From: Cherry Mui Date: Fri, 2 Jan 2026 14:02:07 -0500 Subject: cmd/compile: optimize SIMD IsNaN.Or(IsNaN) IsNaN's underlying instruction, VCMPPS (or VCMPPD), takes two inputs and computes whether either of them is NaN. Optimize the Or pattern to generate the two-operand form. This implements the optimization mentioned in CL 733660. Change-Id: I13943b377ee384864c913eed320763f333a03e41 Reviewed-on: https://go-review.googlesource.com/c/go/+/733680 Reviewed-by: David Chase LUCI-TryBot-Result: Go LUCI --- test/codegen/simd.go | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) (limited to 'test/codegen') diff --git a/test/codegen/simd.go b/test/codegen/simd.go index 8f3a1a9f46..04e01944de 100644 --- a/test/codegen/simd.go +++ b/test/codegen/simd.go @@ -6,11 +6,14 @@ // These tests check code generation of simd peephole optimizations. -//go:build goexperiment.simd +//go:build goexperiment.simd && amd64 package codegen -import "simd/archsimd" +import ( + "math" + "simd/archsimd" +) func vptest1() bool { v1 := archsimd.LoadUint64x2Slice([]uint64{0, 1}) @@ -77,3 +80,27 @@ func simdMaskedMerge() archsimd.Int16x16 { mask := archsimd.Mask16x16FromBits(5) return x.Add(y).Merge(x, mask) // amd64:`VPBLENDVB\s.*$` } + +var nan = math.NaN() +var floats64s = []float64{0, 1, 2, nan, 4, nan, 6, 7, 8, 9, 10, 11, nan, 13, 14, 15} +var sinkInt64s = make([]int64, 100) + +func simdIsNaN() { + x := archsimd.LoadFloat64x4Slice(floats64s) + y := archsimd.LoadFloat64x4Slice(floats64s[4:]) + a := x.IsNaN() + b := y.IsNaN() + // amd64:"VCMPPD [$]3," -"VPOR" + c := a.Or(b) + c.ToInt64x4().StoreSlice(sinkInt64s) +} + +func simdIsNaN512() { + x := archsimd.LoadFloat64x8Slice(floats64s) + y := archsimd.LoadFloat64x8Slice(floats64s[8:]) + a := x.IsNaN() + b := y.IsNaN() + // amd64:"VCMPPD [$]3," -"VPOR" + c := a.Or(b) + c.ToInt64x8().StoreSlice(sinkInt64s) +} -- cgit v1.3-5-g45d5