diff options
| author | Cherry Mui <cherryyz@google.com> | 2025-12-31 02:42:30 -0500 |
|---|---|---|
| committer | Cherry Mui <cherryyz@google.com> | 2026-01-02 12:13:40 -0800 |
| commit | 13440fb51831bfde5804430596d2045a64fd2209 (patch) | |
| tree | 84196ec227a3ed45eee119c10a434169216e9516 /src/cmd | |
| parent | c3550b3352ae283110c443576e1e62cdf8efaa72 (diff) | |
| download | go-13440fb51831bfde5804430596d2045a64fd2209.tar.xz | |
simd/archsimd: make IsNaN unary
Currently, the IsNan API is defined as x.IsNan(y), which returns
a mask indicating, for each element, whether either x or y is NaN.
Albeit closer to the machine instruction, this is a weird API, as
IsNaN is a unary operation. This CL changes it to be unary: x.IsNaN().
It compiles to VCMPPS $3, x, x (or VCMPPD). For the two-operand
version, we can optimize x.IsNaN().Or(y.IsNaN()) to VCMPPS $3, x,
y (not done in this CL).
While here, change the name to IsNaN (uppercase both Ns), which
matches math.IsNaN.
Tests in the next CL.
Change-Id: Ib6e7afc2635e6c3c606db5ea16420ee673a6c6d6
Reviewed-on: https://go-review.googlesource.com/c/go/+/733660
Reviewed-by: David Chase <drchase@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Diffstat (limited to 'src/cmd')
| -rw-r--r-- | src/cmd/compile/internal/ssa/_gen/AMD64.rules | 7 | ||||
| -rw-r--r-- | src/cmd/compile/internal/ssa/_gen/genericOps.go | 8 | ||||
| -rw-r--r-- | src/cmd/compile/internal/ssa/_gen/simdAMD64.rules | 6 | ||||
| -rw-r--r-- | src/cmd/compile/internal/ssa/_gen/simdgenericOps.go | 6 | ||||
| -rw-r--r-- | src/cmd/compile/internal/ssa/opGen.go | 78 | ||||
| -rw-r--r-- | src/cmd/compile/internal/ssa/rewriteAMD64.go | 84 | ||||
| -rw-r--r-- | src/cmd/compile/internal/ssagen/intrinsics.go | 6 | ||||
| -rw-r--r-- | src/cmd/compile/internal/ssagen/simdintrinsics.go | 6 |
8 files changed, 93 insertions, 108 deletions
diff --git a/src/cmd/compile/internal/ssa/_gen/AMD64.rules b/src/cmd/compile/internal/ssa/_gen/AMD64.rules index 38ca44f7eb..9c54186854 100644 --- a/src/cmd/compile/internal/ssa/_gen/AMD64.rules +++ b/src/cmd/compile/internal/ssa/_gen/AMD64.rules @@ -1730,6 +1730,13 @@ // Misc (IsZeroVec x) => (SETEQ (VPTEST x x)) +(IsNaNFloat32x4 x) => (VCMPPS128 [3] x x) +(IsNaNFloat32x8 x) => (VCMPPS256 [3] x x) +(IsNaNFloat32x16 x) => (VPMOVMToVec32x16 (VCMPPS512 [3] x x)) +(IsNaNFloat64x2 x) => (VCMPPD128 [3] x x) +(IsNaNFloat64x4 x) => (VCMPPD256 [3] x x) +(IsNaNFloat64x8 x) => (VPMOVMToVec64x8 (VCMPPD512 [3] x x)) + // SIMD vector K-masked loads and stores (LoadMasked64 <t> ptr mask mem) && t.Size() == 64 => (VPMASK64load512 ptr (VPMOVVec64x8ToM <types.TypeMask> mask) mem) diff --git a/src/cmd/compile/internal/ssa/_gen/genericOps.go b/src/cmd/compile/internal/ssa/_gen/genericOps.go index 8637133e5f..85bde1aab2 100644 --- a/src/cmd/compile/internal/ssa/_gen/genericOps.go +++ b/src/cmd/compile/internal/ssa/_gen/genericOps.go @@ -715,6 +715,14 @@ var genericOps = []opData{ // Returns true if arg0 is all zero. {name: "IsZeroVec", argLength: 1}, + + // Returns a mask indicating whether arg0's elements are NaN. + {name: "IsNaNFloat32x4", argLength: 1}, + {name: "IsNaNFloat32x8", argLength: 1}, + {name: "IsNaNFloat32x16", argLength: 1}, + {name: "IsNaNFloat64x2", argLength: 1}, + {name: "IsNaNFloat64x4", argLength: 1}, + {name: "IsNaNFloat64x8", argLength: 1}, } // kind controls successors implicit exit diff --git a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules index 8a5b70da30..5c83f39a1f 100644 --- a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules +++ b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules @@ -559,12 +559,6 @@ (InterleaveLoGroupedUint32x16 ...) => (VPUNPCKLDQ512 ...) (InterleaveLoGroupedUint64x4 ...) => (VPUNPCKLQDQ256 ...) (InterleaveLoGroupedUint64x8 ...) => (VPUNPCKLQDQ512 ...) 
-(IsNanFloat32x4 x y) => (VCMPPS128 [3] x y) -(IsNanFloat32x8 x y) => (VCMPPS256 [3] x y) -(IsNanFloat32x16 x y) => (VPMOVMToVec32x16 (VCMPPS512 [3] x y)) -(IsNanFloat64x2 x y) => (VCMPPD128 [3] x y) -(IsNanFloat64x4 x y) => (VCMPPD256 [3] x y) -(IsNanFloat64x8 x y) => (VPMOVMToVec64x8 (VCMPPD512 [3] x y)) (LeadingZerosInt32x4 ...) => (VPLZCNTD128 ...) (LeadingZerosInt32x8 ...) => (VPLZCNTD256 ...) (LeadingZerosInt32x16 ...) => (VPLZCNTD512 ...) diff --git a/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go b/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go index af1007cd54..889ab0d84f 100644 --- a/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go +++ b/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go @@ -519,12 +519,6 @@ func simdGenericOps() []opData { {name: "InterleaveLoUint16x8", argLength: 2, commutative: false}, {name: "InterleaveLoUint32x4", argLength: 2, commutative: false}, {name: "InterleaveLoUint64x2", argLength: 2, commutative: false}, - {name: "IsNanFloat32x4", argLength: 2, commutative: true}, - {name: "IsNanFloat32x8", argLength: 2, commutative: true}, - {name: "IsNanFloat32x16", argLength: 2, commutative: true}, - {name: "IsNanFloat64x2", argLength: 2, commutative: true}, - {name: "IsNanFloat64x4", argLength: 2, commutative: true}, - {name: "IsNanFloat64x8", argLength: 2, commutative: true}, {name: "LeadingZerosInt32x4", argLength: 1, commutative: false}, {name: "LeadingZerosInt32x8", argLength: 1, commutative: false}, {name: "LeadingZerosInt32x16", argLength: 1, commutative: false}, diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index d2ba15f740..abaf7911d4 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -6158,6 +6158,12 @@ const ( OpCvtMask64x4to8 OpCvtMask64x8to8 OpIsZeroVec + OpIsNaNFloat32x4 + OpIsNaNFloat32x8 + OpIsNaNFloat32x16 + OpIsNaNFloat64x2 + OpIsNaNFloat64x4 + OpIsNaNFloat64x8 OpAESDecryptLastRoundUint8x16 
OpAESDecryptLastRoundUint8x32 OpAESDecryptLastRoundUint8x64 @@ -6673,12 +6679,6 @@ const ( OpInterleaveLoUint16x8 OpInterleaveLoUint32x4 OpInterleaveLoUint64x2 - OpIsNanFloat32x4 - OpIsNanFloat32x8 - OpIsNanFloat32x16 - OpIsNanFloat64x2 - OpIsNanFloat64x4 - OpIsNanFloat64x8 OpLeadingZerosInt32x4 OpLeadingZerosInt32x8 OpLeadingZerosInt32x16 @@ -88994,6 +88994,36 @@ var opcodeTable = [...]opInfo{ generic: true, }, { + name: "IsNaNFloat32x4", + argLen: 1, + generic: true, + }, + { + name: "IsNaNFloat32x8", + argLen: 1, + generic: true, + }, + { + name: "IsNaNFloat32x16", + argLen: 1, + generic: true, + }, + { + name: "IsNaNFloat64x2", + argLen: 1, + generic: true, + }, + { + name: "IsNaNFloat64x4", + argLen: 1, + generic: true, + }, + { + name: "IsNaNFloat64x8", + argLen: 1, + generic: true, + }, + { name: "AESDecryptLastRoundUint8x16", argLen: 2, generic: true, @@ -91671,42 +91701,6 @@ var opcodeTable = [...]opInfo{ generic: true, }, { - name: "IsNanFloat32x4", - argLen: 2, - commutative: true, - generic: true, - }, - { - name: "IsNanFloat32x8", - argLen: 2, - commutative: true, - generic: true, - }, - { - name: "IsNanFloat32x16", - argLen: 2, - commutative: true, - generic: true, - }, - { - name: "IsNanFloat64x2", - argLen: 2, - commutative: true, - generic: true, - }, - { - name: "IsNanFloat64x4", - argLen: 2, - commutative: true, - generic: true, - }, - { - name: "IsNanFloat64x8", - argLen: 2, - commutative: true, - generic: true, - }, - { name: "LeadingZerosInt32x4", argLen: 1, generic: true, diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go index 0c04410074..0b2bb74ce4 100644 --- a/src/cmd/compile/internal/ssa/rewriteAMD64.go +++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go @@ -3773,18 +3773,18 @@ func rewriteValueAMD64(v *Value) bool { return true case OpIsInBounds: return rewriteValueAMD64_OpIsInBounds(v) - case OpIsNanFloat32x16: - return rewriteValueAMD64_OpIsNanFloat32x16(v) - case OpIsNanFloat32x4: - 
return rewriteValueAMD64_OpIsNanFloat32x4(v) - case OpIsNanFloat32x8: - return rewriteValueAMD64_OpIsNanFloat32x8(v) - case OpIsNanFloat64x2: - return rewriteValueAMD64_OpIsNanFloat64x2(v) - case OpIsNanFloat64x4: - return rewriteValueAMD64_OpIsNanFloat64x4(v) - case OpIsNanFloat64x8: - return rewriteValueAMD64_OpIsNanFloat64x8(v) + case OpIsNaNFloat32x16: + return rewriteValueAMD64_OpIsNaNFloat32x16(v) + case OpIsNaNFloat32x4: + return rewriteValueAMD64_OpIsNaNFloat32x4(v) + case OpIsNaNFloat32x8: + return rewriteValueAMD64_OpIsNaNFloat32x8(v) + case OpIsNaNFloat64x2: + return rewriteValueAMD64_OpIsNaNFloat64x2(v) + case OpIsNaNFloat64x4: + return rewriteValueAMD64_OpIsNaNFloat64x4(v) + case OpIsNaNFloat64x8: + return rewriteValueAMD64_OpIsNaNFloat64x8(v) case OpIsNonNil: return rewriteValueAMD64_OpIsNonNil(v) case OpIsSliceInBounds: @@ -70957,94 +70957,82 @@ func rewriteValueAMD64_OpIsInBounds(v *Value) bool { return true } } -func rewriteValueAMD64_OpIsNanFloat32x16(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpIsNaNFloat32x16(v *Value) bool { v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (IsNanFloat32x16 x y) - // result: (VPMOVMToVec32x16 (VCMPPS512 [3] x y)) + // match: (IsNaNFloat32x16 x) + // result: (VPMOVMToVec32x16 (VCMPPS512 [3] x x)) for { x := v_0 - y := v_1 v.reset(OpAMD64VPMOVMToVec32x16) v0 := b.NewValue0(v.Pos, OpAMD64VCMPPS512, typ.Mask) v0.AuxInt = uint8ToAuxInt(3) - v0.AddArg2(x, y) + v0.AddArg2(x, x) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpIsNanFloat32x4(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpIsNaNFloat32x4(v *Value) bool { v_0 := v.Args[0] - // match: (IsNanFloat32x4 x y) - // result: (VCMPPS128 [3] x y) + // match: (IsNaNFloat32x4 x) + // result: (VCMPPS128 [3] x x) for { x := v_0 - y := v_1 v.reset(OpAMD64VCMPPS128) v.AuxInt = uint8ToAuxInt(3) - v.AddArg2(x, y) + v.AddArg2(x, x) return true } } -func rewriteValueAMD64_OpIsNanFloat32x8(v *Value) bool { - v_1 := 
v.Args[1] +func rewriteValueAMD64_OpIsNaNFloat32x8(v *Value) bool { v_0 := v.Args[0] - // match: (IsNanFloat32x8 x y) - // result: (VCMPPS256 [3] x y) + // match: (IsNaNFloat32x8 x) + // result: (VCMPPS256 [3] x x) for { x := v_0 - y := v_1 v.reset(OpAMD64VCMPPS256) v.AuxInt = uint8ToAuxInt(3) - v.AddArg2(x, y) + v.AddArg2(x, x) return true } } -func rewriteValueAMD64_OpIsNanFloat64x2(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpIsNaNFloat64x2(v *Value) bool { v_0 := v.Args[0] - // match: (IsNanFloat64x2 x y) - // result: (VCMPPD128 [3] x y) + // match: (IsNaNFloat64x2 x) + // result: (VCMPPD128 [3] x x) for { x := v_0 - y := v_1 v.reset(OpAMD64VCMPPD128) v.AuxInt = uint8ToAuxInt(3) - v.AddArg2(x, y) + v.AddArg2(x, x) return true } } -func rewriteValueAMD64_OpIsNanFloat64x4(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpIsNaNFloat64x4(v *Value) bool { v_0 := v.Args[0] - // match: (IsNanFloat64x4 x y) - // result: (VCMPPD256 [3] x y) + // match: (IsNaNFloat64x4 x) + // result: (VCMPPD256 [3] x x) for { x := v_0 - y := v_1 v.reset(OpAMD64VCMPPD256) v.AuxInt = uint8ToAuxInt(3) - v.AddArg2(x, y) + v.AddArg2(x, x) return true } } -func rewriteValueAMD64_OpIsNanFloat64x8(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpIsNaNFloat64x8(v *Value) bool { v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (IsNanFloat64x8 x y) - // result: (VPMOVMToVec64x8 (VCMPPD512 [3] x y)) + // match: (IsNaNFloat64x8 x) + // result: (VPMOVMToVec64x8 (VCMPPD512 [3] x x)) for { x := v_0 - y := v_1 v.reset(OpAMD64VPMOVMToVec64x8) v0 := b.NewValue0(v.Pos, OpAMD64VCMPPD512, typ.Mask) v0.AuxInt = uint8ToAuxInt(3) - v0.AddArg2(x, y) + v0.AddArg2(x, x) v.AddArg(v0) return true } diff --git a/src/cmd/compile/internal/ssagen/intrinsics.go b/src/cmd/compile/internal/ssagen/intrinsics.go index 4425c5617b..e2eebd783d 100644 --- a/src/cmd/compile/internal/ssagen/intrinsics.go +++ b/src/cmd/compile/internal/ssagen/intrinsics.go @@ -1667,6 
+1667,12 @@ func initIntrinsics(cfg *intrinsicBuildConfig) { addF(simdPackage, "Uint16x16.IsZero", opLen1(ssa.OpIsZeroVec, types.Types[types.TBOOL]), sys.AMD64) addF(simdPackage, "Uint32x8.IsZero", opLen1(ssa.OpIsZeroVec, types.Types[types.TBOOL]), sys.AMD64) addF(simdPackage, "Uint64x4.IsZero", opLen1(ssa.OpIsZeroVec, types.Types[types.TBOOL]), sys.AMD64) + addF(simdPackage, "Float32x4.IsNaN", opLen1(ssa.OpIsNaNFloat32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Float32x8.IsNaN", opLen1(ssa.OpIsNaNFloat32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Float32x16.IsNaN", opLen1(ssa.OpIsNaNFloat32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Float64x2.IsNaN", opLen1(ssa.OpIsNaNFloat64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Float64x4.IsNaN", opLen1(ssa.OpIsNaNFloat64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Float64x8.IsNaN", opLen1(ssa.OpIsNaNFloat64x8, types.TypeVec512), sys.AMD64) // sfp4 is intrinsic-if-constant, but otherwise it's complicated enough to just implement in Go. sfp4 := func(method string, hwop ssa.Op, vectype *types.Type) { diff --git a/src/cmd/compile/internal/ssagen/simdintrinsics.go b/src/cmd/compile/internal/ssagen/simdintrinsics.go index 5a95761228..4ad0c6032c 100644 --- a/src/cmd/compile/internal/ssagen/simdintrinsics.go +++ b/src/cmd/compile/internal/ssagen/simdintrinsics.go @@ -571,12 +571,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Uint32x16.InterleaveLoGrouped", opLen2(ssa.OpInterleaveLoGroupedUint32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint64x4.InterleaveLoGrouped", opLen2(ssa.OpInterleaveLoGroupedUint64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint64x8.InterleaveLoGrouped", opLen2(ssa.OpInterleaveLoGroupedUint64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float32x4.IsNan", opLen2(ssa.OpIsNanFloat32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float32x8.IsNan", opLen2(ssa.OpIsNanFloat32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float32x16.IsNan", opLen2(ssa.OpIsNanFloat32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float64x2.IsNan", opLen2(ssa.OpIsNanFloat64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float64x4.IsNan", opLen2(ssa.OpIsNanFloat64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float64x8.IsNan", opLen2(ssa.OpIsNanFloat64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int32x4.LeadingZeros", opLen1(ssa.OpLeadingZerosInt32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int32x8.LeadingZeros", opLen1(ssa.OpLeadingZerosInt32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int32x16.LeadingZeros", opLen1(ssa.OpLeadingZerosInt32x16, types.TypeVec512), sys.AMD64) |
