aboutsummaryrefslogtreecommitdiff
path: root/src/cmd
diff options
context:
space:
mode:
authorCherry Mui <cherryyz@google.com>2025-12-31 02:42:30 -0500
committerCherry Mui <cherryyz@google.com>2026-01-02 12:13:40 -0800
commit13440fb51831bfde5804430596d2045a64fd2209 (patch)
tree84196ec227a3ed45eee119c10a434169216e9516 /src/cmd
parentc3550b3352ae283110c443576e1e62cdf8efaa72 (diff)
downloadgo-13440fb51831bfde5804430596d2045a64fd2209.tar.xz
simd/archsimd: make IsNaN unary
Currently, the IsNan API is defined as x.IsNan(y), which returns a mask representing, for each element, whether either x or y is NaN. Albeit closer to the machine instruction, this is a weird API, as IsNaN is a unary operation. This CL changes it to unary, x.IsNaN(). It compiles to VCMPPS $3, x, x (or VCMPPD). For the two-operand version, we can optimize x.IsNaN().Or(y.IsNaN()) to VCMPPS $3, x, y (not done in this CL). While here, change the name to IsNaN (uppercase both Ns), which matches math.IsNaN. Tests in the next CL. Change-Id: Ib6e7afc2635e6c3c606db5ea16420ee673a6c6d6 Reviewed-on: https://go-review.googlesource.com/c/go/+/733660 Reviewed-by: David Chase <drchase@google.com> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Diffstat (limited to 'src/cmd')
-rw-r--r--src/cmd/compile/internal/ssa/_gen/AMD64.rules7
-rw-r--r--src/cmd/compile/internal/ssa/_gen/genericOps.go8
-rw-r--r--src/cmd/compile/internal/ssa/_gen/simdAMD64.rules6
-rw-r--r--src/cmd/compile/internal/ssa/_gen/simdgenericOps.go6
-rw-r--r--src/cmd/compile/internal/ssa/opGen.go78
-rw-r--r--src/cmd/compile/internal/ssa/rewriteAMD64.go84
-rw-r--r--src/cmd/compile/internal/ssagen/intrinsics.go6
-rw-r--r--src/cmd/compile/internal/ssagen/simdintrinsics.go6
8 files changed, 93 insertions, 108 deletions
diff --git a/src/cmd/compile/internal/ssa/_gen/AMD64.rules b/src/cmd/compile/internal/ssa/_gen/AMD64.rules
index 38ca44f7eb..9c54186854 100644
--- a/src/cmd/compile/internal/ssa/_gen/AMD64.rules
+++ b/src/cmd/compile/internal/ssa/_gen/AMD64.rules
@@ -1730,6 +1730,13 @@
// Misc
(IsZeroVec x) => (SETEQ (VPTEST x x))
+(IsNaNFloat32x4 x) => (VCMPPS128 [3] x x)
+(IsNaNFloat32x8 x) => (VCMPPS256 [3] x x)
+(IsNaNFloat32x16 x) => (VPMOVMToVec32x16 (VCMPPS512 [3] x x))
+(IsNaNFloat64x2 x) => (VCMPPD128 [3] x x)
+(IsNaNFloat64x4 x) => (VCMPPD256 [3] x x)
+(IsNaNFloat64x8 x) => (VPMOVMToVec64x8 (VCMPPD512 [3] x x))
+
// SIMD vector K-masked loads and stores
(LoadMasked64 <t> ptr mask mem) && t.Size() == 64 => (VPMASK64load512 ptr (VPMOVVec64x8ToM <types.TypeMask> mask) mem)
diff --git a/src/cmd/compile/internal/ssa/_gen/genericOps.go b/src/cmd/compile/internal/ssa/_gen/genericOps.go
index 8637133e5f..85bde1aab2 100644
--- a/src/cmd/compile/internal/ssa/_gen/genericOps.go
+++ b/src/cmd/compile/internal/ssa/_gen/genericOps.go
@@ -715,6 +715,14 @@ var genericOps = []opData{
// Returns true if arg0 is all zero.
{name: "IsZeroVec", argLength: 1},
+
+ // Returns a mask indicating whether arg0's elements are NaN.
+ {name: "IsNaNFloat32x4", argLength: 1},
+ {name: "IsNaNFloat32x8", argLength: 1},
+ {name: "IsNaNFloat32x16", argLength: 1},
+ {name: "IsNaNFloat64x2", argLength: 1},
+ {name: "IsNaNFloat64x4", argLength: 1},
+ {name: "IsNaNFloat64x8", argLength: 1},
}
// kind controls successors implicit exit
diff --git a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules
index 8a5b70da30..5c83f39a1f 100644
--- a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules
+++ b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules
@@ -559,12 +559,6 @@
(InterleaveLoGroupedUint32x16 ...) => (VPUNPCKLDQ512 ...)
(InterleaveLoGroupedUint64x4 ...) => (VPUNPCKLQDQ256 ...)
(InterleaveLoGroupedUint64x8 ...) => (VPUNPCKLQDQ512 ...)
-(IsNanFloat32x4 x y) => (VCMPPS128 [3] x y)
-(IsNanFloat32x8 x y) => (VCMPPS256 [3] x y)
-(IsNanFloat32x16 x y) => (VPMOVMToVec32x16 (VCMPPS512 [3] x y))
-(IsNanFloat64x2 x y) => (VCMPPD128 [3] x y)
-(IsNanFloat64x4 x y) => (VCMPPD256 [3] x y)
-(IsNanFloat64x8 x y) => (VPMOVMToVec64x8 (VCMPPD512 [3] x y))
(LeadingZerosInt32x4 ...) => (VPLZCNTD128 ...)
(LeadingZerosInt32x8 ...) => (VPLZCNTD256 ...)
(LeadingZerosInt32x16 ...) => (VPLZCNTD512 ...)
diff --git a/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go b/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go
index af1007cd54..889ab0d84f 100644
--- a/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go
+++ b/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go
@@ -519,12 +519,6 @@ func simdGenericOps() []opData {
{name: "InterleaveLoUint16x8", argLength: 2, commutative: false},
{name: "InterleaveLoUint32x4", argLength: 2, commutative: false},
{name: "InterleaveLoUint64x2", argLength: 2, commutative: false},
- {name: "IsNanFloat32x4", argLength: 2, commutative: true},
- {name: "IsNanFloat32x8", argLength: 2, commutative: true},
- {name: "IsNanFloat32x16", argLength: 2, commutative: true},
- {name: "IsNanFloat64x2", argLength: 2, commutative: true},
- {name: "IsNanFloat64x4", argLength: 2, commutative: true},
- {name: "IsNanFloat64x8", argLength: 2, commutative: true},
{name: "LeadingZerosInt32x4", argLength: 1, commutative: false},
{name: "LeadingZerosInt32x8", argLength: 1, commutative: false},
{name: "LeadingZerosInt32x16", argLength: 1, commutative: false},
diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go
index d2ba15f740..abaf7911d4 100644
--- a/src/cmd/compile/internal/ssa/opGen.go
+++ b/src/cmd/compile/internal/ssa/opGen.go
@@ -6158,6 +6158,12 @@ const (
OpCvtMask64x4to8
OpCvtMask64x8to8
OpIsZeroVec
+ OpIsNaNFloat32x4
+ OpIsNaNFloat32x8
+ OpIsNaNFloat32x16
+ OpIsNaNFloat64x2
+ OpIsNaNFloat64x4
+ OpIsNaNFloat64x8
OpAESDecryptLastRoundUint8x16
OpAESDecryptLastRoundUint8x32
OpAESDecryptLastRoundUint8x64
@@ -6673,12 +6679,6 @@ const (
OpInterleaveLoUint16x8
OpInterleaveLoUint32x4
OpInterleaveLoUint64x2
- OpIsNanFloat32x4
- OpIsNanFloat32x8
- OpIsNanFloat32x16
- OpIsNanFloat64x2
- OpIsNanFloat64x4
- OpIsNanFloat64x8
OpLeadingZerosInt32x4
OpLeadingZerosInt32x8
OpLeadingZerosInt32x16
@@ -88994,6 +88994,36 @@ var opcodeTable = [...]opInfo{
generic: true,
},
{
+ name: "IsNaNFloat32x4",
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "IsNaNFloat32x8",
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "IsNaNFloat32x16",
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "IsNaNFloat64x2",
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "IsNaNFloat64x4",
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "IsNaNFloat64x8",
+ argLen: 1,
+ generic: true,
+ },
+ {
name: "AESDecryptLastRoundUint8x16",
argLen: 2,
generic: true,
@@ -91671,42 +91701,6 @@ var opcodeTable = [...]opInfo{
generic: true,
},
{
- name: "IsNanFloat32x4",
- argLen: 2,
- commutative: true,
- generic: true,
- },
- {
- name: "IsNanFloat32x8",
- argLen: 2,
- commutative: true,
- generic: true,
- },
- {
- name: "IsNanFloat32x16",
- argLen: 2,
- commutative: true,
- generic: true,
- },
- {
- name: "IsNanFloat64x2",
- argLen: 2,
- commutative: true,
- generic: true,
- },
- {
- name: "IsNanFloat64x4",
- argLen: 2,
- commutative: true,
- generic: true,
- },
- {
- name: "IsNanFloat64x8",
- argLen: 2,
- commutative: true,
- generic: true,
- },
- {
name: "LeadingZerosInt32x4",
argLen: 1,
generic: true,
diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go
index 0c04410074..0b2bb74ce4 100644
--- a/src/cmd/compile/internal/ssa/rewriteAMD64.go
+++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go
@@ -3773,18 +3773,18 @@ func rewriteValueAMD64(v *Value) bool {
return true
case OpIsInBounds:
return rewriteValueAMD64_OpIsInBounds(v)
- case OpIsNanFloat32x16:
- return rewriteValueAMD64_OpIsNanFloat32x16(v)
- case OpIsNanFloat32x4:
- return rewriteValueAMD64_OpIsNanFloat32x4(v)
- case OpIsNanFloat32x8:
- return rewriteValueAMD64_OpIsNanFloat32x8(v)
- case OpIsNanFloat64x2:
- return rewriteValueAMD64_OpIsNanFloat64x2(v)
- case OpIsNanFloat64x4:
- return rewriteValueAMD64_OpIsNanFloat64x4(v)
- case OpIsNanFloat64x8:
- return rewriteValueAMD64_OpIsNanFloat64x8(v)
+ case OpIsNaNFloat32x16:
+ return rewriteValueAMD64_OpIsNaNFloat32x16(v)
+ case OpIsNaNFloat32x4:
+ return rewriteValueAMD64_OpIsNaNFloat32x4(v)
+ case OpIsNaNFloat32x8:
+ return rewriteValueAMD64_OpIsNaNFloat32x8(v)
+ case OpIsNaNFloat64x2:
+ return rewriteValueAMD64_OpIsNaNFloat64x2(v)
+ case OpIsNaNFloat64x4:
+ return rewriteValueAMD64_OpIsNaNFloat64x4(v)
+ case OpIsNaNFloat64x8:
+ return rewriteValueAMD64_OpIsNaNFloat64x8(v)
case OpIsNonNil:
return rewriteValueAMD64_OpIsNonNil(v)
case OpIsSliceInBounds:
@@ -70957,94 +70957,82 @@ func rewriteValueAMD64_OpIsInBounds(v *Value) bool {
return true
}
}
-func rewriteValueAMD64_OpIsNanFloat32x16(v *Value) bool {
- v_1 := v.Args[1]
+func rewriteValueAMD64_OpIsNaNFloat32x16(v *Value) bool {
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
- // match: (IsNanFloat32x16 x y)
- // result: (VPMOVMToVec32x16 (VCMPPS512 [3] x y))
+ // match: (IsNaNFloat32x16 x)
+ // result: (VPMOVMToVec32x16 (VCMPPS512 [3] x x))
for {
x := v_0
- y := v_1
v.reset(OpAMD64VPMOVMToVec32x16)
v0 := b.NewValue0(v.Pos, OpAMD64VCMPPS512, typ.Mask)
v0.AuxInt = uint8ToAuxInt(3)
- v0.AddArg2(x, y)
+ v0.AddArg2(x, x)
v.AddArg(v0)
return true
}
}
-func rewriteValueAMD64_OpIsNanFloat32x4(v *Value) bool {
- v_1 := v.Args[1]
+func rewriteValueAMD64_OpIsNaNFloat32x4(v *Value) bool {
v_0 := v.Args[0]
- // match: (IsNanFloat32x4 x y)
- // result: (VCMPPS128 [3] x y)
+ // match: (IsNaNFloat32x4 x)
+ // result: (VCMPPS128 [3] x x)
for {
x := v_0
- y := v_1
v.reset(OpAMD64VCMPPS128)
v.AuxInt = uint8ToAuxInt(3)
- v.AddArg2(x, y)
+ v.AddArg2(x, x)
return true
}
}
-func rewriteValueAMD64_OpIsNanFloat32x8(v *Value) bool {
- v_1 := v.Args[1]
+func rewriteValueAMD64_OpIsNaNFloat32x8(v *Value) bool {
v_0 := v.Args[0]
- // match: (IsNanFloat32x8 x y)
- // result: (VCMPPS256 [3] x y)
+ // match: (IsNaNFloat32x8 x)
+ // result: (VCMPPS256 [3] x x)
for {
x := v_0
- y := v_1
v.reset(OpAMD64VCMPPS256)
v.AuxInt = uint8ToAuxInt(3)
- v.AddArg2(x, y)
+ v.AddArg2(x, x)
return true
}
}
-func rewriteValueAMD64_OpIsNanFloat64x2(v *Value) bool {
- v_1 := v.Args[1]
+func rewriteValueAMD64_OpIsNaNFloat64x2(v *Value) bool {
v_0 := v.Args[0]
- // match: (IsNanFloat64x2 x y)
- // result: (VCMPPD128 [3] x y)
+ // match: (IsNaNFloat64x2 x)
+ // result: (VCMPPD128 [3] x x)
for {
x := v_0
- y := v_1
v.reset(OpAMD64VCMPPD128)
v.AuxInt = uint8ToAuxInt(3)
- v.AddArg2(x, y)
+ v.AddArg2(x, x)
return true
}
}
-func rewriteValueAMD64_OpIsNanFloat64x4(v *Value) bool {
- v_1 := v.Args[1]
+func rewriteValueAMD64_OpIsNaNFloat64x4(v *Value) bool {
v_0 := v.Args[0]
- // match: (IsNanFloat64x4 x y)
- // result: (VCMPPD256 [3] x y)
+ // match: (IsNaNFloat64x4 x)
+ // result: (VCMPPD256 [3] x x)
for {
x := v_0
- y := v_1
v.reset(OpAMD64VCMPPD256)
v.AuxInt = uint8ToAuxInt(3)
- v.AddArg2(x, y)
+ v.AddArg2(x, x)
return true
}
}
-func rewriteValueAMD64_OpIsNanFloat64x8(v *Value) bool {
- v_1 := v.Args[1]
+func rewriteValueAMD64_OpIsNaNFloat64x8(v *Value) bool {
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
- // match: (IsNanFloat64x8 x y)
- // result: (VPMOVMToVec64x8 (VCMPPD512 [3] x y))
+ // match: (IsNaNFloat64x8 x)
+ // result: (VPMOVMToVec64x8 (VCMPPD512 [3] x x))
for {
x := v_0
- y := v_1
v.reset(OpAMD64VPMOVMToVec64x8)
v0 := b.NewValue0(v.Pos, OpAMD64VCMPPD512, typ.Mask)
v0.AuxInt = uint8ToAuxInt(3)
- v0.AddArg2(x, y)
+ v0.AddArg2(x, x)
v.AddArg(v0)
return true
}
diff --git a/src/cmd/compile/internal/ssagen/intrinsics.go b/src/cmd/compile/internal/ssagen/intrinsics.go
index 4425c5617b..e2eebd783d 100644
--- a/src/cmd/compile/internal/ssagen/intrinsics.go
+++ b/src/cmd/compile/internal/ssagen/intrinsics.go
@@ -1667,6 +1667,12 @@ func initIntrinsics(cfg *intrinsicBuildConfig) {
addF(simdPackage, "Uint16x16.IsZero", opLen1(ssa.OpIsZeroVec, types.Types[types.TBOOL]), sys.AMD64)
addF(simdPackage, "Uint32x8.IsZero", opLen1(ssa.OpIsZeroVec, types.Types[types.TBOOL]), sys.AMD64)
addF(simdPackage, "Uint64x4.IsZero", opLen1(ssa.OpIsZeroVec, types.Types[types.TBOOL]), sys.AMD64)
+ addF(simdPackage, "Float32x4.IsNaN", opLen1(ssa.OpIsNaNFloat32x4, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Float32x8.IsNaN", opLen1(ssa.OpIsNaNFloat32x8, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Float32x16.IsNaN", opLen1(ssa.OpIsNaNFloat32x16, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Float64x2.IsNaN", opLen1(ssa.OpIsNaNFloat64x2, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Float64x4.IsNaN", opLen1(ssa.OpIsNaNFloat64x4, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Float64x8.IsNaN", opLen1(ssa.OpIsNaNFloat64x8, types.TypeVec512), sys.AMD64)
// sfp4 is intrinsic-if-constant, but otherwise it's complicated enough to just implement in Go.
sfp4 := func(method string, hwop ssa.Op, vectype *types.Type) {
diff --git a/src/cmd/compile/internal/ssagen/simdintrinsics.go b/src/cmd/compile/internal/ssagen/simdintrinsics.go
index 5a95761228..4ad0c6032c 100644
--- a/src/cmd/compile/internal/ssagen/simdintrinsics.go
+++ b/src/cmd/compile/internal/ssagen/simdintrinsics.go
@@ -571,12 +571,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
addF(simdPackage, "Uint32x16.InterleaveLoGrouped", opLen2(ssa.OpInterleaveLoGroupedUint32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint64x4.InterleaveLoGrouped", opLen2(ssa.OpInterleaveLoGroupedUint64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint64x8.InterleaveLoGrouped", opLen2(ssa.OpInterleaveLoGroupedUint64x8, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Float32x4.IsNan", opLen2(ssa.OpIsNanFloat32x4, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Float32x8.IsNan", opLen2(ssa.OpIsNanFloat32x8, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Float32x16.IsNan", opLen2(ssa.OpIsNanFloat32x16, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Float64x2.IsNan", opLen2(ssa.OpIsNanFloat64x2, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Float64x4.IsNan", opLen2(ssa.OpIsNanFloat64x4, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Float64x8.IsNan", opLen2(ssa.OpIsNanFloat64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int32x4.LeadingZeros", opLen1(ssa.OpLeadingZerosInt32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int32x8.LeadingZeros", opLen1(ssa.OpLeadingZerosInt32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int32x16.LeadingZeros", opLen1(ssa.OpLeadingZerosInt32x16, types.TypeVec512), sys.AMD64)