about | summary | refs | log | tree | commit | diff
path: root/src/cmd/compile/internal
diff options
context:
space:
mode:
author David Chase <drchase@google.com> 2025-09-19 13:07:59 -0400
committer David Chase <drchase@google.com> 2025-09-26 13:11:10 -0700
commit 25c36b95d1523f22d4c46ec237acc03e00540e0a (patch)
tree d96c9994fe212ab4d5a2f11d62bd13f4b340365d /src/cmd/compile/internal
parent f0e281e693685954df71374c9a9fb856e8745519 (diff)
download go-25c36b95d1523f22d4c46ec237acc03e00540e0a.tar.xz
[dev.simd] simd, cmd/compile: add 128 bit select-from-pair
Using this name until a better one appears:

    x.Select128FromPair(3, 2, y)

Includes test for constant and variable case.
Checks for unexpected immediates (using the zeroing flag, which is not supported for this intrinsic) and panics.

Change-Id: I9249475d6572968c127b4ee9e00328d717c07578
Reviewed-on: https://go-review.googlesource.com/c/go/+/705496
Reviewed-by: Junyang Shao <shaojunyang@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Diffstat (limited to 'src/cmd/compile/internal')
-rw-r--r-- src/cmd/compile/internal/amd64/simdssa.go | 2
-rw-r--r-- src/cmd/compile/internal/ir/symtab.go | 1
-rw-r--r-- src/cmd/compile/internal/ssa/_gen/simdAMD64.rules | 6
-rw-r--r-- src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go | 2
-rw-r--r-- src/cmd/compile/internal/ssa/_gen/simdgenericOps.go | 6
-rw-r--r-- src/cmd/compile/internal/ssa/opGen.go | 74
-rw-r--r-- src/cmd/compile/internal/ssa/rewriteAMD64.go | 18
-rw-r--r-- src/cmd/compile/internal/ssagen/intrinsics.go | 26
-rw-r--r-- src/cmd/compile/internal/ssagen/simdintrinsics.go | 6
-rw-r--r-- src/cmd/compile/internal/ssagen/ssa.go | 1
10 files changed, 141 insertions, 1 deletions
diff --git a/src/cmd/compile/internal/amd64/simdssa.go b/src/cmd/compile/internal/amd64/simdssa.go
index d69740cd96..a4d2452435 100644
--- a/src/cmd/compile/internal/amd64/simdssa.go
+++ b/src/cmd/compile/internal/amd64/simdssa.go
@@ -1053,6 +1053,8 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
ssa.OpAMD64VGF2P8AFFINEINVQB128,
ssa.OpAMD64VGF2P8AFFINEINVQB256,
ssa.OpAMD64VGF2P8AFFINEINVQB512,
+ ssa.OpAMD64VPERM2F128256,
+ ssa.OpAMD64VPERM2I128256,
ssa.OpAMD64VINSERTF128256,
ssa.OpAMD64VINSERTF64X4512,
ssa.OpAMD64VINSERTI128256,
diff --git a/src/cmd/compile/internal/ir/symtab.go b/src/cmd/compile/internal/ir/symtab.go
index 2222a5444a..0cfa2a2262 100644
--- a/src/cmd/compile/internal/ir/symtab.go
+++ b/src/cmd/compile/internal/ir/symtab.go
@@ -45,6 +45,7 @@ type symsStruct struct {
PanicdottypeI *obj.LSym
Panicnildottype *obj.LSym
Panicoverflow *obj.LSym
+ PanicSimdImm *obj.LSym
Racefuncenter *obj.LSym
Racefuncexit *obj.LSym
Raceread *obj.LSym
diff --git a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules
index 9db223c04f..1eab8b5e6d 100644
--- a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules
+++ b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules
@@ -938,6 +938,12 @@
(ScaleFloat64x2 ...) => (VSCALEFPD128 ...)
(ScaleFloat64x4 ...) => (VSCALEFPD256 ...)
(ScaleFloat64x8 ...) => (VSCALEFPD512 ...)
+(Select128FromPairFloat32x8 ...) => (VPERM2F128256 ...)
+(Select128FromPairFloat64x4 ...) => (VPERM2F128256 ...)
+(Select128FromPairInt32x8 ...) => (VPERM2I128256 ...)
+(Select128FromPairInt64x4 ...) => (VPERM2I128256 ...)
+(Select128FromPairUint32x8 ...) => (VPERM2I128256 ...)
+(Select128FromPairUint64x4 ...) => (VPERM2I128256 ...)
(SetElemFloat32x4 ...) => (VPINSRD128 ...)
(SetElemFloat64x2 ...) => (VPINSRQ128 ...)
(SetElemInt8x16 ...) => (VPINSRB128 ...)
diff --git a/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go b/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go
index ba91fb3fc9..5e1da3249f 100644
--- a/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go
+++ b/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go
@@ -1212,6 +1212,8 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VPRORQMasked128", argLength: 2, reg: wkw, asm: "VPRORQ", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPRORQMasked256", argLength: 2, reg: wkw, asm: "VPRORQ", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPRORQMasked512", argLength: 2, reg: wkw, asm: "VPRORQ", aux: "UInt8", commutative: false, typ: "Vec512", resultInArg0: false},
+ {name: "VPERM2F128256", argLength: 2, reg: v21, asm: "VPERM2F128", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: false},
+ {name: "VPERM2I128256", argLength: 2, reg: v21, asm: "VPERM2I128", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPINSRD128", argLength: 2, reg: vgpv, asm: "VPINSRD", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPINSRQ128", argLength: 2, reg: vgpv, asm: "VPINSRQ", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPINSRB128", argLength: 2, reg: vgpv, asm: "VPINSRB", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: false},
diff --git a/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go b/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go
index 81a1dff137..aa088dbf0b 100644
--- a/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go
+++ b/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go
@@ -1199,6 +1199,12 @@ func simdGenericOps() []opData {
{name: "RoundToEvenScaledResidueFloat64x2", argLength: 1, commutative: false, aux: "UInt8"},
{name: "RoundToEvenScaledResidueFloat64x4", argLength: 1, commutative: false, aux: "UInt8"},
{name: "RoundToEvenScaledResidueFloat64x8", argLength: 1, commutative: false, aux: "UInt8"},
+ {name: "Select128FromPairFloat32x8", argLength: 2, commutative: false, aux: "UInt8"},
+ {name: "Select128FromPairFloat64x4", argLength: 2, commutative: false, aux: "UInt8"},
+ {name: "Select128FromPairInt32x8", argLength: 2, commutative: false, aux: "UInt8"},
+ {name: "Select128FromPairInt64x4", argLength: 2, commutative: false, aux: "UInt8"},
+ {name: "Select128FromPairUint32x8", argLength: 2, commutative: false, aux: "UInt8"},
+ {name: "Select128FromPairUint64x4", argLength: 2, commutative: false, aux: "UInt8"},
{name: "SetElemFloat32x4", argLength: 2, commutative: false, aux: "UInt8"},
{name: "SetElemFloat64x2", argLength: 2, commutative: false, aux: "UInt8"},
{name: "SetElemInt8x16", argLength: 2, commutative: false, aux: "UInt8"},
diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go
index 792a1ca08f..105d1a803c 100644
--- a/src/cmd/compile/internal/ssa/opGen.go
+++ b/src/cmd/compile/internal/ssa/opGen.go
@@ -2444,6 +2444,8 @@ const (
OpAMD64VPRORQMasked128
OpAMD64VPRORQMasked256
OpAMD64VPRORQMasked512
+ OpAMD64VPERM2F128256
+ OpAMD64VPERM2I128256
OpAMD64VPINSRD128
OpAMD64VPINSRQ128
OpAMD64VPINSRB128
@@ -6594,6 +6596,12 @@ const (
OpRoundToEvenScaledResidueFloat64x2
OpRoundToEvenScaledResidueFloat64x4
OpRoundToEvenScaledResidueFloat64x8
+ OpSelect128FromPairFloat32x8
+ OpSelect128FromPairFloat64x4
+ OpSelect128FromPairInt32x8
+ OpSelect128FromPairInt64x4
+ OpSelect128FromPairUint32x8
+ OpSelect128FromPairUint64x4
OpSetElemFloat32x4
OpSetElemFloat64x2
OpSetElemInt8x16
@@ -37657,6 +37665,36 @@ var opcodeTable = [...]opInfo{
},
},
{
+ name: "VPERM2F128256",
+ auxType: auxUInt8,
+ argLen: 2,
+ asm: x86.AVPERM2F128,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
+ {1, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VPERM2I128256",
+ auxType: auxUInt8,
+ argLen: 2,
+ asm: x86.AVPERM2I128,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
+ {1, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
name: "VPINSRD128",
auxType: auxUInt8,
argLen: 2,
@@ -82361,6 +82399,42 @@ var opcodeTable = [...]opInfo{
generic: true,
},
{
+ name: "Select128FromPairFloat32x8",
+ auxType: auxUInt8,
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "Select128FromPairFloat64x4",
+ auxType: auxUInt8,
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "Select128FromPairInt32x8",
+ auxType: auxUInt8,
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "Select128FromPairInt64x4",
+ auxType: auxUInt8,
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "Select128FromPairUint32x8",
+ auxType: auxUInt8,
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "Select128FromPairUint64x4",
+ auxType: auxUInt8,
+ argLen: 2,
+ generic: true,
+ },
+ {
name: "SetElemFloat32x4",
auxType: auxUInt8,
argLen: 2,
diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go
index ca9f9ae17b..bc611fc44c 100644
--- a/src/cmd/compile/internal/ssa/rewriteAMD64.go
+++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go
@@ -4991,6 +4991,24 @@ func rewriteValueAMD64(v *Value) bool {
return rewriteValueAMD64_OpSelect0(v)
case OpSelect1:
return rewriteValueAMD64_OpSelect1(v)
+ case OpSelect128FromPairFloat32x8:
+ v.Op = OpAMD64VPERM2F128256
+ return true
+ case OpSelect128FromPairFloat64x4:
+ v.Op = OpAMD64VPERM2F128256
+ return true
+ case OpSelect128FromPairInt32x8:
+ v.Op = OpAMD64VPERM2I128256
+ return true
+ case OpSelect128FromPairInt64x4:
+ v.Op = OpAMD64VPERM2I128256
+ return true
+ case OpSelect128FromPairUint32x8:
+ v.Op = OpAMD64VPERM2I128256
+ return true
+ case OpSelect128FromPairUint64x4:
+ v.Op = OpAMD64VPERM2I128256
+ return true
case OpSelectN:
return rewriteValueAMD64_OpSelectN(v)
case OpSetElemFloat32x4:
diff --git a/src/cmd/compile/internal/ssagen/intrinsics.go b/src/cmd/compile/internal/ssagen/intrinsics.go
index 985d899a71..4c5cd9ef2c 100644
--- a/src/cmd/compile/internal/ssagen/intrinsics.go
+++ b/src/cmd/compile/internal/ssagen/intrinsics.go
@@ -1842,7 +1842,9 @@ func immJumpTable(s *state, idx *ssa.Value, intrinsicCall *ir.CallExpr, genOp fu
for i, t := range targets {
s.startBlock(t)
genOp(s, i)
- t.AddEdgeTo(bEnd)
+ if t.Kind != ssa.BlockExit {
+ t.AddEdgeTo(bEnd)
+ }
s.endBlock()
}
@@ -1899,6 +1901,28 @@ func opLen2Imm8_2I(op ssa.Op, t *types.Type, offset int) func(s *state, n *ir.Ca
}
}
+// opLen2Imm8_II returns an intrinsic builder for a two-operand op whose 8-bit immediate packs two selectors: args[1] in bits 0-1 and args[2] in bits 4-5, with args[0] and args[3] as the value operands. Offset is ignored, so it is a _ parameter instead.
+func opLen2Imm8_II(op ssa.Op, t *types.Type, _ int) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
+ return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
+ if args[1].Op == ssa.OpConst8 && args[2].Op == ssa.OpConst8 && args[1].AuxInt & ^3 == 0 && args[2].AuxInt & ^3 == 0 {
+ i1, i2 := args[1].AuxInt, args[2].AuxInt
+ return s.newValue2I(op, t, i1+i2<<4, args[0], args[3])
+ }
+ four := s.constInt64(types.Types[types.TUINT8], 4)
+ shifted := s.newValue2(ssa.OpLsh8x8, types.Types[types.TUINT8], args[2], four)
+ combined := s.newValue2(ssa.OpAdd8, types.Types[types.TUINT8], args[1], shifted)
+ return immJumpTable(s, combined, n, func(sNew *state, idx int) {
+ // Encode as int8 due to requirement of AuxInt, check its comment for details. Any idx with bits set outside 0-1 and 4-5 (e.g. the "zeroing" flags) takes the else branch and calls PanicSimdImm.
+ // NOTE(review): combined is built with 8-bit shift/add, so an out-of-range args[1] such as 16 appears to alias args[1]=0, args[2]=1 (16+0<<4 == 0+1<<4) and would not panic — confirm.
+ if idx & ^(3+3<<4) == 0 {
+ s.vars[n] = sNew.newValue2I(op, t, int64(int8(idx)), args[0], args[3])
+ } else {
+ sNew.rtcall(ir.Syms.PanicSimdImm, false, nil)
+ }
+ })
+ }
+}
+
func opLen3Imm8_2I(op ssa.Op, t *types.Type, offset int) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
if args[2].Op == ssa.OpConst8 {
diff --git a/src/cmd/compile/internal/ssagen/simdintrinsics.go b/src/cmd/compile/internal/ssagen/simdintrinsics.go
index 41858a7745..a62b3882c3 100644
--- a/src/cmd/compile/internal/ssagen/simdintrinsics.go
+++ b/src/cmd/compile/internal/ssagen/simdintrinsics.go
@@ -950,6 +950,12 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
addF(simdPackage, "Float64x2.Scale", opLen2(ssa.OpScaleFloat64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float64x4.Scale", opLen2(ssa.OpScaleFloat64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float64x8.Scale", opLen2(ssa.OpScaleFloat64x8, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Float32x8.Select128FromPair", opLen2Imm8_II(ssa.OpSelect128FromPairFloat32x8, types.TypeVec256, 0), sys.AMD64)
+ addF(simdPackage, "Float64x4.Select128FromPair", opLen2Imm8_II(ssa.OpSelect128FromPairFloat64x4, types.TypeVec256, 0), sys.AMD64)
+ addF(simdPackage, "Int32x8.Select128FromPair", opLen2Imm8_II(ssa.OpSelect128FromPairInt32x8, types.TypeVec256, 0), sys.AMD64)
+ addF(simdPackage, "Int64x4.Select128FromPair", opLen2Imm8_II(ssa.OpSelect128FromPairInt64x4, types.TypeVec256, 0), sys.AMD64)
+ addF(simdPackage, "Uint32x8.Select128FromPair", opLen2Imm8_II(ssa.OpSelect128FromPairUint32x8, types.TypeVec256, 0), sys.AMD64)
+ addF(simdPackage, "Uint64x4.Select128FromPair", opLen2Imm8_II(ssa.OpSelect128FromPairUint64x4, types.TypeVec256, 0), sys.AMD64)
addF(simdPackage, "Float32x4.SetElem", opLen2Imm8(ssa.OpSetElemFloat32x4, types.TypeVec128, 0), sys.AMD64)
addF(simdPackage, "Float64x2.SetElem", opLen2Imm8(ssa.OpSetElemFloat64x2, types.TypeVec128, 0), sys.AMD64)
addF(simdPackage, "Int8x16.SetElem", opLen2Imm8(ssa.OpSetElemInt8x16, types.TypeVec128, 0), sys.AMD64)
diff --git a/src/cmd/compile/internal/ssagen/ssa.go b/src/cmd/compile/internal/ssagen/ssa.go
index 57129817f6..37aad360f2 100644
--- a/src/cmd/compile/internal/ssagen/ssa.go
+++ b/src/cmd/compile/internal/ssagen/ssa.go
@@ -141,6 +141,7 @@ func InitConfig() {
ir.Syms.Panicnildottype = typecheck.LookupRuntimeFunc("panicnildottype")
ir.Syms.Panicoverflow = typecheck.LookupRuntimeFunc("panicoverflow")
ir.Syms.Panicshift = typecheck.LookupRuntimeFunc("panicshift")
+ ir.Syms.PanicSimdImm = typecheck.LookupRuntimeFunc("panicSimdImm")
ir.Syms.Racefuncenter = typecheck.LookupRuntimeFunc("racefuncenter")
ir.Syms.Racefuncexit = typecheck.LookupRuntimeFunc("racefuncexit")
ir.Syms.Raceread = typecheck.LookupRuntimeFunc("raceread")