aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Junyang Shao <shaojunyang@google.com> 2026-01-08 18:14:02 +0000
committer: Junyang Shao <shaojunyang@google.com> 2026-01-08 18:14:02 +0000
commit: 216d147a6de31c46d46857096d65c1c97c2ab25c (patch)
tree: 798f6094df8a12e9314c96f1e70b29e06407f04d
parent: b8191a2f9893220bdbe52ecebb37e293847d98f5 (diff)
parent: 6b2505c79cb3838c6e27cf47ac09980fe51c83c2 (diff)
download: go-216d147a6de31c46d46857096d65c1c97c2ab25c.tar.xz
[release-branch.go1.26] all: merge master (6b2505c) into release-branch.go1.26
Merge List:

+ 2026-01-08 6b2505c79c cmd/go: remove user-content from doc strings in cgo ASTs.
+ 2026-01-08 4b89bcb8b7 lib/fips140: freeze v1.26.0 FIPS 140-3 module
+ 2026-01-08 8ac4477d83 simd/archsimd: rename Broadcast methods
+ 2026-01-08 5facb3b24b internal/types: add test for cycles in value context
+ 2026-01-07 28147b5283 cmd/go: guarantee a minimum of min(4,GOMAXPROCS) to compile -c
+ 2026-01-07 874d8b98eb cmd/go/internal/work: decrement concurrentProcesses when action finishes
+ 2026-01-07 d1e7f49e3d internal/trace: fix recorder.Write return value for header-only buffers

Change-Id: I863375a1ac0f641b0b02968acf01a602b7d7f2a1
-rw-r--r-- lib/fips140/fips140.sum 2
-rw-r--r-- lib/fips140/v1.26.0.zip (renamed from lib/fips140/v1.1.0-rc1.zip) bin 678896 -> 676132 bytes
-rw-r--r-- src/cmd/cgo/ast.go 11
-rw-r--r-- src/cmd/compile/internal/amd64/simdssa.go 88
-rw-r--r-- src/cmd/compile/internal/ssa/_gen/simdAMD64.rules 82
-rw-r--r-- src/cmd/compile/internal/ssa/_gen/simdgenericOps.go 60
-rw-r--r-- src/cmd/compile/internal/ssa/opGen.go 120
-rw-r--r-- src/cmd/compile/internal/ssa/rewriteAMD64.go 112
-rw-r--r-- src/cmd/compile/internal/ssagen/simdintrinsics.go 60
-rw-r--r-- src/cmd/go/internal/work/exec.go 5
-rw-r--r-- src/cmd/go/internal/work/gc.go 20
-rw-r--r-- src/internal/types/testdata/check/cycles6.go 71
-rw-r--r-- src/runtime/trace/recorder.go 2
-rw-r--r-- src/simd/archsimd/_gen/simdgen/ops/Moves/categories.yaml 33
-rw-r--r-- src/simd/archsimd/_gen/simdgen/ops/Moves/go.yaml 79
-rw-r--r-- src/simd/archsimd/_gen/tmplgen/main.go 2
-rw-r--r-- src/simd/archsimd/ops_amd64.go 240
-rw-r--r-- src/simd/archsimd/other_gen_amd64.go 60
18 files changed, 584 insertions, 463 deletions
diff --git a/lib/fips140/fips140.sum b/lib/fips140/fips140.sum
index c4d185da73..050957af60 100644
--- a/lib/fips140/fips140.sum
+++ b/lib/fips140/fips140.sum
@@ -10,4 +10,4 @@
# go test cmd/go/internal/fips140 -update
#
v1.0.0-c2097c7c.zip daf3614e0406f67ae6323c902db3f953a1effb199142362a039e7526dfb9368b
-v1.1.0-rc1.zip ea94f8c3885294c9efe1bd8f9b6e86daeb25b6aff2aeb20707cd9a5101f6f54e
+v1.26.0.zip 9b28f847fdf1db4a36cb2b2f8ec09443c039383f085630a03ecfaddf6db7ea23
diff --git a/lib/fips140/v1.1.0-rc1.zip b/lib/fips140/v1.26.0.zip
index d4264bdb2e..f53ade8036 100644
--- a/lib/fips140/v1.1.0-rc1.zip
+++ b/lib/fips140/v1.26.0.zip
Binary files differ
diff --git a/src/cmd/cgo/ast.go b/src/cmd/cgo/ast.go
index 2da6ca5a30..df0552f525 100644
--- a/src/cmd/cgo/ast.go
+++ b/src/cmd/cgo/ast.go
@@ -301,17 +301,12 @@ func (f *File) saveExport(x any, context astContext) {
error_(c.Pos(), "export comment has wrong name %q, want %q", name, n.Name.Name)
}
- doc := ""
- for _, c1 := range n.Doc.List {
- if c1 != c {
- doc += c1.Text + "\n"
- }
- }
-
f.ExpFunc = append(f.ExpFunc, &ExpFunc{
Func: n,
ExpName: name,
- Doc: doc,
+ // Caution: the Doc field is deliberately left unset
+ // to ensure that there are no unintended artifacts
+ // in the binary. See https://go.dev/issue/76697.
})
break
}
diff --git a/src/cmd/compile/internal/amd64/simdssa.go b/src/cmd/compile/internal/amd64/simdssa.go
index c4d0fd69c6..a028cbe86d 100644
--- a/src/cmd/compile/internal/amd64/simdssa.go
+++ b/src/cmd/compile/internal/amd64/simdssa.go
@@ -25,23 +25,23 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
ssa.OpAMD64VPABSQ128,
ssa.OpAMD64VPABSQ256,
ssa.OpAMD64VPABSQ512,
- ssa.OpAMD64VBROADCASTSS128,
ssa.OpAMD64VPBROADCASTQ128,
- ssa.OpAMD64VPBROADCASTB128,
- ssa.OpAMD64VPBROADCASTW128,
+ ssa.OpAMD64VBROADCASTSS128,
+ ssa.OpAMD64VBROADCASTSD256,
ssa.OpAMD64VPBROADCASTD128,
+ ssa.OpAMD64VPBROADCASTQ256,
ssa.OpAMD64VBROADCASTSS256,
- ssa.OpAMD64VBROADCASTSD256,
- ssa.OpAMD64VPBROADCASTB256,
- ssa.OpAMD64VPBROADCASTW256,
+ ssa.OpAMD64VBROADCASTSD512,
+ ssa.OpAMD64VPBROADCASTW128,
ssa.OpAMD64VPBROADCASTD256,
- ssa.OpAMD64VPBROADCASTQ256,
+ ssa.OpAMD64VPBROADCASTQ512,
ssa.OpAMD64VBROADCASTSS512,
- ssa.OpAMD64VBROADCASTSD512,
- ssa.OpAMD64VPBROADCASTB512,
- ssa.OpAMD64VPBROADCASTW512,
+ ssa.OpAMD64VPBROADCASTB128,
+ ssa.OpAMD64VPBROADCASTW256,
ssa.OpAMD64VPBROADCASTD512,
- ssa.OpAMD64VPBROADCASTQ512,
+ ssa.OpAMD64VPBROADCASTB256,
+ ssa.OpAMD64VPBROADCASTW512,
+ ssa.OpAMD64VPBROADCASTB512,
ssa.OpAMD64VCVTPD2PSX128,
ssa.OpAMD64VCVTPD2PSY128,
ssa.OpAMD64VCVTPD2PS256,
@@ -832,23 +832,23 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
ssa.OpAMD64VPABSQMasked128,
ssa.OpAMD64VPABSQMasked256,
ssa.OpAMD64VPABSQMasked512,
- ssa.OpAMD64VBROADCASTSSMasked128,
ssa.OpAMD64VPBROADCASTQMasked128,
- ssa.OpAMD64VPBROADCASTBMasked128,
- ssa.OpAMD64VPBROADCASTWMasked128,
+ ssa.OpAMD64VBROADCASTSSMasked128,
+ ssa.OpAMD64VBROADCASTSDMasked256,
ssa.OpAMD64VPBROADCASTDMasked128,
+ ssa.OpAMD64VPBROADCASTQMasked256,
ssa.OpAMD64VBROADCASTSSMasked256,
- ssa.OpAMD64VBROADCASTSDMasked256,
- ssa.OpAMD64VPBROADCASTBMasked256,
- ssa.OpAMD64VPBROADCASTWMasked256,
+ ssa.OpAMD64VBROADCASTSDMasked512,
+ ssa.OpAMD64VPBROADCASTWMasked128,
ssa.OpAMD64VPBROADCASTDMasked256,
- ssa.OpAMD64VPBROADCASTQMasked256,
+ ssa.OpAMD64VPBROADCASTQMasked512,
ssa.OpAMD64VBROADCASTSSMasked512,
- ssa.OpAMD64VBROADCASTSDMasked512,
- ssa.OpAMD64VPBROADCASTBMasked512,
- ssa.OpAMD64VPBROADCASTWMasked512,
+ ssa.OpAMD64VPBROADCASTBMasked128,
+ ssa.OpAMD64VPBROADCASTWMasked256,
ssa.OpAMD64VPBROADCASTDMasked512,
- ssa.OpAMD64VPBROADCASTQMasked512,
+ ssa.OpAMD64VPBROADCASTBMasked256,
+ ssa.OpAMD64VPBROADCASTWMasked512,
+ ssa.OpAMD64VPBROADCASTBMasked512,
ssa.OpAMD64VCOMPRESSPSMasked128,
ssa.OpAMD64VCOMPRESSPSMasked256,
ssa.OpAMD64VCOMPRESSPSMasked512,
@@ -2460,23 +2460,23 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
ssa.OpAMD64VPABSQMasked128Merging,
ssa.OpAMD64VPABSQMasked256Merging,
ssa.OpAMD64VPABSQMasked512Merging,
- ssa.OpAMD64VBROADCASTSSMasked128Merging,
ssa.OpAMD64VPBROADCASTQMasked128Merging,
- ssa.OpAMD64VPBROADCASTBMasked128Merging,
- ssa.OpAMD64VPBROADCASTWMasked128Merging,
+ ssa.OpAMD64VBROADCASTSSMasked128Merging,
+ ssa.OpAMD64VBROADCASTSDMasked256Merging,
ssa.OpAMD64VPBROADCASTDMasked128Merging,
+ ssa.OpAMD64VPBROADCASTQMasked256Merging,
ssa.OpAMD64VBROADCASTSSMasked256Merging,
- ssa.OpAMD64VBROADCASTSDMasked256Merging,
- ssa.OpAMD64VPBROADCASTBMasked256Merging,
- ssa.OpAMD64VPBROADCASTWMasked256Merging,
+ ssa.OpAMD64VBROADCASTSDMasked512Merging,
+ ssa.OpAMD64VPBROADCASTWMasked128Merging,
ssa.OpAMD64VPBROADCASTDMasked256Merging,
- ssa.OpAMD64VPBROADCASTQMasked256Merging,
+ ssa.OpAMD64VPBROADCASTQMasked512Merging,
ssa.OpAMD64VBROADCASTSSMasked512Merging,
- ssa.OpAMD64VBROADCASTSDMasked512Merging,
- ssa.OpAMD64VPBROADCASTBMasked512Merging,
- ssa.OpAMD64VPBROADCASTWMasked512Merging,
+ ssa.OpAMD64VPBROADCASTBMasked128Merging,
+ ssa.OpAMD64VPBROADCASTWMasked256Merging,
ssa.OpAMD64VPBROADCASTDMasked512Merging,
- ssa.OpAMD64VPBROADCASTQMasked512Merging,
+ ssa.OpAMD64VPBROADCASTBMasked256Merging,
+ ssa.OpAMD64VPBROADCASTWMasked512Merging,
+ ssa.OpAMD64VPBROADCASTBMasked512Merging,
ssa.OpAMD64VRNDSCALEPSMasked128Merging,
ssa.OpAMD64VRNDSCALEPSMasked256Merging,
ssa.OpAMD64VRNDSCALEPSMasked512Merging,
@@ -2817,23 +2817,23 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
ssa.OpAMD64VPAVGWMasked128,
ssa.OpAMD64VPAVGWMasked256,
ssa.OpAMD64VPAVGWMasked512,
- ssa.OpAMD64VBROADCASTSSMasked128,
ssa.OpAMD64VPBROADCASTQMasked128,
- ssa.OpAMD64VPBROADCASTBMasked128,
- ssa.OpAMD64VPBROADCASTWMasked128,
+ ssa.OpAMD64VBROADCASTSSMasked128,
+ ssa.OpAMD64VBROADCASTSDMasked256,
ssa.OpAMD64VPBROADCASTDMasked128,
+ ssa.OpAMD64VPBROADCASTQMasked256,
ssa.OpAMD64VBROADCASTSSMasked256,
- ssa.OpAMD64VBROADCASTSDMasked256,
- ssa.OpAMD64VPBROADCASTBMasked256,
- ssa.OpAMD64VPBROADCASTWMasked256,
+ ssa.OpAMD64VBROADCASTSDMasked512,
+ ssa.OpAMD64VPBROADCASTWMasked128,
ssa.OpAMD64VPBROADCASTDMasked256,
- ssa.OpAMD64VPBROADCASTQMasked256,
+ ssa.OpAMD64VPBROADCASTQMasked512,
ssa.OpAMD64VBROADCASTSSMasked512,
- ssa.OpAMD64VBROADCASTSDMasked512,
- ssa.OpAMD64VPBROADCASTBMasked512,
- ssa.OpAMD64VPBROADCASTWMasked512,
+ ssa.OpAMD64VPBROADCASTBMasked128,
+ ssa.OpAMD64VPBROADCASTWMasked256,
ssa.OpAMD64VPBROADCASTDMasked512,
- ssa.OpAMD64VPBROADCASTQMasked512,
+ ssa.OpAMD64VPBROADCASTBMasked256,
+ ssa.OpAMD64VPBROADCASTWMasked512,
+ ssa.OpAMD64VPBROADCASTBMasked512,
ssa.OpAMD64VRNDSCALEPSMasked128,
ssa.OpAMD64VRNDSCALEPSMasked128load,
ssa.OpAMD64VRNDSCALEPSMasked256,
diff --git a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules
index 5c83f39a1f..799461610d 100644
--- a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules
+++ b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules
@@ -140,36 +140,36 @@
(AverageUint16x8 ...) => (VPAVGW128 ...)
(AverageUint16x16 ...) => (VPAVGW256 ...)
(AverageUint16x32 ...) => (VPAVGW512 ...)
-(Broadcast128Float32x4 ...) => (VBROADCASTSS128 ...)
-(Broadcast128Float64x2 ...) => (VPBROADCASTQ128 ...)
-(Broadcast128Int8x16 ...) => (VPBROADCASTB128 ...)
-(Broadcast128Int16x8 ...) => (VPBROADCASTW128 ...)
-(Broadcast128Int32x4 ...) => (VPBROADCASTD128 ...)
-(Broadcast128Int64x2 ...) => (VPBROADCASTQ128 ...)
-(Broadcast128Uint8x16 ...) => (VPBROADCASTB128 ...)
-(Broadcast128Uint16x8 ...) => (VPBROADCASTW128 ...)
-(Broadcast128Uint32x4 ...) => (VPBROADCASTD128 ...)
-(Broadcast128Uint64x2 ...) => (VPBROADCASTQ128 ...)
-(Broadcast256Float32x4 ...) => (VBROADCASTSS256 ...)
-(Broadcast256Float64x2 ...) => (VBROADCASTSD256 ...)
-(Broadcast256Int8x16 ...) => (VPBROADCASTB256 ...)
-(Broadcast256Int16x8 ...) => (VPBROADCASTW256 ...)
-(Broadcast256Int32x4 ...) => (VPBROADCASTD256 ...)
-(Broadcast256Int64x2 ...) => (VPBROADCASTQ256 ...)
-(Broadcast256Uint8x16 ...) => (VPBROADCASTB256 ...)
-(Broadcast256Uint16x8 ...) => (VPBROADCASTW256 ...)
-(Broadcast256Uint32x4 ...) => (VPBROADCASTD256 ...)
-(Broadcast256Uint64x2 ...) => (VPBROADCASTQ256 ...)
-(Broadcast512Float32x4 ...) => (VBROADCASTSS512 ...)
-(Broadcast512Float64x2 ...) => (VBROADCASTSD512 ...)
-(Broadcast512Int8x16 ...) => (VPBROADCASTB512 ...)
-(Broadcast512Int16x8 ...) => (VPBROADCASTW512 ...)
-(Broadcast512Int32x4 ...) => (VPBROADCASTD512 ...)
-(Broadcast512Int64x2 ...) => (VPBROADCASTQ512 ...)
-(Broadcast512Uint8x16 ...) => (VPBROADCASTB512 ...)
-(Broadcast512Uint16x8 ...) => (VPBROADCASTW512 ...)
-(Broadcast512Uint32x4 ...) => (VPBROADCASTD512 ...)
-(Broadcast512Uint64x2 ...) => (VPBROADCASTQ512 ...)
+(Broadcast1To2Float64x2 ...) => (VPBROADCASTQ128 ...)
+(Broadcast1To2Int64x2 ...) => (VPBROADCASTQ128 ...)
+(Broadcast1To2Uint64x2 ...) => (VPBROADCASTQ128 ...)
+(Broadcast1To4Float32x4 ...) => (VBROADCASTSS128 ...)
+(Broadcast1To4Float64x2 ...) => (VBROADCASTSD256 ...)
+(Broadcast1To4Int32x4 ...) => (VPBROADCASTD128 ...)
+(Broadcast1To4Int64x2 ...) => (VPBROADCASTQ256 ...)
+(Broadcast1To4Uint32x4 ...) => (VPBROADCASTD128 ...)
+(Broadcast1To4Uint64x2 ...) => (VPBROADCASTQ256 ...)
+(Broadcast1To8Float32x4 ...) => (VBROADCASTSS256 ...)
+(Broadcast1To8Float64x2 ...) => (VBROADCASTSD512 ...)
+(Broadcast1To8Int16x8 ...) => (VPBROADCASTW128 ...)
+(Broadcast1To8Int32x4 ...) => (VPBROADCASTD256 ...)
+(Broadcast1To8Int64x2 ...) => (VPBROADCASTQ512 ...)
+(Broadcast1To8Uint16x8 ...) => (VPBROADCASTW128 ...)
+(Broadcast1To8Uint32x4 ...) => (VPBROADCASTD256 ...)
+(Broadcast1To8Uint64x2 ...) => (VPBROADCASTQ512 ...)
+(Broadcast1To16Float32x4 ...) => (VBROADCASTSS512 ...)
+(Broadcast1To16Int8x16 ...) => (VPBROADCASTB128 ...)
+(Broadcast1To16Int16x8 ...) => (VPBROADCASTW256 ...)
+(Broadcast1To16Int32x4 ...) => (VPBROADCASTD512 ...)
+(Broadcast1To16Uint8x16 ...) => (VPBROADCASTB128 ...)
+(Broadcast1To16Uint16x8 ...) => (VPBROADCASTW256 ...)
+(Broadcast1To16Uint32x4 ...) => (VPBROADCASTD512 ...)
+(Broadcast1To32Int8x16 ...) => (VPBROADCASTB256 ...)
+(Broadcast1To32Int16x8 ...) => (VPBROADCASTW512 ...)
+(Broadcast1To32Uint8x16 ...) => (VPBROADCASTB256 ...)
+(Broadcast1To32Uint16x8 ...) => (VPBROADCASTW512 ...)
+(Broadcast1To64Int8x16 ...) => (VPBROADCASTB512 ...)
+(Broadcast1To64Uint8x16 ...) => (VPBROADCASTB512 ...)
(CeilFloat32x4 x) => (VROUNDPS128 [2] x)
(CeilFloat32x8 x) => (VROUNDPS256 [2] x)
(CeilFloat64x2 x) => (VROUNDPD128 [2] x)
@@ -1424,23 +1424,23 @@
(VMOVDQU16Masked128 (VPAVGW128 x y) mask) => (VPAVGWMasked128 x y mask)
(VMOVDQU16Masked256 (VPAVGW256 x y) mask) => (VPAVGWMasked256 x y mask)
(VMOVDQU16Masked512 (VPAVGW512 x y) mask) => (VPAVGWMasked512 x y mask)
-(VMOVDQU32Masked128 (VBROADCASTSS128 x) mask) => (VBROADCASTSSMasked128 x mask)
(VMOVDQU64Masked128 (VPBROADCASTQ128 x) mask) => (VPBROADCASTQMasked128 x mask)
-(VMOVDQU8Masked128 (VPBROADCASTB128 x) mask) => (VPBROADCASTBMasked128 x mask)
-(VMOVDQU16Masked128 (VPBROADCASTW128 x) mask) => (VPBROADCASTWMasked128 x mask)
+(VMOVDQU32Masked128 (VBROADCASTSS128 x) mask) => (VBROADCASTSSMasked128 x mask)
+(VMOVDQU64Masked256 (VBROADCASTSD256 x) mask) => (VBROADCASTSDMasked256 x mask)
(VMOVDQU32Masked128 (VPBROADCASTD128 x) mask) => (VPBROADCASTDMasked128 x mask)
+(VMOVDQU64Masked256 (VPBROADCASTQ256 x) mask) => (VPBROADCASTQMasked256 x mask)
(VMOVDQU32Masked256 (VBROADCASTSS256 x) mask) => (VBROADCASTSSMasked256 x mask)
-(VMOVDQU64Masked256 (VBROADCASTSD256 x) mask) => (VBROADCASTSDMasked256 x mask)
-(VMOVDQU8Masked256 (VPBROADCASTB256 x) mask) => (VPBROADCASTBMasked256 x mask)
-(VMOVDQU16Masked256 (VPBROADCASTW256 x) mask) => (VPBROADCASTWMasked256 x mask)
+(VMOVDQU64Masked512 (VBROADCASTSD512 x) mask) => (VBROADCASTSDMasked512 x mask)
+(VMOVDQU16Masked128 (VPBROADCASTW128 x) mask) => (VPBROADCASTWMasked128 x mask)
(VMOVDQU32Masked256 (VPBROADCASTD256 x) mask) => (VPBROADCASTDMasked256 x mask)
-(VMOVDQU64Masked256 (VPBROADCASTQ256 x) mask) => (VPBROADCASTQMasked256 x mask)
+(VMOVDQU64Masked512 (VPBROADCASTQ512 x) mask) => (VPBROADCASTQMasked512 x mask)
(VMOVDQU32Masked512 (VBROADCASTSS512 x) mask) => (VBROADCASTSSMasked512 x mask)
-(VMOVDQU64Masked512 (VBROADCASTSD512 x) mask) => (VBROADCASTSDMasked512 x mask)
-(VMOVDQU8Masked512 (VPBROADCASTB512 x) mask) => (VPBROADCASTBMasked512 x mask)
-(VMOVDQU16Masked512 (VPBROADCASTW512 x) mask) => (VPBROADCASTWMasked512 x mask)
+(VMOVDQU8Masked128 (VPBROADCASTB128 x) mask) => (VPBROADCASTBMasked128 x mask)
+(VMOVDQU16Masked256 (VPBROADCASTW256 x) mask) => (VPBROADCASTWMasked256 x mask)
(VMOVDQU32Masked512 (VPBROADCASTD512 x) mask) => (VPBROADCASTDMasked512 x mask)
-(VMOVDQU64Masked512 (VPBROADCASTQ512 x) mask) => (VPBROADCASTQMasked512 x mask)
+(VMOVDQU8Masked256 (VPBROADCASTB256 x) mask) => (VPBROADCASTBMasked256 x mask)
+(VMOVDQU16Masked512 (VPBROADCASTW512 x) mask) => (VPBROADCASTWMasked512 x mask)
+(VMOVDQU8Masked512 (VPBROADCASTB512 x) mask) => (VPBROADCASTBMasked512 x mask)
(VMOVDQU32Masked128 (VRNDSCALEPS128 [a] x) mask) => (VRNDSCALEPSMasked128 [a] x mask)
(VMOVDQU32Masked256 (VRNDSCALEPS256 [a] x) mask) => (VRNDSCALEPSMasked256 [a] x mask)
(VMOVDQU32Masked512 (VRNDSCALEPS512 [a] x) mask) => (VRNDSCALEPSMasked512 [a] x mask)
diff --git a/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go b/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go
index 889ab0d84f..ff863a389f 100644
--- a/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go
+++ b/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go
@@ -143,36 +143,36 @@ func simdGenericOps() []opData {
{name: "AverageUint16x8", argLength: 2, commutative: true},
{name: "AverageUint16x16", argLength: 2, commutative: true},
{name: "AverageUint16x32", argLength: 2, commutative: true},
- {name: "Broadcast128Float32x4", argLength: 1, commutative: false},
- {name: "Broadcast128Float64x2", argLength: 1, commutative: false},
- {name: "Broadcast128Int8x16", argLength: 1, commutative: false},
- {name: "Broadcast128Int16x8", argLength: 1, commutative: false},
- {name: "Broadcast128Int32x4", argLength: 1, commutative: false},
- {name: "Broadcast128Int64x2", argLength: 1, commutative: false},
- {name: "Broadcast128Uint8x16", argLength: 1, commutative: false},
- {name: "Broadcast128Uint16x8", argLength: 1, commutative: false},
- {name: "Broadcast128Uint32x4", argLength: 1, commutative: false},
- {name: "Broadcast128Uint64x2", argLength: 1, commutative: false},
- {name: "Broadcast256Float32x4", argLength: 1, commutative: false},
- {name: "Broadcast256Float64x2", argLength: 1, commutative: false},
- {name: "Broadcast256Int8x16", argLength: 1, commutative: false},
- {name: "Broadcast256Int16x8", argLength: 1, commutative: false},
- {name: "Broadcast256Int32x4", argLength: 1, commutative: false},
- {name: "Broadcast256Int64x2", argLength: 1, commutative: false},
- {name: "Broadcast256Uint8x16", argLength: 1, commutative: false},
- {name: "Broadcast256Uint16x8", argLength: 1, commutative: false},
- {name: "Broadcast256Uint32x4", argLength: 1, commutative: false},
- {name: "Broadcast256Uint64x2", argLength: 1, commutative: false},
- {name: "Broadcast512Float32x4", argLength: 1, commutative: false},
- {name: "Broadcast512Float64x2", argLength: 1, commutative: false},
- {name: "Broadcast512Int8x16", argLength: 1, commutative: false},
- {name: "Broadcast512Int16x8", argLength: 1, commutative: false},
- {name: "Broadcast512Int32x4", argLength: 1, commutative: false},
- {name: "Broadcast512Int64x2", argLength: 1, commutative: false},
- {name: "Broadcast512Uint8x16", argLength: 1, commutative: false},
- {name: "Broadcast512Uint16x8", argLength: 1, commutative: false},
- {name: "Broadcast512Uint32x4", argLength: 1, commutative: false},
- {name: "Broadcast512Uint64x2", argLength: 1, commutative: false},
+ {name: "Broadcast1To2Float64x2", argLength: 1, commutative: false},
+ {name: "Broadcast1To2Int64x2", argLength: 1, commutative: false},
+ {name: "Broadcast1To2Uint64x2", argLength: 1, commutative: false},
+ {name: "Broadcast1To4Float32x4", argLength: 1, commutative: false},
+ {name: "Broadcast1To4Float64x2", argLength: 1, commutative: false},
+ {name: "Broadcast1To4Int32x4", argLength: 1, commutative: false},
+ {name: "Broadcast1To4Int64x2", argLength: 1, commutative: false},
+ {name: "Broadcast1To4Uint32x4", argLength: 1, commutative: false},
+ {name: "Broadcast1To4Uint64x2", argLength: 1, commutative: false},
+ {name: "Broadcast1To8Float32x4", argLength: 1, commutative: false},
+ {name: "Broadcast1To8Float64x2", argLength: 1, commutative: false},
+ {name: "Broadcast1To8Int16x8", argLength: 1, commutative: false},
+ {name: "Broadcast1To8Int32x4", argLength: 1, commutative: false},
+ {name: "Broadcast1To8Int64x2", argLength: 1, commutative: false},
+ {name: "Broadcast1To8Uint16x8", argLength: 1, commutative: false},
+ {name: "Broadcast1To8Uint32x4", argLength: 1, commutative: false},
+ {name: "Broadcast1To8Uint64x2", argLength: 1, commutative: false},
+ {name: "Broadcast1To16Float32x4", argLength: 1, commutative: false},
+ {name: "Broadcast1To16Int8x16", argLength: 1, commutative: false},
+ {name: "Broadcast1To16Int16x8", argLength: 1, commutative: false},
+ {name: "Broadcast1To16Int32x4", argLength: 1, commutative: false},
+ {name: "Broadcast1To16Uint8x16", argLength: 1, commutative: false},
+ {name: "Broadcast1To16Uint16x8", argLength: 1, commutative: false},
+ {name: "Broadcast1To16Uint32x4", argLength: 1, commutative: false},
+ {name: "Broadcast1To32Int8x16", argLength: 1, commutative: false},
+ {name: "Broadcast1To32Int16x8", argLength: 1, commutative: false},
+ {name: "Broadcast1To32Uint8x16", argLength: 1, commutative: false},
+ {name: "Broadcast1To32Uint16x8", argLength: 1, commutative: false},
+ {name: "Broadcast1To64Int8x16", argLength: 1, commutative: false},
+ {name: "Broadcast1To64Uint8x16", argLength: 1, commutative: false},
{name: "CeilFloat32x4", argLength: 1, commutative: false},
{name: "CeilFloat32x8", argLength: 1, commutative: false},
{name: "CeilFloat64x2", argLength: 1, commutative: false},
diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go
index 7b70dc2686..9e5fdb1fc1 100644
--- a/src/cmd/compile/internal/ssa/opGen.go
+++ b/src/cmd/compile/internal/ssa/opGen.go
@@ -6309,36 +6309,36 @@ const (
OpAverageUint16x8
OpAverageUint16x16
OpAverageUint16x32
- OpBroadcast128Float32x4
- OpBroadcast128Float64x2
- OpBroadcast128Int8x16
- OpBroadcast128Int16x8
- OpBroadcast128Int32x4
- OpBroadcast128Int64x2
- OpBroadcast128Uint8x16
- OpBroadcast128Uint16x8
- OpBroadcast128Uint32x4
- OpBroadcast128Uint64x2
- OpBroadcast256Float32x4
- OpBroadcast256Float64x2
- OpBroadcast256Int8x16
- OpBroadcast256Int16x8
- OpBroadcast256Int32x4
- OpBroadcast256Int64x2
- OpBroadcast256Uint8x16
- OpBroadcast256Uint16x8
- OpBroadcast256Uint32x4
- OpBroadcast256Uint64x2
- OpBroadcast512Float32x4
- OpBroadcast512Float64x2
- OpBroadcast512Int8x16
- OpBroadcast512Int16x8
- OpBroadcast512Int32x4
- OpBroadcast512Int64x2
- OpBroadcast512Uint8x16
- OpBroadcast512Uint16x8
- OpBroadcast512Uint32x4
- OpBroadcast512Uint64x2
+ OpBroadcast1To2Float64x2
+ OpBroadcast1To2Int64x2
+ OpBroadcast1To2Uint64x2
+ OpBroadcast1To4Float32x4
+ OpBroadcast1To4Float64x2
+ OpBroadcast1To4Int32x4
+ OpBroadcast1To4Int64x2
+ OpBroadcast1To4Uint32x4
+ OpBroadcast1To4Uint64x2
+ OpBroadcast1To8Float32x4
+ OpBroadcast1To8Float64x2
+ OpBroadcast1To8Int16x8
+ OpBroadcast1To8Int32x4
+ OpBroadcast1To8Int64x2
+ OpBroadcast1To8Uint16x8
+ OpBroadcast1To8Uint32x4
+ OpBroadcast1To8Uint64x2
+ OpBroadcast1To16Float32x4
+ OpBroadcast1To16Int8x16
+ OpBroadcast1To16Int16x8
+ OpBroadcast1To16Int32x4
+ OpBroadcast1To16Uint8x16
+ OpBroadcast1To16Uint16x8
+ OpBroadcast1To16Uint32x4
+ OpBroadcast1To32Int8x16
+ OpBroadcast1To32Int16x8
+ OpBroadcast1To32Uint8x16
+ OpBroadcast1To32Uint16x8
+ OpBroadcast1To64Int8x16
+ OpBroadcast1To64Uint8x16
OpCeilFloat32x4
OpCeilFloat32x8
OpCeilFloat64x2
@@ -89875,152 +89875,152 @@ var opcodeTable = [...]opInfo{
generic: true,
},
{
- name: "Broadcast128Float32x4",
+ name: "Broadcast1To2Float64x2",
argLen: 1,
generic: true,
},
{
- name: "Broadcast128Float64x2",
+ name: "Broadcast1To2Int64x2",
argLen: 1,
generic: true,
},
{
- name: "Broadcast128Int8x16",
+ name: "Broadcast1To2Uint64x2",
argLen: 1,
generic: true,
},
{
- name: "Broadcast128Int16x8",
+ name: "Broadcast1To4Float32x4",
argLen: 1,
generic: true,
},
{
- name: "Broadcast128Int32x4",
+ name: "Broadcast1To4Float64x2",
argLen: 1,
generic: true,
},
{
- name: "Broadcast128Int64x2",
+ name: "Broadcast1To4Int32x4",
argLen: 1,
generic: true,
},
{
- name: "Broadcast128Uint8x16",
+ name: "Broadcast1To4Int64x2",
argLen: 1,
generic: true,
},
{
- name: "Broadcast128Uint16x8",
+ name: "Broadcast1To4Uint32x4",
argLen: 1,
generic: true,
},
{
- name: "Broadcast128Uint32x4",
+ name: "Broadcast1To4Uint64x2",
argLen: 1,
generic: true,
},
{
- name: "Broadcast128Uint64x2",
+ name: "Broadcast1To8Float32x4",
argLen: 1,
generic: true,
},
{
- name: "Broadcast256Float32x4",
+ name: "Broadcast1To8Float64x2",
argLen: 1,
generic: true,
},
{
- name: "Broadcast256Float64x2",
+ name: "Broadcast1To8Int16x8",
argLen: 1,
generic: true,
},
{
- name: "Broadcast256Int8x16",
+ name: "Broadcast1To8Int32x4",
argLen: 1,
generic: true,
},
{
- name: "Broadcast256Int16x8",
+ name: "Broadcast1To8Int64x2",
argLen: 1,
generic: true,
},
{
- name: "Broadcast256Int32x4",
+ name: "Broadcast1To8Uint16x8",
argLen: 1,
generic: true,
},
{
- name: "Broadcast256Int64x2",
+ name: "Broadcast1To8Uint32x4",
argLen: 1,
generic: true,
},
{
- name: "Broadcast256Uint8x16",
+ name: "Broadcast1To8Uint64x2",
argLen: 1,
generic: true,
},
{
- name: "Broadcast256Uint16x8",
+ name: "Broadcast1To16Float32x4",
argLen: 1,
generic: true,
},
{
- name: "Broadcast256Uint32x4",
+ name: "Broadcast1To16Int8x16",
argLen: 1,
generic: true,
},
{
- name: "Broadcast256Uint64x2",
+ name: "Broadcast1To16Int16x8",
argLen: 1,
generic: true,
},
{
- name: "Broadcast512Float32x4",
+ name: "Broadcast1To16Int32x4",
argLen: 1,
generic: true,
},
{
- name: "Broadcast512Float64x2",
+ name: "Broadcast1To16Uint8x16",
argLen: 1,
generic: true,
},
{
- name: "Broadcast512Int8x16",
+ name: "Broadcast1To16Uint16x8",
argLen: 1,
generic: true,
},
{
- name: "Broadcast512Int16x8",
+ name: "Broadcast1To16Uint32x4",
argLen: 1,
generic: true,
},
{
- name: "Broadcast512Int32x4",
+ name: "Broadcast1To32Int8x16",
argLen: 1,
generic: true,
},
{
- name: "Broadcast512Int64x2",
+ name: "Broadcast1To32Int16x8",
argLen: 1,
generic: true,
},
{
- name: "Broadcast512Uint8x16",
+ name: "Broadcast1To32Uint8x16",
argLen: 1,
generic: true,
},
{
- name: "Broadcast512Uint16x8",
+ name: "Broadcast1To32Uint16x8",
argLen: 1,
generic: true,
},
{
- name: "Broadcast512Uint32x4",
+ name: "Broadcast1To64Int8x16",
argLen: 1,
generic: true,
},
{
- name: "Broadcast512Uint64x2",
+ name: "Broadcast1To64Uint8x16",
argLen: 1,
generic: true,
},
diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go
index e84bf19c83..fe0005bb05 100644
--- a/src/cmd/compile/internal/ssa/rewriteAMD64.go
+++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go
@@ -2479,96 +2479,96 @@ func rewriteValueAMD64(v *Value) bool {
return rewriteValueAMD64_OpBitLen64(v)
case OpBitLen8:
return rewriteValueAMD64_OpBitLen8(v)
- case OpBroadcast128Float32x4:
- v.Op = OpAMD64VBROADCASTSS128
+ case OpBroadcast1To16Float32x4:
+ v.Op = OpAMD64VBROADCASTSS512
return true
- case OpBroadcast128Float64x2:
- v.Op = OpAMD64VPBROADCASTQ128
+ case OpBroadcast1To16Int16x8:
+ v.Op = OpAMD64VPBROADCASTW256
return true
- case OpBroadcast128Int16x8:
- v.Op = OpAMD64VPBROADCASTW128
+ case OpBroadcast1To16Int32x4:
+ v.Op = OpAMD64VPBROADCASTD512
return true
- case OpBroadcast128Int32x4:
- v.Op = OpAMD64VPBROADCASTD128
+ case OpBroadcast1To16Int8x16:
+ v.Op = OpAMD64VPBROADCASTB128
return true
- case OpBroadcast128Int64x2:
- v.Op = OpAMD64VPBROADCASTQ128
+ case OpBroadcast1To16Uint16x8:
+ v.Op = OpAMD64VPBROADCASTW256
return true
- case OpBroadcast128Int8x16:
+ case OpBroadcast1To16Uint32x4:
+ v.Op = OpAMD64VPBROADCASTD512
+ return true
+ case OpBroadcast1To16Uint8x16:
v.Op = OpAMD64VPBROADCASTB128
return true
- case OpBroadcast128Uint16x8:
- v.Op = OpAMD64VPBROADCASTW128
+ case OpBroadcast1To2Float64x2:
+ v.Op = OpAMD64VPBROADCASTQ128
return true
- case OpBroadcast128Uint32x4:
- v.Op = OpAMD64VPBROADCASTD128
+ case OpBroadcast1To2Int64x2:
+ v.Op = OpAMD64VPBROADCASTQ128
return true
- case OpBroadcast128Uint64x2:
+ case OpBroadcast1To2Uint64x2:
v.Op = OpAMD64VPBROADCASTQ128
return true
- case OpBroadcast128Uint8x16:
- v.Op = OpAMD64VPBROADCASTB128
+ case OpBroadcast1To32Int16x8:
+ v.Op = OpAMD64VPBROADCASTW512
return true
- case OpBroadcast256Float32x4:
- v.Op = OpAMD64VBROADCASTSS256
+ case OpBroadcast1To32Int8x16:
+ v.Op = OpAMD64VPBROADCASTB256
return true
- case OpBroadcast256Float64x2:
- v.Op = OpAMD64VBROADCASTSD256
+ case OpBroadcast1To32Uint16x8:
+ v.Op = OpAMD64VPBROADCASTW512
return true
- case OpBroadcast256Int16x8:
- v.Op = OpAMD64VPBROADCASTW256
+ case OpBroadcast1To32Uint8x16:
+ v.Op = OpAMD64VPBROADCASTB256
return true
- case OpBroadcast256Int32x4:
- v.Op = OpAMD64VPBROADCASTD256
+ case OpBroadcast1To4Float32x4:
+ v.Op = OpAMD64VBROADCASTSS128
return true
- case OpBroadcast256Int64x2:
- v.Op = OpAMD64VPBROADCASTQ256
+ case OpBroadcast1To4Float64x2:
+ v.Op = OpAMD64VBROADCASTSD256
return true
- case OpBroadcast256Int8x16:
- v.Op = OpAMD64VPBROADCASTB256
+ case OpBroadcast1To4Int32x4:
+ v.Op = OpAMD64VPBROADCASTD128
return true
- case OpBroadcast256Uint16x8:
- v.Op = OpAMD64VPBROADCASTW256
+ case OpBroadcast1To4Int64x2:
+ v.Op = OpAMD64VPBROADCASTQ256
return true
- case OpBroadcast256Uint32x4:
- v.Op = OpAMD64VPBROADCASTD256
+ case OpBroadcast1To4Uint32x4:
+ v.Op = OpAMD64VPBROADCASTD128
return true
- case OpBroadcast256Uint64x2:
+ case OpBroadcast1To4Uint64x2:
v.Op = OpAMD64VPBROADCASTQ256
return true
- case OpBroadcast256Uint8x16:
- v.Op = OpAMD64VPBROADCASTB256
+ case OpBroadcast1To64Int8x16:
+ v.Op = OpAMD64VPBROADCASTB512
return true
- case OpBroadcast512Float32x4:
- v.Op = OpAMD64VBROADCASTSS512
+ case OpBroadcast1To64Uint8x16:
+ v.Op = OpAMD64VPBROADCASTB512
+ return true
+ case OpBroadcast1To8Float32x4:
+ v.Op = OpAMD64VBROADCASTSS256
return true
- case OpBroadcast512Float64x2:
+ case OpBroadcast1To8Float64x2:
v.Op = OpAMD64VBROADCASTSD512
return true
- case OpBroadcast512Int16x8:
- v.Op = OpAMD64VPBROADCASTW512
+ case OpBroadcast1To8Int16x8:
+ v.Op = OpAMD64VPBROADCASTW128
return true
- case OpBroadcast512Int32x4:
- v.Op = OpAMD64VPBROADCASTD512
+ case OpBroadcast1To8Int32x4:
+ v.Op = OpAMD64VPBROADCASTD256
return true
- case OpBroadcast512Int64x2:
+ case OpBroadcast1To8Int64x2:
v.Op = OpAMD64VPBROADCASTQ512
return true
- case OpBroadcast512Int8x16:
- v.Op = OpAMD64VPBROADCASTB512
- return true
- case OpBroadcast512Uint16x8:
- v.Op = OpAMD64VPBROADCASTW512
+ case OpBroadcast1To8Uint16x8:
+ v.Op = OpAMD64VPBROADCASTW128
return true
- case OpBroadcast512Uint32x4:
- v.Op = OpAMD64VPBROADCASTD512
+ case OpBroadcast1To8Uint32x4:
+ v.Op = OpAMD64VPBROADCASTD256
return true
- case OpBroadcast512Uint64x2:
+ case OpBroadcast1To8Uint64x2:
v.Op = OpAMD64VPBROADCASTQ512
return true
- case OpBroadcast512Uint8x16:
- v.Op = OpAMD64VPBROADCASTB512
- return true
case OpBswap16:
return rewriteValueAMD64_OpBswap16(v)
case OpBswap32:
diff --git a/src/cmd/compile/internal/ssagen/simdintrinsics.go b/src/cmd/compile/internal/ssagen/simdintrinsics.go
index 4ad0c6032c..e50561845b 100644
--- a/src/cmd/compile/internal/ssagen/simdintrinsics.go
+++ b/src/cmd/compile/internal/ssagen/simdintrinsics.go
@@ -152,36 +152,36 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
addF(simdPackage, "Uint16x8.Average", opLen2(ssa.OpAverageUint16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint16x16.Average", opLen2(ssa.OpAverageUint16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint16x32.Average", opLen2(ssa.OpAverageUint16x32, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Float32x4.Broadcast128", opLen1(ssa.OpBroadcast128Float32x4, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Float64x2.Broadcast128", opLen1(ssa.OpBroadcast128Float64x2, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Int8x16.Broadcast128", opLen1(ssa.OpBroadcast128Int8x16, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Int16x8.Broadcast128", opLen1(ssa.OpBroadcast128Int16x8, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Int32x4.Broadcast128", opLen1(ssa.OpBroadcast128Int32x4, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Int64x2.Broadcast128", opLen1(ssa.OpBroadcast128Int64x2, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Uint8x16.Broadcast128", opLen1(ssa.OpBroadcast128Uint8x16, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Uint16x8.Broadcast128", opLen1(ssa.OpBroadcast128Uint16x8, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Uint32x4.Broadcast128", opLen1(ssa.OpBroadcast128Uint32x4, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Uint64x2.Broadcast128", opLen1(ssa.OpBroadcast128Uint64x2, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Float32x4.Broadcast256", opLen1(ssa.OpBroadcast256Float32x4, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Float64x2.Broadcast256", opLen1(ssa.OpBroadcast256Float64x2, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Int8x16.Broadcast256", opLen1(ssa.OpBroadcast256Int8x16, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Int16x8.Broadcast256", opLen1(ssa.OpBroadcast256Int16x8, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Int32x4.Broadcast256", opLen1(ssa.OpBroadcast256Int32x4, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Int64x2.Broadcast256", opLen1(ssa.OpBroadcast256Int64x2, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Uint8x16.Broadcast256", opLen1(ssa.OpBroadcast256Uint8x16, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Uint16x8.Broadcast256", opLen1(ssa.OpBroadcast256Uint16x8, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Uint32x4.Broadcast256", opLen1(ssa.OpBroadcast256Uint32x4, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Uint64x2.Broadcast256", opLen1(ssa.OpBroadcast256Uint64x2, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Float32x4.Broadcast512", opLen1(ssa.OpBroadcast512Float32x4, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Float64x2.Broadcast512", opLen1(ssa.OpBroadcast512Float64x2, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Int8x16.Broadcast512", opLen1(ssa.OpBroadcast512Int8x16, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Int16x8.Broadcast512", opLen1(ssa.OpBroadcast512Int16x8, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Int32x4.Broadcast512", opLen1(ssa.OpBroadcast512Int32x4, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Int64x2.Broadcast512", opLen1(ssa.OpBroadcast512Int64x2, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Uint8x16.Broadcast512", opLen1(ssa.OpBroadcast512Uint8x16, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Uint16x8.Broadcast512", opLen1(ssa.OpBroadcast512Uint16x8, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Uint32x4.Broadcast512", opLen1(ssa.OpBroadcast512Uint32x4, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Uint64x2.Broadcast512", opLen1(ssa.OpBroadcast512Uint64x2, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Float64x2.Broadcast1To2", opLen1(ssa.OpBroadcast1To2Float64x2, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int64x2.Broadcast1To2", opLen1(ssa.OpBroadcast1To2Int64x2, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint64x2.Broadcast1To2", opLen1(ssa.OpBroadcast1To2Uint64x2, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Float32x4.Broadcast1To4", opLen1(ssa.OpBroadcast1To4Float32x4, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Float64x2.Broadcast1To4", opLen1(ssa.OpBroadcast1To4Float64x2, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int32x4.Broadcast1To4", opLen1(ssa.OpBroadcast1To4Int32x4, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int64x2.Broadcast1To4", opLen1(ssa.OpBroadcast1To4Int64x2, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Uint32x4.Broadcast1To4", opLen1(ssa.OpBroadcast1To4Uint32x4, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint64x2.Broadcast1To4", opLen1(ssa.OpBroadcast1To4Uint64x2, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Float32x4.Broadcast1To8", opLen1(ssa.OpBroadcast1To8Float32x4, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Float64x2.Broadcast1To8", opLen1(ssa.OpBroadcast1To8Float64x2, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Int16x8.Broadcast1To8", opLen1(ssa.OpBroadcast1To8Int16x8, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int32x4.Broadcast1To8", opLen1(ssa.OpBroadcast1To8Int32x4, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int64x2.Broadcast1To8", opLen1(ssa.OpBroadcast1To8Int64x2, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Uint16x8.Broadcast1To8", opLen1(ssa.OpBroadcast1To8Uint16x8, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint32x4.Broadcast1To8", opLen1(ssa.OpBroadcast1To8Uint32x4, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Uint64x2.Broadcast1To8", opLen1(ssa.OpBroadcast1To8Uint64x2, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Float32x4.Broadcast1To16", opLen1(ssa.OpBroadcast1To16Float32x4, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Int8x16.Broadcast1To16", opLen1(ssa.OpBroadcast1To16Int8x16, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int16x8.Broadcast1To16", opLen1(ssa.OpBroadcast1To16Int16x8, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int32x4.Broadcast1To16", opLen1(ssa.OpBroadcast1To16Int32x4, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Uint8x16.Broadcast1To16", opLen1(ssa.OpBroadcast1To16Uint8x16, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint16x8.Broadcast1To16", opLen1(ssa.OpBroadcast1To16Uint16x8, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Uint32x4.Broadcast1To16", opLen1(ssa.OpBroadcast1To16Uint32x4, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Int8x16.Broadcast1To32", opLen1(ssa.OpBroadcast1To32Int8x16, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int16x8.Broadcast1To32", opLen1(ssa.OpBroadcast1To32Int16x8, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Uint8x16.Broadcast1To32", opLen1(ssa.OpBroadcast1To32Uint8x16, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Uint16x8.Broadcast1To32", opLen1(ssa.OpBroadcast1To32Uint16x8, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Int8x16.Broadcast1To64", opLen1(ssa.OpBroadcast1To64Int8x16, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Uint8x16.Broadcast1To64", opLen1(ssa.OpBroadcast1To64Uint8x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Float32x4.Ceil", opLen1(ssa.OpCeilFloat32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float32x8.Ceil", opLen1(ssa.OpCeilFloat32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float64x2.Ceil", opLen1(ssa.OpCeilFloat64x2, types.TypeVec128), sys.AMD64)
diff --git a/src/cmd/go/internal/work/exec.go b/src/cmd/go/internal/work/exec.go
index 654e9e9374..f2d1b1040b 100644
--- a/src/cmd/go/internal/work/exec.go
+++ b/src/cmd/go/internal/work/exec.go
@@ -248,6 +248,11 @@ func (b *Builder) Do(ctx context.Context, root *Action) {
wg.Wait()
+ if tokens != totalTokens || concurrentProcesses != 0 {
+ base.Fatalf("internal error: tokens not restored at end of build: tokens: %d, totalTokens: %d, concurrentProcesses: %d",
+ tokens, totalTokens, concurrentProcesses)
+ }
+
// Write action graph again, this time with timing information.
writeActionGraph()
}
diff --git a/src/cmd/go/internal/work/gc.go b/src/cmd/go/internal/work/gc.go
index 9a5e6c924c..6300a9135b 100644
--- a/src/cmd/go/internal/work/gc.go
+++ b/src/cmd/go/internal/work/gc.go
@@ -217,16 +217,17 @@ func compilerConcurrency() (int, func()) {
concurrentProcesses++
// Set aside tokens so that we don't run out if we were running cfg.BuildP concurrent compiles.
// We'll set aside minTokens tokens for each of the action goroutines that aren't currently running a compile.
- setAside := cfg.BuildP - concurrentProcesses
+ setAside := (cfg.BuildP - concurrentProcesses) * minTokens
availableTokens := tokens - setAside
- // Grab half the remaining tokens: but with a floor of at least 1 token, and
+ // Grab half the remaining tokens, but with a floor of at least minTokens tokens, and
// a ceiling of the max backend concurrency.
- c := max(min(availableTokens/2, maxCompilerConcurrency), 1)
+ c := max(min(availableTokens/2, maxCompilerConcurrency), minTokens)
tokens -= c
// Successfully grabbed the tokens.
return c, func() {
tokensMu.Lock()
defer tokensMu.Unlock()
+ concurrentProcesses--
tokens += c
}
}
@@ -235,17 +236,22 @@ var maxCompilerConcurrency = runtime.GOMAXPROCS(0) // max value we will use for
var (
tokensMu sync.Mutex
+ totalTokens int // total number of tokens: this is used for checking that we get them all back in the end
tokens int // number of available tokens
concurrentProcesses int // number of currently running compiles
+ minTokens int // minimum number of tokens to give out
)
// initCompilerConcurrencyPool sets the number of tokens in the pool. It needs
// to be run after init, so that it can use the value of cfg.BuildP.
func initCompilerConcurrencyPool() {
- // Size the pool so that the worst case total number of compiles is not more
- // than what it was when we capped the concurrency to 4.
- oldConcurrencyCap := min(4, maxCompilerConcurrency)
- tokens = oldConcurrencyCap * cfg.BuildP
+ // Size the pool to allow 2*maxCompilerConcurrency extra tokens to
+ // be distributed amongst the compile actions in addition to the minimum
+ // of min(4,GOMAXPROCS) tokens for each of the potentially cfg.BuildP
+ // concurrently running compile actions.
+ minTokens = min(4, maxCompilerConcurrency)
+ tokens = 2*maxCompilerConcurrency + minTokens*cfg.BuildP
+ totalTokens = tokens
}
// trimpath returns the -trimpath argument to use
diff --git a/src/internal/types/testdata/check/cycles6.go b/src/internal/types/testdata/check/cycles6.go
new file mode 100644
index 0000000000..e5635ed456
--- /dev/null
+++ b/src/internal/types/testdata/check/cycles6.go
@@ -0,0 +1,71 @@
+// Copyright 2026 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package p
+
+import "unsafe"
+
+// Below are the pieces of syntax corresponding to functions which can produce a
+// value of type T without first having a value of type T. Notice that each causes a
+// value of type T to be passed to unsafe.Sizeof while T is incomplete.
+
+// literal on type
+type T0 /* ERROR "invalid recursive type" */ [unsafe.Sizeof(T0{})]int
+// literal on value (not applicable)
+// literal on pointer (not applicable)
+
+// call on type
+type T1 /* ERROR "invalid recursive type" */ [unsafe.Sizeof(T1(42))]int
+// call on value
+func f2() T2
+type T2 /* ERROR "invalid recursive type" */ [unsafe.Sizeof(f2())]int
+// call on pointer (not applicable)
+
+// assert on type
+var i3 interface{}
+type T3 /* ERROR "invalid recursive type" */ [unsafe.Sizeof(i3.(T3))]int
+// assert on value (not applicable)
+// assert on pointer (not applicable)
+
+// receive on type (not applicable)
+// receive on value
+func f4() <-chan T4
+type T4 /* ERROR "invalid recursive type" */ [unsafe.Sizeof(<-f4())]int
+// receive on pointer (not applicable)
+
+// star on type (not applicable)
+// star on value (not applicable)
+// star on pointer
+func f5() *T5
+type T5 /* ERROR "invalid recursive type" */ [unsafe.Sizeof(*f5())]int
+
+// Below is additional syntax which interacts with incomplete types. Notice that
+// each of the below falls into 1 of 3 cases:
+// 1. It cannot produce a value of (incomplete) type T.
+// 2. It can, but only because it already has a value of type T.
+// 3. It can, but only because it performs an implicit dereference.
+
+// select on type (case 1)
+// select on value (case 2)
+type T6 /* ERROR "invalid recursive type" */ struct {
+ f T7
+}
+type T7 [unsafe.Sizeof(T6{}.f)]int
+// select on pointer (case 3)
+type T8 /* ERROR "invalid recursive type" */ struct {
+ f T9
+}
+type T9 [unsafe.Sizeof(new(T8).f)]int
+
+// slice on type (not applicable)
+// slice on value (case 2)
+type T10 /* ERROR "invalid recursive type" */ [unsafe.Sizeof(T10{}[:])]int
+// slice on pointer (case 3)
+type T11 /* ERROR "invalid recursive type" */ [unsafe.Sizeof(new(T11)[:])]int
+
+// index on type (case 1)
+// index on value (case 2)
+type T12 /* ERROR "invalid recursive type" */ [unsafe.Sizeof(T12{}[42])]int
+// index on pointer (case 3)
+type T13 /* ERROR "invalid recursive type" */ [unsafe.Sizeof(new(T13)[42])]int
diff --git a/src/runtime/trace/recorder.go b/src/runtime/trace/recorder.go
index 4f2d3aa92a..a18d764141 100644
--- a/src/runtime/trace/recorder.go
+++ b/src/runtime/trace/recorder.go
@@ -39,7 +39,7 @@ func (w *recorder) Write(b []byte) (n int, err error) {
w.headerReceived = true
}
if len(b) == n {
- return 0, nil
+ return n, nil
}
ba, nb, err := readBatch(b[n:]) // Every write from the runtime is guaranteed to be a complete batch.
if err != nil {
diff --git a/src/simd/archsimd/_gen/simdgen/ops/Moves/categories.yaml b/src/simd/archsimd/_gen/simdgen/ops/Moves/categories.yaml
index 38bc9374cc..3cba01ef95 100644
--- a/src/simd/archsimd/_gen/simdgen/ops/Moves/categories.yaml
+++ b/src/simd/archsimd/_gen/simdgen/ops/Moves/categories.yaml
@@ -69,21 +69,36 @@
documentation: !string |-
// NAME performs an expansion on a vector x whose elements are packed to lower parts.
// The expansion is to distribute elements as indexed by mask, from lower mask elements to upper in order.
-- go: Broadcast128
+- go: Broadcast1To2
commutative: false
documentation: !string |-
- // NAME copies element zero of its (128-bit) input to all elements of
- // the 128-bit output vector.
-- go: Broadcast256
+ // NAME copies the lowest element of its input to all 2 elements of
+ // the output vector.
+- go: Broadcast1To4
commutative: false
documentation: !string |-
- // NAME copies element zero of its (128-bit) input to all elements of
- // the 256-bit output vector.
-- go: Broadcast512
+ // NAME copies the lowest element of its input to all 4 elements of
+ // the output vector.
+- go: Broadcast1To8
commutative: false
documentation: !string |-
- // NAME copies element zero of its (128-bit) input to all elements of
- // the 512-bit output vector.
+ // NAME copies the lowest element of its input to all 8 elements of
+ // the output vector.
+- go: Broadcast1To16
+ commutative: false
+ documentation: !string |-
+ // NAME copies the lowest element of its input to all 16 elements of
+ // the output vector.
+- go: Broadcast1To32
+ commutative: false
+ documentation: !string |-
+ // NAME copies the lowest element of its input to all 32 elements of
+ // the output vector.
+- go: Broadcast1To64
+ commutative: false
+ documentation: !string |-
+ // NAME copies the lowest element of its input to all 64 elements of
+ // the output vector.
- go: PermuteOrZeroGrouped
commutative: false
documentation: !string |- # Detailed documentation will rely on the specific ops.
diff --git a/src/simd/archsimd/_gen/simdgen/ops/Moves/go.yaml b/src/simd/archsimd/_gen/simdgen/ops/Moves/go.yaml
index e1fd184ed7..02daa2ea1e 100644
--- a/src/simd/archsimd/_gen/simdgen/ops/Moves/go.yaml
+++ b/src/simd/archsimd/_gen/simdgen/ops/Moves/go.yaml
@@ -376,21 +376,21 @@
out:
- *any
-- go: Broadcast128
- asm: VPBROADCAST[BWDQ]
+- go: Broadcast1To2
+ asm: VPBROADCASTQ
in:
- class: vreg
bits: 128
- elemBits: $e
+ elemBits: 64
base: $b
out:
- class: vreg
bits: 128
- elemBits: $e
+ elemBits: 64
base: $b
# weirdly, this one case on AVX2 is memory-operand-only
-- go: Broadcast128
+- go: Broadcast1To2
asm: VPBROADCASTQ
in:
- class: vreg
@@ -405,71 +405,94 @@
base: int
OverwriteBase: float
-- go: Broadcast256
+- go: Broadcast1To4
asm: VPBROADCAST[BWDQ]
in:
- class: vreg
bits: 128
- elemBits: $e
base: $b
out:
- class: vreg
- bits: 256
- elemBits: $e
+ lanes: 4
base: $b
-- go: Broadcast512
+- go: Broadcast1To8
asm: VPBROADCAST[BWDQ]
in:
- class: vreg
bits: 128
- elemBits: $e
base: $b
out:
- class: vreg
- bits: 512
- elemBits: $e
+ lanes: 8
base: $b
-- go: Broadcast128
- asm: VBROADCASTS[SD]
+- go: Broadcast1To16
+ asm: VPBROADCAST[BWDQ]
in:
- class: vreg
bits: 128
- elemBits: $e
base: $b
out:
- class: vreg
- bits: 128
- elemBits: $e
+ lanes: 16
base: $b
-- go: Broadcast256
- asm: VBROADCASTS[SD]
+- go: Broadcast1To32
+ asm: VPBROADCAST[BWDQ]
in:
- class: vreg
bits: 128
- elemBits: $e
base: $b
out:
- class: vreg
- bits: 256
- elemBits: $e
+ lanes: 32
base: $b
-- go: Broadcast512
- asm: VBROADCASTS[SD]
+- go: Broadcast1To64
+ asm: VPBROADCASTB
in:
- class: vreg
bits: 128
- elemBits: $e
base: $b
out:
- class: vreg
- bits: 512
- elemBits: $e
+ lanes: 64
base: $b
+- go: Broadcast1To4
+ asm: VBROADCASTS[SD]
+ in:
+ - class: vreg
+ bits: 128
+ base: float
+ out:
+ - class: vreg
+ lanes: 4
+ base: float
+
+- go: Broadcast1To8
+ asm: VBROADCASTS[SD]
+ in:
+ - class: vreg
+ bits: 128
+ base: float
+ out:
+ - class: vreg
+ lanes: 8
+ base: float
+
+- go: Broadcast1To16
+ asm: VBROADCASTS[SD]
+ in:
+ - class: vreg
+ bits: 128
+ base: float
+ out:
+ - class: vreg
+ lanes: 16
+ base: float
+
# VPSHUFB for 128-bit byte shuffles will be picked with higher priority than VPERMB, given its lower CPU feature requirement. (It's AVX)
- go: PermuteOrZero
asm: VPSHUFB
diff --git a/src/simd/archsimd/_gen/tmplgen/main.go b/src/simd/archsimd/_gen/tmplgen/main.go
index 8db185e1e0..45338b765d 100644
--- a/src/simd/archsimd/_gen/tmplgen/main.go
+++ b/src/simd/archsimd/_gen/tmplgen/main.go
@@ -873,7 +873,7 @@ var broadcastTemplate = templateOf("Broadcast functions", `
// Emulated, CPU Feature: {{.CPUfeatureBC}}
func Broadcast{{.VType}}(x {{.Etype}}) {{.VType}} {
var z {{.As128BitVec }}
- return z.SetElem(0, x).Broadcast{{.Vwidth}}()
+ return z.SetElem(0, x).Broadcast1To{{.Count}}()
}
`)
diff --git a/src/simd/archsimd/ops_amd64.go b/src/simd/archsimd/ops_amd64.go
index eba340c793..bb162c4ff9 100644
--- a/src/simd/archsimd/ops_amd64.go
+++ b/src/simd/archsimd/ops_amd64.go
@@ -805,191 +805,197 @@ func (x Uint16x16) Average(y Uint16x16) Uint16x16
// Asm: VPAVGW, CPU Feature: AVX512
func (x Uint16x32) Average(y Uint16x32) Uint16x32
-/* Broadcast128 */
+/* Broadcast1To2 */
-// Broadcast128 copies element zero of its (128-bit) input to all elements of
-// the 128-bit output vector.
+// Broadcast1To2 copies the lowest element of its input to all 2 elements of
+// the output vector.
//
-// Asm: VBROADCASTSS, CPU Feature: AVX2
-func (x Float32x4) Broadcast128() Float32x4
+// Asm: VPBROADCASTQ, CPU Feature: AVX2
+func (x Float64x2) Broadcast1To2() Float64x2
-// Broadcast128 copies element zero of its (128-bit) input to all elements of
-// the 128-bit output vector.
+// Broadcast1To2 copies the lowest element of its input to all 2 elements of
+// the output vector.
//
// Asm: VPBROADCASTQ, CPU Feature: AVX2
-func (x Float64x2) Broadcast128() Float64x2
+func (x Int64x2) Broadcast1To2() Int64x2
-// Broadcast128 copies element zero of its (128-bit) input to all elements of
-// the 128-bit output vector.
+// Broadcast1To2 copies the lowest element of its input to all 2 elements of
+// the output vector.
//
-// Asm: VPBROADCASTB, CPU Feature: AVX2
-func (x Int8x16) Broadcast128() Int8x16
+// Asm: VPBROADCASTQ, CPU Feature: AVX2
+func (x Uint64x2) Broadcast1To2() Uint64x2
-// Broadcast128 copies element zero of its (128-bit) input to all elements of
-// the 128-bit output vector.
-//
-// Asm: VPBROADCASTW, CPU Feature: AVX2
-func (x Int16x8) Broadcast128() Int16x8
+/* Broadcast1To4 */
-// Broadcast128 copies element zero of its (128-bit) input to all elements of
-// the 128-bit output vector.
+// Broadcast1To4 copies the lowest element of its input to all 4 elements of
+// the output vector.
//
-// Asm: VPBROADCASTD, CPU Feature: AVX2
-func (x Int32x4) Broadcast128() Int32x4
+// Asm: VBROADCASTSS, CPU Feature: AVX2
+func (x Float32x4) Broadcast1To4() Float32x4
-// Broadcast128 copies element zero of its (128-bit) input to all elements of
-// the 128-bit output vector.
+// Broadcast1To4 copies the lowest element of its input to all 4 elements of
+// the output vector.
//
-// Asm: VPBROADCASTQ, CPU Feature: AVX2
-func (x Int64x2) Broadcast128() Int64x2
+// Asm: VBROADCASTSD, CPU Feature: AVX2
+func (x Float64x2) Broadcast1To4() Float64x4
-// Broadcast128 copies element zero of its (128-bit) input to all elements of
-// the 128-bit output vector.
+// Broadcast1To4 copies the lowest element of its input to all 4 elements of
+// the output vector.
//
-// Asm: VPBROADCASTB, CPU Feature: AVX2
-func (x Uint8x16) Broadcast128() Uint8x16
+// Asm: VPBROADCASTD, CPU Feature: AVX2
+func (x Int32x4) Broadcast1To4() Int32x4
-// Broadcast128 copies element zero of its (128-bit) input to all elements of
-// the 128-bit output vector.
+// Broadcast1To4 copies the lowest element of its input to all 4 elements of
+// the output vector.
//
-// Asm: VPBROADCASTW, CPU Feature: AVX2
-func (x Uint16x8) Broadcast128() Uint16x8
+// Asm: VPBROADCASTQ, CPU Feature: AVX2
+func (x Int64x2) Broadcast1To4() Int64x4
-// Broadcast128 copies element zero of its (128-bit) input to all elements of
-// the 128-bit output vector.
+// Broadcast1To4 copies the lowest element of its input to all 4 elements of
+// the output vector.
//
// Asm: VPBROADCASTD, CPU Feature: AVX2
-func (x Uint32x4) Broadcast128() Uint32x4
+func (x Uint32x4) Broadcast1To4() Uint32x4
-// Broadcast128 copies element zero of its (128-bit) input to all elements of
-// the 128-bit output vector.
+// Broadcast1To4 copies the lowest element of its input to all 4 elements of
+// the output vector.
//
// Asm: VPBROADCASTQ, CPU Feature: AVX2
-func (x Uint64x2) Broadcast128() Uint64x2
+func (x Uint64x2) Broadcast1To4() Uint64x4
-/* Broadcast256 */
+/* Broadcast1To8 */
-// Broadcast256 copies element zero of its (128-bit) input to all elements of
-// the 256-bit output vector.
+// Broadcast1To8 copies the lowest element of its input to all 8 elements of
+// the output vector.
//
// Asm: VBROADCASTSS, CPU Feature: AVX2
-func (x Float32x4) Broadcast256() Float32x8
-
-// Broadcast256 copies element zero of its (128-bit) input to all elements of
-// the 256-bit output vector.
-//
-// Asm: VBROADCASTSD, CPU Feature: AVX2
-func (x Float64x2) Broadcast256() Float64x4
+func (x Float32x4) Broadcast1To8() Float32x8
-// Broadcast256 copies element zero of its (128-bit) input to all elements of
-// the 256-bit output vector.
+// Broadcast1To8 copies the lowest element of its input to all 8 elements of
+// the output vector.
//
-// Asm: VPBROADCASTB, CPU Feature: AVX2
-func (x Int8x16) Broadcast256() Int8x32
+// Asm: VBROADCASTSD, CPU Feature: AVX512
+func (x Float64x2) Broadcast1To8() Float64x8
-// Broadcast256 copies element zero of its (128-bit) input to all elements of
-// the 256-bit output vector.
+// Broadcast1To8 copies the lowest element of its input to all 8 elements of
+// the output vector.
//
// Asm: VPBROADCASTW, CPU Feature: AVX2
-func (x Int16x8) Broadcast256() Int16x16
+func (x Int16x8) Broadcast1To8() Int16x8
-// Broadcast256 copies element zero of its (128-bit) input to all elements of
-// the 256-bit output vector.
+// Broadcast1To8 copies the lowest element of its input to all 8 elements of
+// the output vector.
//
// Asm: VPBROADCASTD, CPU Feature: AVX2
-func (x Int32x4) Broadcast256() Int32x8
+func (x Int32x4) Broadcast1To8() Int32x8
-// Broadcast256 copies element zero of its (128-bit) input to all elements of
-// the 256-bit output vector.
+// Broadcast1To8 copies the lowest element of its input to all 8 elements of
+// the output vector.
//
-// Asm: VPBROADCASTQ, CPU Feature: AVX2
-func (x Int64x2) Broadcast256() Int64x4
-
-// Broadcast256 copies element zero of its (128-bit) input to all elements of
-// the 256-bit output vector.
-//
-// Asm: VPBROADCASTB, CPU Feature: AVX2
-func (x Uint8x16) Broadcast256() Uint8x32
+// Asm: VPBROADCASTQ, CPU Feature: AVX512
+func (x Int64x2) Broadcast1To8() Int64x8
-// Broadcast256 copies element zero of its (128-bit) input to all elements of
-// the 256-bit output vector.
+// Broadcast1To8 copies the lowest element of its input to all 8 elements of
+// the output vector.
//
// Asm: VPBROADCASTW, CPU Feature: AVX2
-func (x Uint16x8) Broadcast256() Uint16x16
+func (x Uint16x8) Broadcast1To8() Uint16x8
-// Broadcast256 copies element zero of its (128-bit) input to all elements of
-// the 256-bit output vector.
+// Broadcast1To8 copies the lowest element of its input to all 8 elements of
+// the output vector.
//
// Asm: VPBROADCASTD, CPU Feature: AVX2
-func (x Uint32x4) Broadcast256() Uint32x8
+func (x Uint32x4) Broadcast1To8() Uint32x8
-// Broadcast256 copies element zero of its (128-bit) input to all elements of
-// the 256-bit output vector.
+// Broadcast1To8 copies the lowest element of its input to all 8 elements of
+// the output vector.
//
-// Asm: VPBROADCASTQ, CPU Feature: AVX2
-func (x Uint64x2) Broadcast256() Uint64x4
+// Asm: VPBROADCASTQ, CPU Feature: AVX512
+func (x Uint64x2) Broadcast1To8() Uint64x8
-/* Broadcast512 */
+/* Broadcast1To16 */
-// Broadcast512 copies element zero of its (128-bit) input to all elements of
-// the 512-bit output vector.
+// Broadcast1To16 copies the lowest element of its input to all 16 elements of
+// the output vector.
//
// Asm: VBROADCASTSS, CPU Feature: AVX512
-func (x Float32x4) Broadcast512() Float32x16
+func (x Float32x4) Broadcast1To16() Float32x16
-// Broadcast512 copies element zero of its (128-bit) input to all elements of
-// the 512-bit output vector.
+// Broadcast1To16 copies the lowest element of its input to all 16 elements of
+// the output vector.
//
-// Asm: VBROADCASTSD, CPU Feature: AVX512
-func (x Float64x2) Broadcast512() Float64x8
+// Asm: VPBROADCASTB, CPU Feature: AVX2
+func (x Int8x16) Broadcast1To16() Int8x16
-// Broadcast512 copies element zero of its (128-bit) input to all elements of
-// the 512-bit output vector.
+// Broadcast1To16 copies the lowest element of its input to all 16 elements of
+// the output vector.
//
-// Asm: VPBROADCASTB, CPU Feature: AVX512
-func (x Int8x16) Broadcast512() Int8x64
+// Asm: VPBROADCASTW, CPU Feature: AVX2
+func (x Int16x8) Broadcast1To16() Int16x16
-// Broadcast512 copies element zero of its (128-bit) input to all elements of
-// the 512-bit output vector.
+// Broadcast1To16 copies the lowest element of its input to all 16 elements of
+// the output vector.
//
-// Asm: VPBROADCASTW, CPU Feature: AVX512
-func (x Int16x8) Broadcast512() Int16x32
+// Asm: VPBROADCASTD, CPU Feature: AVX512
+func (x Int32x4) Broadcast1To16() Int32x16
-// Broadcast512 copies element zero of its (128-bit) input to all elements of
-// the 512-bit output vector.
+// Broadcast1To16 copies the lowest element of its input to all 16 elements of
+// the output vector.
+//
+// Asm: VPBROADCASTB, CPU Feature: AVX2
+func (x Uint8x16) Broadcast1To16() Uint8x16
+
+// Broadcast1To16 copies the lowest element of its input to all 16 elements of
+// the output vector.
+//
+// Asm: VPBROADCASTW, CPU Feature: AVX2
+func (x Uint16x8) Broadcast1To16() Uint16x16
+
+// Broadcast1To16 copies the lowest element of its input to all 16 elements of
+// the output vector.
//
// Asm: VPBROADCASTD, CPU Feature: AVX512
-func (x Int32x4) Broadcast512() Int32x16
+func (x Uint32x4) Broadcast1To16() Uint32x16
+
+/* Broadcast1To32 */
-// Broadcast512 copies element zero of its (128-bit) input to all elements of
-// the 512-bit output vector.
+// Broadcast1To32 copies the lowest element of its input to all 32 elements of
+// the output vector.
//
-// Asm: VPBROADCASTQ, CPU Feature: AVX512
-func (x Int64x2) Broadcast512() Int64x8
+// Asm: VPBROADCASTB, CPU Feature: AVX2
+func (x Int8x16) Broadcast1To32() Int8x32
-// Broadcast512 copies element zero of its (128-bit) input to all elements of
-// the 512-bit output vector.
+// Broadcast1To32 copies the lowest element of its input to all 32 elements of
+// the output vector.
//
-// Asm: VPBROADCASTB, CPU Feature: AVX512
-func (x Uint8x16) Broadcast512() Uint8x64
+// Asm: VPBROADCASTW, CPU Feature: AVX512
+func (x Int16x8) Broadcast1To32() Int16x32
+
+// Broadcast1To32 copies the lowest element of its input to all 32 elements of
+// the output vector.
+//
+// Asm: VPBROADCASTB, CPU Feature: AVX2
+func (x Uint8x16) Broadcast1To32() Uint8x32
-// Broadcast512 copies element zero of its (128-bit) input to all elements of
-// the 512-bit output vector.
+// Broadcast1To32 copies the lowest element of its input to all 32 elements of
+// the output vector.
//
// Asm: VPBROADCASTW, CPU Feature: AVX512
-func (x Uint16x8) Broadcast512() Uint16x32
+func (x Uint16x8) Broadcast1To32() Uint16x32
+
+/* Broadcast1To64 */
-// Broadcast512 copies element zero of its (128-bit) input to all elements of
-// the 512-bit output vector.
+// Broadcast1To64 copies the lowest element of its input to all 64 elements of
+// the output vector.
//
-// Asm: VPBROADCASTD, CPU Feature: AVX512
-func (x Uint32x4) Broadcast512() Uint32x16
+// Asm: VPBROADCASTB, CPU Feature: AVX512
+func (x Int8x16) Broadcast1To64() Int8x64
-// Broadcast512 copies element zero of its (128-bit) input to all elements of
-// the 512-bit output vector.
+// Broadcast1To64 copies the lowest element of its input to all 64 elements of
+// the output vector.
//
-// Asm: VPBROADCASTQ, CPU Feature: AVX512
-func (x Uint64x2) Broadcast512() Uint64x8
+// Asm: VPBROADCASTB, CPU Feature: AVX512
+func (x Uint8x16) Broadcast1To64() Uint8x64
/* Ceil */
diff --git a/src/simd/archsimd/other_gen_amd64.go b/src/simd/archsimd/other_gen_amd64.go
index 647001acce..c250dc2436 100644
--- a/src/simd/archsimd/other_gen_amd64.go
+++ b/src/simd/archsimd/other_gen_amd64.go
@@ -10,7 +10,7 @@ package archsimd
// Emulated, CPU Feature: AVX2
func BroadcastInt8x16(x int8) Int8x16 {
var z Int8x16
- return z.SetElem(0, x).Broadcast128()
+ return z.SetElem(0, x).Broadcast1To16()
}
// BroadcastInt16x8 returns a vector with the input
@@ -19,7 +19,7 @@ func BroadcastInt8x16(x int8) Int8x16 {
// Emulated, CPU Feature: AVX2
func BroadcastInt16x8(x int16) Int16x8 {
var z Int16x8
- return z.SetElem(0, x).Broadcast128()
+ return z.SetElem(0, x).Broadcast1To8()
}
// BroadcastInt32x4 returns a vector with the input
@@ -28,7 +28,7 @@ func BroadcastInt16x8(x int16) Int16x8 {
// Emulated, CPU Feature: AVX2
func BroadcastInt32x4(x int32) Int32x4 {
var z Int32x4
- return z.SetElem(0, x).Broadcast128()
+ return z.SetElem(0, x).Broadcast1To4()
}
// BroadcastInt64x2 returns a vector with the input
@@ -37,7 +37,7 @@ func BroadcastInt32x4(x int32) Int32x4 {
// Emulated, CPU Feature: AVX2
func BroadcastInt64x2(x int64) Int64x2 {
var z Int64x2
- return z.SetElem(0, x).Broadcast128()
+ return z.SetElem(0, x).Broadcast1To2()
}
// BroadcastUint8x16 returns a vector with the input
@@ -46,7 +46,7 @@ func BroadcastInt64x2(x int64) Int64x2 {
// Emulated, CPU Feature: AVX2
func BroadcastUint8x16(x uint8) Uint8x16 {
var z Uint8x16
- return z.SetElem(0, x).Broadcast128()
+ return z.SetElem(0, x).Broadcast1To16()
}
// BroadcastUint16x8 returns a vector with the input
@@ -55,7 +55,7 @@ func BroadcastUint8x16(x uint8) Uint8x16 {
// Emulated, CPU Feature: AVX2
func BroadcastUint16x8(x uint16) Uint16x8 {
var z Uint16x8
- return z.SetElem(0, x).Broadcast128()
+ return z.SetElem(0, x).Broadcast1To8()
}
// BroadcastUint32x4 returns a vector with the input
@@ -64,7 +64,7 @@ func BroadcastUint16x8(x uint16) Uint16x8 {
// Emulated, CPU Feature: AVX2
func BroadcastUint32x4(x uint32) Uint32x4 {
var z Uint32x4
- return z.SetElem(0, x).Broadcast128()
+ return z.SetElem(0, x).Broadcast1To4()
}
// BroadcastUint64x2 returns a vector with the input
@@ -73,7 +73,7 @@ func BroadcastUint32x4(x uint32) Uint32x4 {
// Emulated, CPU Feature: AVX2
func BroadcastUint64x2(x uint64) Uint64x2 {
var z Uint64x2
- return z.SetElem(0, x).Broadcast128()
+ return z.SetElem(0, x).Broadcast1To2()
}
// BroadcastFloat32x4 returns a vector with the input
@@ -82,7 +82,7 @@ func BroadcastUint64x2(x uint64) Uint64x2 {
// Emulated, CPU Feature: AVX2
func BroadcastFloat32x4(x float32) Float32x4 {
var z Float32x4
- return z.SetElem(0, x).Broadcast128()
+ return z.SetElem(0, x).Broadcast1To4()
}
// BroadcastFloat64x2 returns a vector with the input
@@ -91,7 +91,7 @@ func BroadcastFloat32x4(x float32) Float32x4 {
// Emulated, CPU Feature: AVX2
func BroadcastFloat64x2(x float64) Float64x2 {
var z Float64x2
- return z.SetElem(0, x).Broadcast128()
+ return z.SetElem(0, x).Broadcast1To2()
}
// BroadcastInt8x32 returns a vector with the input
@@ -100,7 +100,7 @@ func BroadcastFloat64x2(x float64) Float64x2 {
// Emulated, CPU Feature: AVX2
func BroadcastInt8x32(x int8) Int8x32 {
var z Int8x16
- return z.SetElem(0, x).Broadcast256()
+ return z.SetElem(0, x).Broadcast1To32()
}
// BroadcastInt16x16 returns a vector with the input
@@ -109,7 +109,7 @@ func BroadcastInt8x32(x int8) Int8x32 {
// Emulated, CPU Feature: AVX2
func BroadcastInt16x16(x int16) Int16x16 {
var z Int16x8
- return z.SetElem(0, x).Broadcast256()
+ return z.SetElem(0, x).Broadcast1To16()
}
// BroadcastInt32x8 returns a vector with the input
@@ -118,7 +118,7 @@ func BroadcastInt16x16(x int16) Int16x16 {
// Emulated, CPU Feature: AVX2
func BroadcastInt32x8(x int32) Int32x8 {
var z Int32x4
- return z.SetElem(0, x).Broadcast256()
+ return z.SetElem(0, x).Broadcast1To8()
}
// BroadcastInt64x4 returns a vector with the input
@@ -127,7 +127,7 @@ func BroadcastInt32x8(x int32) Int32x8 {
// Emulated, CPU Feature: AVX2
func BroadcastInt64x4(x int64) Int64x4 {
var z Int64x2
- return z.SetElem(0, x).Broadcast256()
+ return z.SetElem(0, x).Broadcast1To4()
}
// BroadcastUint8x32 returns a vector with the input
@@ -136,7 +136,7 @@ func BroadcastInt64x4(x int64) Int64x4 {
// Emulated, CPU Feature: AVX2
func BroadcastUint8x32(x uint8) Uint8x32 {
var z Uint8x16
- return z.SetElem(0, x).Broadcast256()
+ return z.SetElem(0, x).Broadcast1To32()
}
// BroadcastUint16x16 returns a vector with the input
@@ -145,7 +145,7 @@ func BroadcastUint8x32(x uint8) Uint8x32 {
// Emulated, CPU Feature: AVX2
func BroadcastUint16x16(x uint16) Uint16x16 {
var z Uint16x8
- return z.SetElem(0, x).Broadcast256()
+ return z.SetElem(0, x).Broadcast1To16()
}
// BroadcastUint32x8 returns a vector with the input
@@ -154,7 +154,7 @@ func BroadcastUint16x16(x uint16) Uint16x16 {
// Emulated, CPU Feature: AVX2
func BroadcastUint32x8(x uint32) Uint32x8 {
var z Uint32x4
- return z.SetElem(0, x).Broadcast256()
+ return z.SetElem(0, x).Broadcast1To8()
}
// BroadcastUint64x4 returns a vector with the input
@@ -163,7 +163,7 @@ func BroadcastUint32x8(x uint32) Uint32x8 {
// Emulated, CPU Feature: AVX2
func BroadcastUint64x4(x uint64) Uint64x4 {
var z Uint64x2
- return z.SetElem(0, x).Broadcast256()
+ return z.SetElem(0, x).Broadcast1To4()
}
// BroadcastFloat32x8 returns a vector with the input
@@ -172,7 +172,7 @@ func BroadcastUint64x4(x uint64) Uint64x4 {
// Emulated, CPU Feature: AVX2
func BroadcastFloat32x8(x float32) Float32x8 {
var z Float32x4
- return z.SetElem(0, x).Broadcast256()
+ return z.SetElem(0, x).Broadcast1To8()
}
// BroadcastFloat64x4 returns a vector with the input
@@ -181,7 +181,7 @@ func BroadcastFloat32x8(x float32) Float32x8 {
// Emulated, CPU Feature: AVX2
func BroadcastFloat64x4(x float64) Float64x4 {
var z Float64x2
- return z.SetElem(0, x).Broadcast256()
+ return z.SetElem(0, x).Broadcast1To4()
}
// BroadcastInt8x64 returns a vector with the input
@@ -190,7 +190,7 @@ func BroadcastFloat64x4(x float64) Float64x4 {
// Emulated, CPU Feature: AVX512BW
func BroadcastInt8x64(x int8) Int8x64 {
var z Int8x16
- return z.SetElem(0, x).Broadcast512()
+ return z.SetElem(0, x).Broadcast1To64()
}
// BroadcastInt16x32 returns a vector with the input
@@ -199,7 +199,7 @@ func BroadcastInt8x64(x int8) Int8x64 {
// Emulated, CPU Feature: AVX512BW
func BroadcastInt16x32(x int16) Int16x32 {
var z Int16x8
- return z.SetElem(0, x).Broadcast512()
+ return z.SetElem(0, x).Broadcast1To32()
}
// BroadcastInt32x16 returns a vector with the input
@@ -208,7 +208,7 @@ func BroadcastInt16x32(x int16) Int16x32 {
// Emulated, CPU Feature: AVX512F
func BroadcastInt32x16(x int32) Int32x16 {
var z Int32x4
- return z.SetElem(0, x).Broadcast512()
+ return z.SetElem(0, x).Broadcast1To16()
}
// BroadcastInt64x8 returns a vector with the input
@@ -217,7 +217,7 @@ func BroadcastInt32x16(x int32) Int32x16 {
// Emulated, CPU Feature: AVX512F
func BroadcastInt64x8(x int64) Int64x8 {
var z Int64x2
- return z.SetElem(0, x).Broadcast512()
+ return z.SetElem(0, x).Broadcast1To8()
}
// BroadcastUint8x64 returns a vector with the input
@@ -226,7 +226,7 @@ func BroadcastInt64x8(x int64) Int64x8 {
// Emulated, CPU Feature: AVX512BW
func BroadcastUint8x64(x uint8) Uint8x64 {
var z Uint8x16
- return z.SetElem(0, x).Broadcast512()
+ return z.SetElem(0, x).Broadcast1To64()
}
// BroadcastUint16x32 returns a vector with the input
@@ -235,7 +235,7 @@ func BroadcastUint8x64(x uint8) Uint8x64 {
// Emulated, CPU Feature: AVX512BW
func BroadcastUint16x32(x uint16) Uint16x32 {
var z Uint16x8
- return z.SetElem(0, x).Broadcast512()
+ return z.SetElem(0, x).Broadcast1To32()
}
// BroadcastUint32x16 returns a vector with the input
@@ -244,7 +244,7 @@ func BroadcastUint16x32(x uint16) Uint16x32 {
// Emulated, CPU Feature: AVX512F
func BroadcastUint32x16(x uint32) Uint32x16 {
var z Uint32x4
- return z.SetElem(0, x).Broadcast512()
+ return z.SetElem(0, x).Broadcast1To16()
}
// BroadcastUint64x8 returns a vector with the input
@@ -253,7 +253,7 @@ func BroadcastUint32x16(x uint32) Uint32x16 {
// Emulated, CPU Feature: AVX512F
func BroadcastUint64x8(x uint64) Uint64x8 {
var z Uint64x2
- return z.SetElem(0, x).Broadcast512()
+ return z.SetElem(0, x).Broadcast1To8()
}
// BroadcastFloat32x16 returns a vector with the input
@@ -262,7 +262,7 @@ func BroadcastUint64x8(x uint64) Uint64x8 {
// Emulated, CPU Feature: AVX512F
func BroadcastFloat32x16(x float32) Float32x16 {
var z Float32x4
- return z.SetElem(0, x).Broadcast512()
+ return z.SetElem(0, x).Broadcast1To16()
}
// BroadcastFloat64x8 returns a vector with the input
@@ -271,7 +271,7 @@ func BroadcastFloat32x16(x float32) Float32x16 {
// Emulated, CPU Feature: AVX512F
func BroadcastFloat64x8(x float64) Float64x8 {
var z Float64x2
- return z.SetElem(0, x).Broadcast512()
+ return z.SetElem(0, x).Broadcast1To8()
}
// ToMask converts from Int8x16 to Mask8x16, mask element is set to true when the corresponding vector element is non-zero.