diff options
| author | Junyang Shao <shaojunyang@google.com> | 2026-01-08 18:14:02 +0000 |
|---|---|---|
| committer | Junyang Shao <shaojunyang@google.com> | 2026-01-08 18:14:02 +0000 |
| commit | 216d147a6de31c46d46857096d65c1c97c2ab25c (patch) | |
| tree | 798f6094df8a12e9314c96f1e70b29e06407f04d | |
| parent | b8191a2f9893220bdbe52ecebb37e293847d98f5 (diff) | |
| parent | 6b2505c79cb3838c6e27cf47ac09980fe51c83c2 (diff) | |
| download | go-216d147a6de31c46d46857096d65c1c97c2ab25c.tar.xz | |
[release-branch.go1.26] all: merge master (6b2505c) into release-branch.go1.26
Merge List:
+ 2026-01-08 6b2505c79c cmd/go: remove user-content from doc strings in cgo ASTs.
+ 2026-01-08 4b89bcb8b7 lib/fips140: freeze v1.26.0 FIPS 140-3 module
+ 2026-01-08 8ac4477d83 simd/archsimd: rename Broadcast methods
+ 2026-01-08 5facb3b24b internal/types: add test for cycles in value context
+ 2026-01-07 28147b5283 cmd/go: guarantee a minimum of min(4,GOMAXPROCS) to compile -c
+ 2026-01-07 874d8b98eb cmd/go/internal/work: decrement concurrentProcesses when action finishes
+ 2026-01-07 d1e7f49e3d internal/trace: fix recorder.Write return value for header-only buffers
Change-Id: I863375a1ac0f641b0b02968acf01a602b7d7f2a1
| -rw-r--r-- | lib/fips140/fips140.sum | 2 | ||||
| -rw-r--r-- | lib/fips140/v1.26.0.zip (renamed from lib/fips140/v1.1.0-rc1.zip) | bin | 678896 -> 676132 bytes | |||
| -rw-r--r-- | src/cmd/cgo/ast.go | 11 | ||||
| -rw-r--r-- | src/cmd/compile/internal/amd64/simdssa.go | 88 | ||||
| -rw-r--r-- | src/cmd/compile/internal/ssa/_gen/simdAMD64.rules | 82 | ||||
| -rw-r--r-- | src/cmd/compile/internal/ssa/_gen/simdgenericOps.go | 60 | ||||
| -rw-r--r-- | src/cmd/compile/internal/ssa/opGen.go | 120 | ||||
| -rw-r--r-- | src/cmd/compile/internal/ssa/rewriteAMD64.go | 112 | ||||
| -rw-r--r-- | src/cmd/compile/internal/ssagen/simdintrinsics.go | 60 | ||||
| -rw-r--r-- | src/cmd/go/internal/work/exec.go | 5 | ||||
| -rw-r--r-- | src/cmd/go/internal/work/gc.go | 20 | ||||
| -rw-r--r-- | src/internal/types/testdata/check/cycles6.go | 71 | ||||
| -rw-r--r-- | src/runtime/trace/recorder.go | 2 | ||||
| -rw-r--r-- | src/simd/archsimd/_gen/simdgen/ops/Moves/categories.yaml | 33 | ||||
| -rw-r--r-- | src/simd/archsimd/_gen/simdgen/ops/Moves/go.yaml | 79 | ||||
| -rw-r--r-- | src/simd/archsimd/_gen/tmplgen/main.go | 2 | ||||
| -rw-r--r-- | src/simd/archsimd/ops_amd64.go | 240 | ||||
| -rw-r--r-- | src/simd/archsimd/other_gen_amd64.go | 60 |
18 files changed, 584 insertions, 463 deletions
diff --git a/lib/fips140/fips140.sum b/lib/fips140/fips140.sum index c4d185da73..050957af60 100644 --- a/lib/fips140/fips140.sum +++ b/lib/fips140/fips140.sum @@ -10,4 +10,4 @@ # go test cmd/go/internal/fips140 -update # v1.0.0-c2097c7c.zip daf3614e0406f67ae6323c902db3f953a1effb199142362a039e7526dfb9368b -v1.1.0-rc1.zip ea94f8c3885294c9efe1bd8f9b6e86daeb25b6aff2aeb20707cd9a5101f6f54e +v1.26.0.zip 9b28f847fdf1db4a36cb2b2f8ec09443c039383f085630a03ecfaddf6db7ea23 diff --git a/lib/fips140/v1.1.0-rc1.zip b/lib/fips140/v1.26.0.zip Binary files differ index d4264bdb2e..f53ade8036 100644 --- a/lib/fips140/v1.1.0-rc1.zip +++ b/lib/fips140/v1.26.0.zip diff --git a/src/cmd/cgo/ast.go b/src/cmd/cgo/ast.go index 2da6ca5a30..df0552f525 100644 --- a/src/cmd/cgo/ast.go +++ b/src/cmd/cgo/ast.go @@ -301,17 +301,12 @@ func (f *File) saveExport(x any, context astContext) { error_(c.Pos(), "export comment has wrong name %q, want %q", name, n.Name.Name) } - doc := "" - for _, c1 := range n.Doc.List { - if c1 != c { - doc += c1.Text + "\n" - } - } - f.ExpFunc = append(f.ExpFunc, &ExpFunc{ Func: n, ExpName: name, - Doc: doc, + // Caution: Do not set the Doc field on purpose + // to ensure that there are no unintended artifacts + // in the binary. See https://go.dev/issue/76697. 
}) break } diff --git a/src/cmd/compile/internal/amd64/simdssa.go b/src/cmd/compile/internal/amd64/simdssa.go index c4d0fd69c6..a028cbe86d 100644 --- a/src/cmd/compile/internal/amd64/simdssa.go +++ b/src/cmd/compile/internal/amd64/simdssa.go @@ -25,23 +25,23 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPABSQ128, ssa.OpAMD64VPABSQ256, ssa.OpAMD64VPABSQ512, - ssa.OpAMD64VBROADCASTSS128, ssa.OpAMD64VPBROADCASTQ128, - ssa.OpAMD64VPBROADCASTB128, - ssa.OpAMD64VPBROADCASTW128, + ssa.OpAMD64VBROADCASTSS128, + ssa.OpAMD64VBROADCASTSD256, ssa.OpAMD64VPBROADCASTD128, + ssa.OpAMD64VPBROADCASTQ256, ssa.OpAMD64VBROADCASTSS256, - ssa.OpAMD64VBROADCASTSD256, - ssa.OpAMD64VPBROADCASTB256, - ssa.OpAMD64VPBROADCASTW256, + ssa.OpAMD64VBROADCASTSD512, + ssa.OpAMD64VPBROADCASTW128, ssa.OpAMD64VPBROADCASTD256, - ssa.OpAMD64VPBROADCASTQ256, + ssa.OpAMD64VPBROADCASTQ512, ssa.OpAMD64VBROADCASTSS512, - ssa.OpAMD64VBROADCASTSD512, - ssa.OpAMD64VPBROADCASTB512, - ssa.OpAMD64VPBROADCASTW512, + ssa.OpAMD64VPBROADCASTB128, + ssa.OpAMD64VPBROADCASTW256, ssa.OpAMD64VPBROADCASTD512, - ssa.OpAMD64VPBROADCASTQ512, + ssa.OpAMD64VPBROADCASTB256, + ssa.OpAMD64VPBROADCASTW512, + ssa.OpAMD64VPBROADCASTB512, ssa.OpAMD64VCVTPD2PSX128, ssa.OpAMD64VCVTPD2PSY128, ssa.OpAMD64VCVTPD2PS256, @@ -832,23 +832,23 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPABSQMasked128, ssa.OpAMD64VPABSQMasked256, ssa.OpAMD64VPABSQMasked512, - ssa.OpAMD64VBROADCASTSSMasked128, ssa.OpAMD64VPBROADCASTQMasked128, - ssa.OpAMD64VPBROADCASTBMasked128, - ssa.OpAMD64VPBROADCASTWMasked128, + ssa.OpAMD64VBROADCASTSSMasked128, + ssa.OpAMD64VBROADCASTSDMasked256, ssa.OpAMD64VPBROADCASTDMasked128, + ssa.OpAMD64VPBROADCASTQMasked256, ssa.OpAMD64VBROADCASTSSMasked256, - ssa.OpAMD64VBROADCASTSDMasked256, - ssa.OpAMD64VPBROADCASTBMasked256, - ssa.OpAMD64VPBROADCASTWMasked256, + ssa.OpAMD64VBROADCASTSDMasked512, + ssa.OpAMD64VPBROADCASTWMasked128, ssa.OpAMD64VPBROADCASTDMasked256, - 
ssa.OpAMD64VPBROADCASTQMasked256, + ssa.OpAMD64VPBROADCASTQMasked512, ssa.OpAMD64VBROADCASTSSMasked512, - ssa.OpAMD64VBROADCASTSDMasked512, - ssa.OpAMD64VPBROADCASTBMasked512, - ssa.OpAMD64VPBROADCASTWMasked512, + ssa.OpAMD64VPBROADCASTBMasked128, + ssa.OpAMD64VPBROADCASTWMasked256, ssa.OpAMD64VPBROADCASTDMasked512, - ssa.OpAMD64VPBROADCASTQMasked512, + ssa.OpAMD64VPBROADCASTBMasked256, + ssa.OpAMD64VPBROADCASTWMasked512, + ssa.OpAMD64VPBROADCASTBMasked512, ssa.OpAMD64VCOMPRESSPSMasked128, ssa.OpAMD64VCOMPRESSPSMasked256, ssa.OpAMD64VCOMPRESSPSMasked512, @@ -2460,23 +2460,23 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPABSQMasked128Merging, ssa.OpAMD64VPABSQMasked256Merging, ssa.OpAMD64VPABSQMasked512Merging, - ssa.OpAMD64VBROADCASTSSMasked128Merging, ssa.OpAMD64VPBROADCASTQMasked128Merging, - ssa.OpAMD64VPBROADCASTBMasked128Merging, - ssa.OpAMD64VPBROADCASTWMasked128Merging, + ssa.OpAMD64VBROADCASTSSMasked128Merging, + ssa.OpAMD64VBROADCASTSDMasked256Merging, ssa.OpAMD64VPBROADCASTDMasked128Merging, + ssa.OpAMD64VPBROADCASTQMasked256Merging, ssa.OpAMD64VBROADCASTSSMasked256Merging, - ssa.OpAMD64VBROADCASTSDMasked256Merging, - ssa.OpAMD64VPBROADCASTBMasked256Merging, - ssa.OpAMD64VPBROADCASTWMasked256Merging, + ssa.OpAMD64VBROADCASTSDMasked512Merging, + ssa.OpAMD64VPBROADCASTWMasked128Merging, ssa.OpAMD64VPBROADCASTDMasked256Merging, - ssa.OpAMD64VPBROADCASTQMasked256Merging, + ssa.OpAMD64VPBROADCASTQMasked512Merging, ssa.OpAMD64VBROADCASTSSMasked512Merging, - ssa.OpAMD64VBROADCASTSDMasked512Merging, - ssa.OpAMD64VPBROADCASTBMasked512Merging, - ssa.OpAMD64VPBROADCASTWMasked512Merging, + ssa.OpAMD64VPBROADCASTBMasked128Merging, + ssa.OpAMD64VPBROADCASTWMasked256Merging, ssa.OpAMD64VPBROADCASTDMasked512Merging, - ssa.OpAMD64VPBROADCASTQMasked512Merging, + ssa.OpAMD64VPBROADCASTBMasked256Merging, + ssa.OpAMD64VPBROADCASTWMasked512Merging, + ssa.OpAMD64VPBROADCASTBMasked512Merging, ssa.OpAMD64VRNDSCALEPSMasked128Merging, 
ssa.OpAMD64VRNDSCALEPSMasked256Merging, ssa.OpAMD64VRNDSCALEPSMasked512Merging, @@ -2817,23 +2817,23 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPAVGWMasked128, ssa.OpAMD64VPAVGWMasked256, ssa.OpAMD64VPAVGWMasked512, - ssa.OpAMD64VBROADCASTSSMasked128, ssa.OpAMD64VPBROADCASTQMasked128, - ssa.OpAMD64VPBROADCASTBMasked128, - ssa.OpAMD64VPBROADCASTWMasked128, + ssa.OpAMD64VBROADCASTSSMasked128, + ssa.OpAMD64VBROADCASTSDMasked256, ssa.OpAMD64VPBROADCASTDMasked128, + ssa.OpAMD64VPBROADCASTQMasked256, ssa.OpAMD64VBROADCASTSSMasked256, - ssa.OpAMD64VBROADCASTSDMasked256, - ssa.OpAMD64VPBROADCASTBMasked256, - ssa.OpAMD64VPBROADCASTWMasked256, + ssa.OpAMD64VBROADCASTSDMasked512, + ssa.OpAMD64VPBROADCASTWMasked128, ssa.OpAMD64VPBROADCASTDMasked256, - ssa.OpAMD64VPBROADCASTQMasked256, + ssa.OpAMD64VPBROADCASTQMasked512, ssa.OpAMD64VBROADCASTSSMasked512, - ssa.OpAMD64VBROADCASTSDMasked512, - ssa.OpAMD64VPBROADCASTBMasked512, - ssa.OpAMD64VPBROADCASTWMasked512, + ssa.OpAMD64VPBROADCASTBMasked128, + ssa.OpAMD64VPBROADCASTWMasked256, ssa.OpAMD64VPBROADCASTDMasked512, - ssa.OpAMD64VPBROADCASTQMasked512, + ssa.OpAMD64VPBROADCASTBMasked256, + ssa.OpAMD64VPBROADCASTWMasked512, + ssa.OpAMD64VPBROADCASTBMasked512, ssa.OpAMD64VRNDSCALEPSMasked128, ssa.OpAMD64VRNDSCALEPSMasked128load, ssa.OpAMD64VRNDSCALEPSMasked256, diff --git a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules index 5c83f39a1f..799461610d 100644 --- a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules +++ b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules @@ -140,36 +140,36 @@ (AverageUint16x8 ...) => (VPAVGW128 ...) (AverageUint16x16 ...) => (VPAVGW256 ...) (AverageUint16x32 ...) => (VPAVGW512 ...) -(Broadcast128Float32x4 ...) => (VBROADCASTSS128 ...) -(Broadcast128Float64x2 ...) => (VPBROADCASTQ128 ...) -(Broadcast128Int8x16 ...) => (VPBROADCASTB128 ...) -(Broadcast128Int16x8 ...) => (VPBROADCASTW128 ...) -(Broadcast128Int32x4 ...) 
=> (VPBROADCASTD128 ...) -(Broadcast128Int64x2 ...) => (VPBROADCASTQ128 ...) -(Broadcast128Uint8x16 ...) => (VPBROADCASTB128 ...) -(Broadcast128Uint16x8 ...) => (VPBROADCASTW128 ...) -(Broadcast128Uint32x4 ...) => (VPBROADCASTD128 ...) -(Broadcast128Uint64x2 ...) => (VPBROADCASTQ128 ...) -(Broadcast256Float32x4 ...) => (VBROADCASTSS256 ...) -(Broadcast256Float64x2 ...) => (VBROADCASTSD256 ...) -(Broadcast256Int8x16 ...) => (VPBROADCASTB256 ...) -(Broadcast256Int16x8 ...) => (VPBROADCASTW256 ...) -(Broadcast256Int32x4 ...) => (VPBROADCASTD256 ...) -(Broadcast256Int64x2 ...) => (VPBROADCASTQ256 ...) -(Broadcast256Uint8x16 ...) => (VPBROADCASTB256 ...) -(Broadcast256Uint16x8 ...) => (VPBROADCASTW256 ...) -(Broadcast256Uint32x4 ...) => (VPBROADCASTD256 ...) -(Broadcast256Uint64x2 ...) => (VPBROADCASTQ256 ...) -(Broadcast512Float32x4 ...) => (VBROADCASTSS512 ...) -(Broadcast512Float64x2 ...) => (VBROADCASTSD512 ...) -(Broadcast512Int8x16 ...) => (VPBROADCASTB512 ...) -(Broadcast512Int16x8 ...) => (VPBROADCASTW512 ...) -(Broadcast512Int32x4 ...) => (VPBROADCASTD512 ...) -(Broadcast512Int64x2 ...) => (VPBROADCASTQ512 ...) -(Broadcast512Uint8x16 ...) => (VPBROADCASTB512 ...) -(Broadcast512Uint16x8 ...) => (VPBROADCASTW512 ...) -(Broadcast512Uint32x4 ...) => (VPBROADCASTD512 ...) -(Broadcast512Uint64x2 ...) => (VPBROADCASTQ512 ...) +(Broadcast1To2Float64x2 ...) => (VPBROADCASTQ128 ...) +(Broadcast1To2Int64x2 ...) => (VPBROADCASTQ128 ...) +(Broadcast1To2Uint64x2 ...) => (VPBROADCASTQ128 ...) +(Broadcast1To4Float32x4 ...) => (VBROADCASTSS128 ...) +(Broadcast1To4Float64x2 ...) => (VBROADCASTSD256 ...) +(Broadcast1To4Int32x4 ...) => (VPBROADCASTD128 ...) +(Broadcast1To4Int64x2 ...) => (VPBROADCASTQ256 ...) +(Broadcast1To4Uint32x4 ...) => (VPBROADCASTD128 ...) +(Broadcast1To4Uint64x2 ...) => (VPBROADCASTQ256 ...) +(Broadcast1To8Float32x4 ...) => (VBROADCASTSS256 ...) +(Broadcast1To8Float64x2 ...) => (VBROADCASTSD512 ...) +(Broadcast1To8Int16x8 ...) => (VPBROADCASTW128 ...) 
+(Broadcast1To8Int32x4 ...) => (VPBROADCASTD256 ...) +(Broadcast1To8Int64x2 ...) => (VPBROADCASTQ512 ...) +(Broadcast1To8Uint16x8 ...) => (VPBROADCASTW128 ...) +(Broadcast1To8Uint32x4 ...) => (VPBROADCASTD256 ...) +(Broadcast1To8Uint64x2 ...) => (VPBROADCASTQ512 ...) +(Broadcast1To16Float32x4 ...) => (VBROADCASTSS512 ...) +(Broadcast1To16Int8x16 ...) => (VPBROADCASTB128 ...) +(Broadcast1To16Int16x8 ...) => (VPBROADCASTW256 ...) +(Broadcast1To16Int32x4 ...) => (VPBROADCASTD512 ...) +(Broadcast1To16Uint8x16 ...) => (VPBROADCASTB128 ...) +(Broadcast1To16Uint16x8 ...) => (VPBROADCASTW256 ...) +(Broadcast1To16Uint32x4 ...) => (VPBROADCASTD512 ...) +(Broadcast1To32Int8x16 ...) => (VPBROADCASTB256 ...) +(Broadcast1To32Int16x8 ...) => (VPBROADCASTW512 ...) +(Broadcast1To32Uint8x16 ...) => (VPBROADCASTB256 ...) +(Broadcast1To32Uint16x8 ...) => (VPBROADCASTW512 ...) +(Broadcast1To64Int8x16 ...) => (VPBROADCASTB512 ...) +(Broadcast1To64Uint8x16 ...) => (VPBROADCASTB512 ...) (CeilFloat32x4 x) => (VROUNDPS128 [2] x) (CeilFloat32x8 x) => (VROUNDPS256 [2] x) (CeilFloat64x2 x) => (VROUNDPD128 [2] x) @@ -1424,23 +1424,23 @@ (VMOVDQU16Masked128 (VPAVGW128 x y) mask) => (VPAVGWMasked128 x y mask) (VMOVDQU16Masked256 (VPAVGW256 x y) mask) => (VPAVGWMasked256 x y mask) (VMOVDQU16Masked512 (VPAVGW512 x y) mask) => (VPAVGWMasked512 x y mask) -(VMOVDQU32Masked128 (VBROADCASTSS128 x) mask) => (VBROADCASTSSMasked128 x mask) (VMOVDQU64Masked128 (VPBROADCASTQ128 x) mask) => (VPBROADCASTQMasked128 x mask) -(VMOVDQU8Masked128 (VPBROADCASTB128 x) mask) => (VPBROADCASTBMasked128 x mask) -(VMOVDQU16Masked128 (VPBROADCASTW128 x) mask) => (VPBROADCASTWMasked128 x mask) +(VMOVDQU32Masked128 (VBROADCASTSS128 x) mask) => (VBROADCASTSSMasked128 x mask) +(VMOVDQU64Masked256 (VBROADCASTSD256 x) mask) => (VBROADCASTSDMasked256 x mask) (VMOVDQU32Masked128 (VPBROADCASTD128 x) mask) => (VPBROADCASTDMasked128 x mask) +(VMOVDQU64Masked256 (VPBROADCASTQ256 x) mask) => (VPBROADCASTQMasked256 x mask) 
(VMOVDQU32Masked256 (VBROADCASTSS256 x) mask) => (VBROADCASTSSMasked256 x mask) -(VMOVDQU64Masked256 (VBROADCASTSD256 x) mask) => (VBROADCASTSDMasked256 x mask) -(VMOVDQU8Masked256 (VPBROADCASTB256 x) mask) => (VPBROADCASTBMasked256 x mask) -(VMOVDQU16Masked256 (VPBROADCASTW256 x) mask) => (VPBROADCASTWMasked256 x mask) +(VMOVDQU64Masked512 (VBROADCASTSD512 x) mask) => (VBROADCASTSDMasked512 x mask) +(VMOVDQU16Masked128 (VPBROADCASTW128 x) mask) => (VPBROADCASTWMasked128 x mask) (VMOVDQU32Masked256 (VPBROADCASTD256 x) mask) => (VPBROADCASTDMasked256 x mask) -(VMOVDQU64Masked256 (VPBROADCASTQ256 x) mask) => (VPBROADCASTQMasked256 x mask) +(VMOVDQU64Masked512 (VPBROADCASTQ512 x) mask) => (VPBROADCASTQMasked512 x mask) (VMOVDQU32Masked512 (VBROADCASTSS512 x) mask) => (VBROADCASTSSMasked512 x mask) -(VMOVDQU64Masked512 (VBROADCASTSD512 x) mask) => (VBROADCASTSDMasked512 x mask) -(VMOVDQU8Masked512 (VPBROADCASTB512 x) mask) => (VPBROADCASTBMasked512 x mask) -(VMOVDQU16Masked512 (VPBROADCASTW512 x) mask) => (VPBROADCASTWMasked512 x mask) +(VMOVDQU8Masked128 (VPBROADCASTB128 x) mask) => (VPBROADCASTBMasked128 x mask) +(VMOVDQU16Masked256 (VPBROADCASTW256 x) mask) => (VPBROADCASTWMasked256 x mask) (VMOVDQU32Masked512 (VPBROADCASTD512 x) mask) => (VPBROADCASTDMasked512 x mask) -(VMOVDQU64Masked512 (VPBROADCASTQ512 x) mask) => (VPBROADCASTQMasked512 x mask) +(VMOVDQU8Masked256 (VPBROADCASTB256 x) mask) => (VPBROADCASTBMasked256 x mask) +(VMOVDQU16Masked512 (VPBROADCASTW512 x) mask) => (VPBROADCASTWMasked512 x mask) +(VMOVDQU8Masked512 (VPBROADCASTB512 x) mask) => (VPBROADCASTBMasked512 x mask) (VMOVDQU32Masked128 (VRNDSCALEPS128 [a] x) mask) => (VRNDSCALEPSMasked128 [a] x mask) (VMOVDQU32Masked256 (VRNDSCALEPS256 [a] x) mask) => (VRNDSCALEPSMasked256 [a] x mask) (VMOVDQU32Masked512 (VRNDSCALEPS512 [a] x) mask) => (VRNDSCALEPSMasked512 [a] x mask) diff --git a/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go b/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go index 
889ab0d84f..ff863a389f 100644 --- a/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go +++ b/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go @@ -143,36 +143,36 @@ func simdGenericOps() []opData { {name: "AverageUint16x8", argLength: 2, commutative: true}, {name: "AverageUint16x16", argLength: 2, commutative: true}, {name: "AverageUint16x32", argLength: 2, commutative: true}, - {name: "Broadcast128Float32x4", argLength: 1, commutative: false}, - {name: "Broadcast128Float64x2", argLength: 1, commutative: false}, - {name: "Broadcast128Int8x16", argLength: 1, commutative: false}, - {name: "Broadcast128Int16x8", argLength: 1, commutative: false}, - {name: "Broadcast128Int32x4", argLength: 1, commutative: false}, - {name: "Broadcast128Int64x2", argLength: 1, commutative: false}, - {name: "Broadcast128Uint8x16", argLength: 1, commutative: false}, - {name: "Broadcast128Uint16x8", argLength: 1, commutative: false}, - {name: "Broadcast128Uint32x4", argLength: 1, commutative: false}, - {name: "Broadcast128Uint64x2", argLength: 1, commutative: false}, - {name: "Broadcast256Float32x4", argLength: 1, commutative: false}, - {name: "Broadcast256Float64x2", argLength: 1, commutative: false}, - {name: "Broadcast256Int8x16", argLength: 1, commutative: false}, - {name: "Broadcast256Int16x8", argLength: 1, commutative: false}, - {name: "Broadcast256Int32x4", argLength: 1, commutative: false}, - {name: "Broadcast256Int64x2", argLength: 1, commutative: false}, - {name: "Broadcast256Uint8x16", argLength: 1, commutative: false}, - {name: "Broadcast256Uint16x8", argLength: 1, commutative: false}, - {name: "Broadcast256Uint32x4", argLength: 1, commutative: false}, - {name: "Broadcast256Uint64x2", argLength: 1, commutative: false}, - {name: "Broadcast512Float32x4", argLength: 1, commutative: false}, - {name: "Broadcast512Float64x2", argLength: 1, commutative: false}, - {name: "Broadcast512Int8x16", argLength: 1, commutative: false}, - {name: "Broadcast512Int16x8", argLength: 1, 
commutative: false}, - {name: "Broadcast512Int32x4", argLength: 1, commutative: false}, - {name: "Broadcast512Int64x2", argLength: 1, commutative: false}, - {name: "Broadcast512Uint8x16", argLength: 1, commutative: false}, - {name: "Broadcast512Uint16x8", argLength: 1, commutative: false}, - {name: "Broadcast512Uint32x4", argLength: 1, commutative: false}, - {name: "Broadcast512Uint64x2", argLength: 1, commutative: false}, + {name: "Broadcast1To2Float64x2", argLength: 1, commutative: false}, + {name: "Broadcast1To2Int64x2", argLength: 1, commutative: false}, + {name: "Broadcast1To2Uint64x2", argLength: 1, commutative: false}, + {name: "Broadcast1To4Float32x4", argLength: 1, commutative: false}, + {name: "Broadcast1To4Float64x2", argLength: 1, commutative: false}, + {name: "Broadcast1To4Int32x4", argLength: 1, commutative: false}, + {name: "Broadcast1To4Int64x2", argLength: 1, commutative: false}, + {name: "Broadcast1To4Uint32x4", argLength: 1, commutative: false}, + {name: "Broadcast1To4Uint64x2", argLength: 1, commutative: false}, + {name: "Broadcast1To8Float32x4", argLength: 1, commutative: false}, + {name: "Broadcast1To8Float64x2", argLength: 1, commutative: false}, + {name: "Broadcast1To8Int16x8", argLength: 1, commutative: false}, + {name: "Broadcast1To8Int32x4", argLength: 1, commutative: false}, + {name: "Broadcast1To8Int64x2", argLength: 1, commutative: false}, + {name: "Broadcast1To8Uint16x8", argLength: 1, commutative: false}, + {name: "Broadcast1To8Uint32x4", argLength: 1, commutative: false}, + {name: "Broadcast1To8Uint64x2", argLength: 1, commutative: false}, + {name: "Broadcast1To16Float32x4", argLength: 1, commutative: false}, + {name: "Broadcast1To16Int8x16", argLength: 1, commutative: false}, + {name: "Broadcast1To16Int16x8", argLength: 1, commutative: false}, + {name: "Broadcast1To16Int32x4", argLength: 1, commutative: false}, + {name: "Broadcast1To16Uint8x16", argLength: 1, commutative: false}, + {name: "Broadcast1To16Uint16x8", argLength: 1, 
commutative: false}, + {name: "Broadcast1To16Uint32x4", argLength: 1, commutative: false}, + {name: "Broadcast1To32Int8x16", argLength: 1, commutative: false}, + {name: "Broadcast1To32Int16x8", argLength: 1, commutative: false}, + {name: "Broadcast1To32Uint8x16", argLength: 1, commutative: false}, + {name: "Broadcast1To32Uint16x8", argLength: 1, commutative: false}, + {name: "Broadcast1To64Int8x16", argLength: 1, commutative: false}, + {name: "Broadcast1To64Uint8x16", argLength: 1, commutative: false}, {name: "CeilFloat32x4", argLength: 1, commutative: false}, {name: "CeilFloat32x8", argLength: 1, commutative: false}, {name: "CeilFloat64x2", argLength: 1, commutative: false}, diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index 7b70dc2686..9e5fdb1fc1 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -6309,36 +6309,36 @@ const ( OpAverageUint16x8 OpAverageUint16x16 OpAverageUint16x32 - OpBroadcast128Float32x4 - OpBroadcast128Float64x2 - OpBroadcast128Int8x16 - OpBroadcast128Int16x8 - OpBroadcast128Int32x4 - OpBroadcast128Int64x2 - OpBroadcast128Uint8x16 - OpBroadcast128Uint16x8 - OpBroadcast128Uint32x4 - OpBroadcast128Uint64x2 - OpBroadcast256Float32x4 - OpBroadcast256Float64x2 - OpBroadcast256Int8x16 - OpBroadcast256Int16x8 - OpBroadcast256Int32x4 - OpBroadcast256Int64x2 - OpBroadcast256Uint8x16 - OpBroadcast256Uint16x8 - OpBroadcast256Uint32x4 - OpBroadcast256Uint64x2 - OpBroadcast512Float32x4 - OpBroadcast512Float64x2 - OpBroadcast512Int8x16 - OpBroadcast512Int16x8 - OpBroadcast512Int32x4 - OpBroadcast512Int64x2 - OpBroadcast512Uint8x16 - OpBroadcast512Uint16x8 - OpBroadcast512Uint32x4 - OpBroadcast512Uint64x2 + OpBroadcast1To2Float64x2 + OpBroadcast1To2Int64x2 + OpBroadcast1To2Uint64x2 + OpBroadcast1To4Float32x4 + OpBroadcast1To4Float64x2 + OpBroadcast1To4Int32x4 + OpBroadcast1To4Int64x2 + OpBroadcast1To4Uint32x4 + OpBroadcast1To4Uint64x2 + OpBroadcast1To8Float32x4 + 
OpBroadcast1To8Float64x2 + OpBroadcast1To8Int16x8 + OpBroadcast1To8Int32x4 + OpBroadcast1To8Int64x2 + OpBroadcast1To8Uint16x8 + OpBroadcast1To8Uint32x4 + OpBroadcast1To8Uint64x2 + OpBroadcast1To16Float32x4 + OpBroadcast1To16Int8x16 + OpBroadcast1To16Int16x8 + OpBroadcast1To16Int32x4 + OpBroadcast1To16Uint8x16 + OpBroadcast1To16Uint16x8 + OpBroadcast1To16Uint32x4 + OpBroadcast1To32Int8x16 + OpBroadcast1To32Int16x8 + OpBroadcast1To32Uint8x16 + OpBroadcast1To32Uint16x8 + OpBroadcast1To64Int8x16 + OpBroadcast1To64Uint8x16 OpCeilFloat32x4 OpCeilFloat32x8 OpCeilFloat64x2 @@ -89875,152 +89875,152 @@ var opcodeTable = [...]opInfo{ generic: true, }, { - name: "Broadcast128Float32x4", + name: "Broadcast1To2Float64x2", argLen: 1, generic: true, }, { - name: "Broadcast128Float64x2", + name: "Broadcast1To2Int64x2", argLen: 1, generic: true, }, { - name: "Broadcast128Int8x16", + name: "Broadcast1To2Uint64x2", argLen: 1, generic: true, }, { - name: "Broadcast128Int16x8", + name: "Broadcast1To4Float32x4", argLen: 1, generic: true, }, { - name: "Broadcast128Int32x4", + name: "Broadcast1To4Float64x2", argLen: 1, generic: true, }, { - name: "Broadcast128Int64x2", + name: "Broadcast1To4Int32x4", argLen: 1, generic: true, }, { - name: "Broadcast128Uint8x16", + name: "Broadcast1To4Int64x2", argLen: 1, generic: true, }, { - name: "Broadcast128Uint16x8", + name: "Broadcast1To4Uint32x4", argLen: 1, generic: true, }, { - name: "Broadcast128Uint32x4", + name: "Broadcast1To4Uint64x2", argLen: 1, generic: true, }, { - name: "Broadcast128Uint64x2", + name: "Broadcast1To8Float32x4", argLen: 1, generic: true, }, { - name: "Broadcast256Float32x4", + name: "Broadcast1To8Float64x2", argLen: 1, generic: true, }, { - name: "Broadcast256Float64x2", + name: "Broadcast1To8Int16x8", argLen: 1, generic: true, }, { - name: "Broadcast256Int8x16", + name: "Broadcast1To8Int32x4", argLen: 1, generic: true, }, { - name: "Broadcast256Int16x8", + name: "Broadcast1To8Int64x2", argLen: 1, generic: true, }, { - name: 
"Broadcast256Int32x4", + name: "Broadcast1To8Uint16x8", argLen: 1, generic: true, }, { - name: "Broadcast256Int64x2", + name: "Broadcast1To8Uint32x4", argLen: 1, generic: true, }, { - name: "Broadcast256Uint8x16", + name: "Broadcast1To8Uint64x2", argLen: 1, generic: true, }, { - name: "Broadcast256Uint16x8", + name: "Broadcast1To16Float32x4", argLen: 1, generic: true, }, { - name: "Broadcast256Uint32x4", + name: "Broadcast1To16Int8x16", argLen: 1, generic: true, }, { - name: "Broadcast256Uint64x2", + name: "Broadcast1To16Int16x8", argLen: 1, generic: true, }, { - name: "Broadcast512Float32x4", + name: "Broadcast1To16Int32x4", argLen: 1, generic: true, }, { - name: "Broadcast512Float64x2", + name: "Broadcast1To16Uint8x16", argLen: 1, generic: true, }, { - name: "Broadcast512Int8x16", + name: "Broadcast1To16Uint16x8", argLen: 1, generic: true, }, { - name: "Broadcast512Int16x8", + name: "Broadcast1To16Uint32x4", argLen: 1, generic: true, }, { - name: "Broadcast512Int32x4", + name: "Broadcast1To32Int8x16", argLen: 1, generic: true, }, { - name: "Broadcast512Int64x2", + name: "Broadcast1To32Int16x8", argLen: 1, generic: true, }, { - name: "Broadcast512Uint8x16", + name: "Broadcast1To32Uint8x16", argLen: 1, generic: true, }, { - name: "Broadcast512Uint16x8", + name: "Broadcast1To32Uint16x8", argLen: 1, generic: true, }, { - name: "Broadcast512Uint32x4", + name: "Broadcast1To64Int8x16", argLen: 1, generic: true, }, { - name: "Broadcast512Uint64x2", + name: "Broadcast1To64Uint8x16", argLen: 1, generic: true, }, diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go index e84bf19c83..fe0005bb05 100644 --- a/src/cmd/compile/internal/ssa/rewriteAMD64.go +++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go @@ -2479,96 +2479,96 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpBitLen64(v) case OpBitLen8: return rewriteValueAMD64_OpBitLen8(v) - case OpBroadcast128Float32x4: - v.Op = OpAMD64VBROADCASTSS128 + 
case OpBroadcast1To16Float32x4: + v.Op = OpAMD64VBROADCASTSS512 return true - case OpBroadcast128Float64x2: - v.Op = OpAMD64VPBROADCASTQ128 + case OpBroadcast1To16Int16x8: + v.Op = OpAMD64VPBROADCASTW256 return true - case OpBroadcast128Int16x8: - v.Op = OpAMD64VPBROADCASTW128 + case OpBroadcast1To16Int32x4: + v.Op = OpAMD64VPBROADCASTD512 return true - case OpBroadcast128Int32x4: - v.Op = OpAMD64VPBROADCASTD128 + case OpBroadcast1To16Int8x16: + v.Op = OpAMD64VPBROADCASTB128 return true - case OpBroadcast128Int64x2: - v.Op = OpAMD64VPBROADCASTQ128 + case OpBroadcast1To16Uint16x8: + v.Op = OpAMD64VPBROADCASTW256 return true - case OpBroadcast128Int8x16: + case OpBroadcast1To16Uint32x4: + v.Op = OpAMD64VPBROADCASTD512 + return true + case OpBroadcast1To16Uint8x16: v.Op = OpAMD64VPBROADCASTB128 return true - case OpBroadcast128Uint16x8: - v.Op = OpAMD64VPBROADCASTW128 + case OpBroadcast1To2Float64x2: + v.Op = OpAMD64VPBROADCASTQ128 return true - case OpBroadcast128Uint32x4: - v.Op = OpAMD64VPBROADCASTD128 + case OpBroadcast1To2Int64x2: + v.Op = OpAMD64VPBROADCASTQ128 return true - case OpBroadcast128Uint64x2: + case OpBroadcast1To2Uint64x2: v.Op = OpAMD64VPBROADCASTQ128 return true - case OpBroadcast128Uint8x16: - v.Op = OpAMD64VPBROADCASTB128 + case OpBroadcast1To32Int16x8: + v.Op = OpAMD64VPBROADCASTW512 return true - case OpBroadcast256Float32x4: - v.Op = OpAMD64VBROADCASTSS256 + case OpBroadcast1To32Int8x16: + v.Op = OpAMD64VPBROADCASTB256 return true - case OpBroadcast256Float64x2: - v.Op = OpAMD64VBROADCASTSD256 + case OpBroadcast1To32Uint16x8: + v.Op = OpAMD64VPBROADCASTW512 return true - case OpBroadcast256Int16x8: - v.Op = OpAMD64VPBROADCASTW256 + case OpBroadcast1To32Uint8x16: + v.Op = OpAMD64VPBROADCASTB256 return true - case OpBroadcast256Int32x4: - v.Op = OpAMD64VPBROADCASTD256 + case OpBroadcast1To4Float32x4: + v.Op = OpAMD64VBROADCASTSS128 return true - case OpBroadcast256Int64x2: - v.Op = OpAMD64VPBROADCASTQ256 + case OpBroadcast1To4Float64x2: + v.Op = 
OpAMD64VBROADCASTSD256 return true - case OpBroadcast256Int8x16: - v.Op = OpAMD64VPBROADCASTB256 + case OpBroadcast1To4Int32x4: + v.Op = OpAMD64VPBROADCASTD128 return true - case OpBroadcast256Uint16x8: - v.Op = OpAMD64VPBROADCASTW256 + case OpBroadcast1To4Int64x2: + v.Op = OpAMD64VPBROADCASTQ256 return true - case OpBroadcast256Uint32x4: - v.Op = OpAMD64VPBROADCASTD256 + case OpBroadcast1To4Uint32x4: + v.Op = OpAMD64VPBROADCASTD128 return true - case OpBroadcast256Uint64x2: + case OpBroadcast1To4Uint64x2: v.Op = OpAMD64VPBROADCASTQ256 return true - case OpBroadcast256Uint8x16: - v.Op = OpAMD64VPBROADCASTB256 + case OpBroadcast1To64Int8x16: + v.Op = OpAMD64VPBROADCASTB512 return true - case OpBroadcast512Float32x4: - v.Op = OpAMD64VBROADCASTSS512 + case OpBroadcast1To64Uint8x16: + v.Op = OpAMD64VPBROADCASTB512 + return true + case OpBroadcast1To8Float32x4: + v.Op = OpAMD64VBROADCASTSS256 return true - case OpBroadcast512Float64x2: + case OpBroadcast1To8Float64x2: v.Op = OpAMD64VBROADCASTSD512 return true - case OpBroadcast512Int16x8: - v.Op = OpAMD64VPBROADCASTW512 + case OpBroadcast1To8Int16x8: + v.Op = OpAMD64VPBROADCASTW128 return true - case OpBroadcast512Int32x4: - v.Op = OpAMD64VPBROADCASTD512 + case OpBroadcast1To8Int32x4: + v.Op = OpAMD64VPBROADCASTD256 return true - case OpBroadcast512Int64x2: + case OpBroadcast1To8Int64x2: v.Op = OpAMD64VPBROADCASTQ512 return true - case OpBroadcast512Int8x16: - v.Op = OpAMD64VPBROADCASTB512 - return true - case OpBroadcast512Uint16x8: - v.Op = OpAMD64VPBROADCASTW512 + case OpBroadcast1To8Uint16x8: + v.Op = OpAMD64VPBROADCASTW128 return true - case OpBroadcast512Uint32x4: - v.Op = OpAMD64VPBROADCASTD512 + case OpBroadcast1To8Uint32x4: + v.Op = OpAMD64VPBROADCASTD256 return true - case OpBroadcast512Uint64x2: + case OpBroadcast1To8Uint64x2: v.Op = OpAMD64VPBROADCASTQ512 return true - case OpBroadcast512Uint8x16: - v.Op = OpAMD64VPBROADCASTB512 - return true case OpBswap16: return rewriteValueAMD64_OpBswap16(v) case 
OpBswap32: diff --git a/src/cmd/compile/internal/ssagen/simdintrinsics.go b/src/cmd/compile/internal/ssagen/simdintrinsics.go index 4ad0c6032c..e50561845b 100644 --- a/src/cmd/compile/internal/ssagen/simdintrinsics.go +++ b/src/cmd/compile/internal/ssagen/simdintrinsics.go @@ -152,36 +152,36 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . addF(simdPackage, "Uint16x8.Average", opLen2(ssa.OpAverageUint16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint16x16.Average", opLen2(ssa.OpAverageUint16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint16x32.Average", opLen2(ssa.OpAverageUint16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float32x4.Broadcast128", opLen1(ssa.OpBroadcast128Float32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float64x2.Broadcast128", opLen1(ssa.OpBroadcast128Float64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int8x16.Broadcast128", opLen1(ssa.OpBroadcast128Int8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x8.Broadcast128", opLen1(ssa.OpBroadcast128Int16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x4.Broadcast128", opLen1(ssa.OpBroadcast128Int32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x2.Broadcast128", opLen1(ssa.OpBroadcast128Int64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint8x16.Broadcast128", opLen1(ssa.OpBroadcast128Uint8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x8.Broadcast128", opLen1(ssa.OpBroadcast128Uint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x4.Broadcast128", opLen1(ssa.OpBroadcast128Uint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x2.Broadcast128", opLen1(ssa.OpBroadcast128Uint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float32x4.Broadcast256", opLen1(ssa.OpBroadcast256Float32x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float64x2.Broadcast256", opLen1(ssa.OpBroadcast256Float64x2, types.TypeVec256), sys.AMD64) - 
addF(simdPackage, "Int8x16.Broadcast256", opLen1(ssa.OpBroadcast256Int8x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int16x8.Broadcast256", opLen1(ssa.OpBroadcast256Int16x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x4.Broadcast256", opLen1(ssa.OpBroadcast256Int32x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x2.Broadcast256", opLen1(ssa.OpBroadcast256Int64x2, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint8x16.Broadcast256", opLen1(ssa.OpBroadcast256Uint8x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint16x8.Broadcast256", opLen1(ssa.OpBroadcast256Uint16x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint32x4.Broadcast256", opLen1(ssa.OpBroadcast256Uint32x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint64x2.Broadcast256", opLen1(ssa.OpBroadcast256Uint64x2, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float32x4.Broadcast512", opLen1(ssa.OpBroadcast512Float32x4, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float64x2.Broadcast512", opLen1(ssa.OpBroadcast512Float64x2, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int8x16.Broadcast512", opLen1(ssa.OpBroadcast512Int8x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.Broadcast512", opLen1(ssa.OpBroadcast512Int16x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.Broadcast512", opLen1(ssa.OpBroadcast512Int32x4, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.Broadcast512", opLen1(ssa.OpBroadcast512Int64x2, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint8x16.Broadcast512", opLen1(ssa.OpBroadcast512Uint8x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint16x8.Broadcast512", opLen1(ssa.OpBroadcast512Uint16x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.Broadcast512", opLen1(ssa.OpBroadcast512Uint32x4, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.Broadcast512", opLen1(ssa.OpBroadcast512Uint64x2, types.TypeVec512), sys.AMD64) + addF(simdPackage, 
"Float64x2.Broadcast1To2", opLen1(ssa.OpBroadcast1To2Float64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x2.Broadcast1To2", opLen1(ssa.OpBroadcast1To2Int64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint64x2.Broadcast1To2", opLen1(ssa.OpBroadcast1To2Uint64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Float32x4.Broadcast1To4", opLen1(ssa.OpBroadcast1To4Float32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Float64x2.Broadcast1To4", opLen1(ssa.OpBroadcast1To4Float64x2, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int32x4.Broadcast1To4", opLen1(ssa.OpBroadcast1To4Int32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x2.Broadcast1To4", opLen1(ssa.OpBroadcast1To4Int64x2, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint32x4.Broadcast1To4", opLen1(ssa.OpBroadcast1To4Uint32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint64x2.Broadcast1To4", opLen1(ssa.OpBroadcast1To4Uint64x2, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Float32x4.Broadcast1To8", opLen1(ssa.OpBroadcast1To8Float32x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Float64x2.Broadcast1To8", opLen1(ssa.OpBroadcast1To8Float64x2, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int16x8.Broadcast1To8", opLen1(ssa.OpBroadcast1To8Int16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int32x4.Broadcast1To8", opLen1(ssa.OpBroadcast1To8Int32x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int64x2.Broadcast1To8", opLen1(ssa.OpBroadcast1To8Int64x2, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint16x8.Broadcast1To8", opLen1(ssa.OpBroadcast1To8Uint16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint32x4.Broadcast1To8", opLen1(ssa.OpBroadcast1To8Uint32x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint64x2.Broadcast1To8", opLen1(ssa.OpBroadcast1To8Uint64x2, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Float32x4.Broadcast1To16", opLen1(ssa.OpBroadcast1To16Float32x4, types.TypeVec512), sys.AMD64) + 
addF(simdPackage, "Int8x16.Broadcast1To16", opLen1(ssa.OpBroadcast1To16Int8x16, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int16x8.Broadcast1To16", opLen1(ssa.OpBroadcast1To16Int16x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int32x4.Broadcast1To16", opLen1(ssa.OpBroadcast1To16Int32x4, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint8x16.Broadcast1To16", opLen1(ssa.OpBroadcast1To16Uint8x16, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint16x8.Broadcast1To16", opLen1(ssa.OpBroadcast1To16Uint16x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint32x4.Broadcast1To16", opLen1(ssa.OpBroadcast1To16Uint32x4, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int8x16.Broadcast1To32", opLen1(ssa.OpBroadcast1To32Int8x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int16x8.Broadcast1To32", opLen1(ssa.OpBroadcast1To32Int16x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint8x16.Broadcast1To32", opLen1(ssa.OpBroadcast1To32Uint8x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint16x8.Broadcast1To32", opLen1(ssa.OpBroadcast1To32Uint16x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int8x16.Broadcast1To64", opLen1(ssa.OpBroadcast1To64Int8x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint8x16.Broadcast1To64", opLen1(ssa.OpBroadcast1To64Uint8x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x4.Ceil", opLen1(ssa.OpCeilFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.Ceil", opLen1(ssa.OpCeilFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float64x2.Ceil", opLen1(ssa.OpCeilFloat64x2, types.TypeVec128), sys.AMD64) diff --git a/src/cmd/go/internal/work/exec.go b/src/cmd/go/internal/work/exec.go index 654e9e9374..f2d1b1040b 100644 --- a/src/cmd/go/internal/work/exec.go +++ b/src/cmd/go/internal/work/exec.go @@ -248,6 +248,11 @@ func (b *Builder) Do(ctx context.Context, root *Action) { wg.Wait() + if tokens != totalTokens || concurrentProcesses != 0 { + 
base.Fatalf("internal error: tokens not restored at end of build: tokens: %d, totalTokens: %d, concurrentProcesses: %d", + tokens, totalTokens, concurrentProcesses) + } + // Write action graph again, this time with timing information. writeActionGraph() } diff --git a/src/cmd/go/internal/work/gc.go b/src/cmd/go/internal/work/gc.go index 9a5e6c924c..6300a9135b 100644 --- a/src/cmd/go/internal/work/gc.go +++ b/src/cmd/go/internal/work/gc.go @@ -217,16 +217,17 @@ func compilerConcurrency() (int, func()) { concurrentProcesses++ // Set aside tokens so that we don't run out if we were running cfg.BuildP concurrent compiles. // We'll set aside one token for each of the action goroutines that aren't currently running a compile. - setAside := cfg.BuildP - concurrentProcesses + setAside := (cfg.BuildP - concurrentProcesses) * minTokens availableTokens := tokens - setAside - // Grab half the remaining tokens: but with a floor of at least 1 token, and + // Grab half the remaining tokens: but with a floor of at least minTokens token, and // a ceiling of the max backend concurrency. - c := max(min(availableTokens/2, maxCompilerConcurrency), 1) + c := max(min(availableTokens/2, maxCompilerConcurrency), minTokens) tokens -= c // Successfully grabbed the tokens. return c, func() { tokensMu.Lock() defer tokensMu.Unlock() + concurrentProcesses-- tokens += c } } @@ -235,17 +236,22 @@ var maxCompilerConcurrency = runtime.GOMAXPROCS(0) // max value we will use for var ( tokensMu sync.Mutex + totalTokens int // total number of tokens: this is used for checking that we get them all back in the end tokens int // number of available tokens concurrentProcesses int // number of currently running compiles + minTokens int // minimum number of tokens to give out ) // initCompilerConcurrencyPool sets the number of tokens in the pool. It needs // to be run after init, so that it can use the value of cfg.BuildP. 
func initCompilerConcurrencyPool() { - // Size the pool so that the worst case total number of compiles is not more - // than what it was when we capped the concurrency to 4. - oldConcurrencyCap := min(4, maxCompilerConcurrency) - tokens = oldConcurrencyCap * cfg.BuildP + // Size the pool to allow 2*maxCompilerConcurrency extra tokens to + // be distributed amongst the compile actions in addition to the minimum + // of min(4,GOMAXPROCS) tokens for each of the potentially cfg.BuildP + // concurrently running compile actions. + minTokens = min(4, maxCompilerConcurrency) + tokens = 2*maxCompilerConcurrency + minTokens*cfg.BuildP + totalTokens = tokens } // trimpath returns the -trimpath argument to use diff --git a/src/internal/types/testdata/check/cycles6.go b/src/internal/types/testdata/check/cycles6.go new file mode 100644 index 0000000000..e5635ed456 --- /dev/null +++ b/src/internal/types/testdata/check/cycles6.go @@ -0,0 +1,71 @@ +// Copyright 2026 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package p + +import "unsafe" + +// Below are the pieces of syntax corresponding to functions which can produce a +// type T without first having a value of type T. Notice that each causes a +// value of type T to be passed to unsafe.Sizeof while T is incomplete. 
+ +// literal on type +type T0 /* ERROR "invalid recursive type" */ [unsafe.Sizeof(T0{})]int +// literal on value (not applicable) +// literal on pointer (not applicable) + +// call on type +type T1 /* ERROR "invalid recursive type" */ [unsafe.Sizeof(T1(42))]int +// call on value +func f2() T2 +type T2 /* ERROR "invalid recursive type" */ [unsafe.Sizeof(f2())]int +// call on pointer (not applicable) + +// assert on type +var i3 interface{} +type T3 /* ERROR "invalid recursive type" */ [unsafe.Sizeof(i3.(T3))]int +// assert on value (not applicable) +// assert on pointer (not applicable) + +// receive on type (not applicable) +// receive on value +func f4() <-chan T4 +type T4 /* ERROR "invalid recursive type" */ [unsafe.Sizeof(<-f4())]int +// receive on pointer (not applicable) + +// star on type (not applicable) +// star on value (not applicable) +// star on pointer +func f5() *T5 +type T5 /* ERROR "invalid recursive type" */ [unsafe.Sizeof(*f5())]int + +// Below is additional syntax which interacts with incomplete types. Notice that +// each of the below falls into 1 of 3 cases: +// 1. It cannot produce a value of (incomplete) type T. +// 2. It can, but only because it already has a value of type T. +// 3. It can, but only because it performs an implicit dereference. 
+ +// select on type (case 1) +// select on value (case 2) +type T6 /* ERROR "invalid recursive type" */ struct { + f T7 +} +type T7 [unsafe.Sizeof(T6{}.f)]int +// select on pointer (case 3) +type T8 /* ERROR "invalid recursive type" */ struct { + f T9 +} +type T9 [unsafe.Sizeof(new(T8).f)]int + +// slice on type (not applicable) +// slice on value (case 2) +type T10 /* ERROR "invalid recursive type" */ [unsafe.Sizeof(T10{}[:])]int +// slice on pointer (case 3) +type T11 /* ERROR "invalid recursive type" */ [unsafe.Sizeof(new(T11)[:])]int + +// index on type (case 1) +// index on value (case 2) +type T12 /* ERROR "invalid recursive type" */ [unsafe.Sizeof(T12{}[42])]int +// index on pointer (case 3) +type T13 /* ERROR "invalid recursive type" */ [unsafe.Sizeof(new(T13)[42])]int diff --git a/src/runtime/trace/recorder.go b/src/runtime/trace/recorder.go index 4f2d3aa92a..a18d764141 100644 --- a/src/runtime/trace/recorder.go +++ b/src/runtime/trace/recorder.go @@ -39,7 +39,7 @@ func (w *recorder) Write(b []byte) (n int, err error) { w.headerReceived = true } if len(b) == n { - return 0, nil + return n, nil } ba, nb, err := readBatch(b[n:]) // Every write from the runtime is guaranteed to be a complete batch. if err != nil { diff --git a/src/simd/archsimd/_gen/simdgen/ops/Moves/categories.yaml b/src/simd/archsimd/_gen/simdgen/ops/Moves/categories.yaml index 38bc9374cc..3cba01ef95 100644 --- a/src/simd/archsimd/_gen/simdgen/ops/Moves/categories.yaml +++ b/src/simd/archsimd/_gen/simdgen/ops/Moves/categories.yaml @@ -69,21 +69,36 @@ documentation: !string |- // NAME performs an expansion on a vector x whose elements are packed to lower parts. // The expansion is to distribute elements as indexed by mask, from lower mask elements to upper in order. -- go: Broadcast128 +- go: Broadcast1To2 commutative: false documentation: !string |- - // NAME copies element zero of its (128-bit) input to all elements of - // the 128-bit output vector. 
-- go: Broadcast256 + // NAME copies the lowest element of its input to all 2 elements of + // the output vector. +- go: Broadcast1To4 commutative: false documentation: !string |- - // NAME copies element zero of its (128-bit) input to all elements of - // the 256-bit output vector. -- go: Broadcast512 + // NAME copies the lowest element of its input to all 4 elements of + // the output vector. +- go: Broadcast1To8 commutative: false documentation: !string |- - // NAME copies element zero of its (128-bit) input to all elements of - // the 512-bit output vector. + // NAME copies the lowest element of its input to all 8 elements of + // the output vector. +- go: Broadcast1To16 + commutative: false + documentation: !string |- + // NAME copies the lowest element of its input to all 16 elements of + // the output vector. +- go: Broadcast1To32 + commutative: false + documentation: !string |- + // NAME copies the lowest element of its input to all 32 elements of + // the output vector. +- go: Broadcast1To64 + commutative: false + documentation: !string |- + // NAME copies the lowest element of its input to all 64 elements of + // the output vector. - go: PermuteOrZeroGrouped commutative: false documentation: !string |- # Detailed documentation will rely on the specific ops. 
diff --git a/src/simd/archsimd/_gen/simdgen/ops/Moves/go.yaml b/src/simd/archsimd/_gen/simdgen/ops/Moves/go.yaml index e1fd184ed7..02daa2ea1e 100644 --- a/src/simd/archsimd/_gen/simdgen/ops/Moves/go.yaml +++ b/src/simd/archsimd/_gen/simdgen/ops/Moves/go.yaml @@ -376,21 +376,21 @@ out: - *any -- go: Broadcast128 - asm: VPBROADCAST[BWDQ] +- go: Broadcast1To2 + asm: VPBROADCASTQ in: - class: vreg bits: 128 - elemBits: $e + elemBits: 64 base: $b out: - class: vreg bits: 128 - elemBits: $e + elemBits: 64 base: $b # weirdly, this one case on AVX2 is memory-operand-only -- go: Broadcast128 +- go: Broadcast1To2 asm: VPBROADCASTQ in: - class: vreg @@ -405,71 +405,94 @@ base: int OverwriteBase: float -- go: Broadcast256 +- go: Broadcast1To4 asm: VPBROADCAST[BWDQ] in: - class: vreg bits: 128 - elemBits: $e base: $b out: - class: vreg - bits: 256 - elemBits: $e + lanes: 4 base: $b -- go: Broadcast512 +- go: Broadcast1To8 asm: VPBROADCAST[BWDQ] in: - class: vreg bits: 128 - elemBits: $e base: $b out: - class: vreg - bits: 512 - elemBits: $e + lanes: 8 base: $b -- go: Broadcast128 - asm: VBROADCASTS[SD] +- go: Broadcast1To16 + asm: VPBROADCAST[BWDQ] in: - class: vreg bits: 128 - elemBits: $e base: $b out: - class: vreg - bits: 128 - elemBits: $e + lanes: 16 base: $b -- go: Broadcast256 - asm: VBROADCASTS[SD] +- go: Broadcast1To32 + asm: VPBROADCAST[BWDQ] in: - class: vreg bits: 128 - elemBits: $e base: $b out: - class: vreg - bits: 256 - elemBits: $e + lanes: 32 base: $b -- go: Broadcast512 - asm: VBROADCASTS[SD] +- go: Broadcast1To64 + asm: VPBROADCASTB in: - class: vreg bits: 128 - elemBits: $e base: $b out: - class: vreg - bits: 512 - elemBits: $e + lanes: 64 base: $b +- go: Broadcast1To4 + asm: VBROADCASTS[SD] + in: + - class: vreg + bits: 128 + base: float + out: + - class: vreg + lanes: 4 + base: float + +- go: Broadcast1To8 + asm: VBROADCASTS[SD] + in: + - class: vreg + bits: 128 + base: float + out: + - class: vreg + lanes: 8 + base: float + +- go: Broadcast1To16 + asm: 
VBROADCASTS[SD] + in: + - class: vreg + bits: 128 + base: float + out: + - class: vreg + lanes: 16 + base: float + # VPSHUFB for 128-bit byte shuffles will be picked with higher priority than VPERMB, given its lower CPU feature requirement. (It's AVX) - go: PermuteOrZero asm: VPSHUFB diff --git a/src/simd/archsimd/_gen/tmplgen/main.go b/src/simd/archsimd/_gen/tmplgen/main.go index 8db185e1e0..45338b765d 100644 --- a/src/simd/archsimd/_gen/tmplgen/main.go +++ b/src/simd/archsimd/_gen/tmplgen/main.go @@ -873,7 +873,7 @@ var broadcastTemplate = templateOf("Broadcast functions", ` // Emulated, CPU Feature: {{.CPUfeatureBC}} func Broadcast{{.VType}}(x {{.Etype}}) {{.VType}} { var z {{.As128BitVec }} - return z.SetElem(0, x).Broadcast{{.Vwidth}}() + return z.SetElem(0, x).Broadcast1To{{.Count}}() } `) diff --git a/src/simd/archsimd/ops_amd64.go b/src/simd/archsimd/ops_amd64.go index eba340c793..bb162c4ff9 100644 --- a/src/simd/archsimd/ops_amd64.go +++ b/src/simd/archsimd/ops_amd64.go @@ -805,191 +805,197 @@ func (x Uint16x16) Average(y Uint16x16) Uint16x16 // Asm: VPAVGW, CPU Feature: AVX512 func (x Uint16x32) Average(y Uint16x32) Uint16x32 -/* Broadcast128 */ +/* Broadcast1To2 */ -// Broadcast128 copies element zero of its (128-bit) input to all elements of -// the 128-bit output vector. +// Broadcast1To2 copies the lowest element of its input to all 2 elements of +// the output vector. // -// Asm: VBROADCASTSS, CPU Feature: AVX2 -func (x Float32x4) Broadcast128() Float32x4 +// Asm: VPBROADCASTQ, CPU Feature: AVX2 +func (x Float64x2) Broadcast1To2() Float64x2 -// Broadcast128 copies element zero of its (128-bit) input to all elements of -// the 128-bit output vector. +// Broadcast1To2 copies the lowest element of its input to all 2 elements of +// the output vector. 
// // Asm: VPBROADCASTQ, CPU Feature: AVX2 -func (x Float64x2) Broadcast128() Float64x2 +func (x Int64x2) Broadcast1To2() Int64x2 -// Broadcast128 copies element zero of its (128-bit) input to all elements of -// the 128-bit output vector. +// Broadcast1To2 copies the lowest element of its input to all 2 elements of +// the output vector. // -// Asm: VPBROADCASTB, CPU Feature: AVX2 -func (x Int8x16) Broadcast128() Int8x16 +// Asm: VPBROADCASTQ, CPU Feature: AVX2 +func (x Uint64x2) Broadcast1To2() Uint64x2 -// Broadcast128 copies element zero of its (128-bit) input to all elements of -// the 128-bit output vector. -// -// Asm: VPBROADCASTW, CPU Feature: AVX2 -func (x Int16x8) Broadcast128() Int16x8 +/* Broadcast1To4 */ -// Broadcast128 copies element zero of its (128-bit) input to all elements of -// the 128-bit output vector. +// Broadcast1To4 copies the lowest element of its input to all 4 elements of +// the output vector. // -// Asm: VPBROADCASTD, CPU Feature: AVX2 -func (x Int32x4) Broadcast128() Int32x4 +// Asm: VBROADCASTSS, CPU Feature: AVX2 +func (x Float32x4) Broadcast1To4() Float32x4 -// Broadcast128 copies element zero of its (128-bit) input to all elements of -// the 128-bit output vector. +// Broadcast1To4 copies the lowest element of its input to all 4 elements of +// the output vector. // -// Asm: VPBROADCASTQ, CPU Feature: AVX2 -func (x Int64x2) Broadcast128() Int64x2 +// Asm: VBROADCASTSD, CPU Feature: AVX2 +func (x Float64x2) Broadcast1To4() Float64x4 -// Broadcast128 copies element zero of its (128-bit) input to all elements of -// the 128-bit output vector. +// Broadcast1To4 copies the lowest element of its input to all 4 elements of +// the output vector. // -// Asm: VPBROADCASTB, CPU Feature: AVX2 -func (x Uint8x16) Broadcast128() Uint8x16 +// Asm: VPBROADCASTD, CPU Feature: AVX2 +func (x Int32x4) Broadcast1To4() Int32x4 -// Broadcast128 copies element zero of its (128-bit) input to all elements of -// the 128-bit output vector. 
+// Broadcast1To4 copies the lowest element of its input to all 4 elements of +// the output vector. // -// Asm: VPBROADCASTW, CPU Feature: AVX2 -func (x Uint16x8) Broadcast128() Uint16x8 +// Asm: VPBROADCASTQ, CPU Feature: AVX2 +func (x Int64x2) Broadcast1To4() Int64x4 -// Broadcast128 copies element zero of its (128-bit) input to all elements of -// the 128-bit output vector. +// Broadcast1To4 copies the lowest element of its input to all 4 elements of +// the output vector. // // Asm: VPBROADCASTD, CPU Feature: AVX2 -func (x Uint32x4) Broadcast128() Uint32x4 +func (x Uint32x4) Broadcast1To4() Uint32x4 -// Broadcast128 copies element zero of its (128-bit) input to all elements of -// the 128-bit output vector. +// Broadcast1To4 copies the lowest element of its input to all 4 elements of +// the output vector. // // Asm: VPBROADCASTQ, CPU Feature: AVX2 -func (x Uint64x2) Broadcast128() Uint64x2 +func (x Uint64x2) Broadcast1To4() Uint64x4 -/* Broadcast256 */ +/* Broadcast1To8 */ -// Broadcast256 copies element zero of its (128-bit) input to all elements of -// the 256-bit output vector. +// Broadcast1To8 copies the lowest element of its input to all 8 elements of +// the output vector. // // Asm: VBROADCASTSS, CPU Feature: AVX2 -func (x Float32x4) Broadcast256() Float32x8 - -// Broadcast256 copies element zero of its (128-bit) input to all elements of -// the 256-bit output vector. -// -// Asm: VBROADCASTSD, CPU Feature: AVX2 -func (x Float64x2) Broadcast256() Float64x4 +func (x Float32x4) Broadcast1To8() Float32x8 -// Broadcast256 copies element zero of its (128-bit) input to all elements of -// the 256-bit output vector. +// Broadcast1To8 copies the lowest element of its input to all 8 elements of +// the output vector. 
// -// Asm: VPBROADCASTB, CPU Feature: AVX2 -func (x Int8x16) Broadcast256() Int8x32 +// Asm: VBROADCASTSD, CPU Feature: AVX512 +func (x Float64x2) Broadcast1To8() Float64x8 -// Broadcast256 copies element zero of its (128-bit) input to all elements of -// the 256-bit output vector. +// Broadcast1To8 copies the lowest element of its input to all 8 elements of +// the output vector. // // Asm: VPBROADCASTW, CPU Feature: AVX2 -func (x Int16x8) Broadcast256() Int16x16 +func (x Int16x8) Broadcast1To8() Int16x8 -// Broadcast256 copies element zero of its (128-bit) input to all elements of -// the 256-bit output vector. +// Broadcast1To8 copies the lowest element of its input to all 8 elements of +// the output vector. // // Asm: VPBROADCASTD, CPU Feature: AVX2 -func (x Int32x4) Broadcast256() Int32x8 +func (x Int32x4) Broadcast1To8() Int32x8 -// Broadcast256 copies element zero of its (128-bit) input to all elements of -// the 256-bit output vector. +// Broadcast1To8 copies the lowest element of its input to all 8 elements of +// the output vector. // -// Asm: VPBROADCASTQ, CPU Feature: AVX2 -func (x Int64x2) Broadcast256() Int64x4 - -// Broadcast256 copies element zero of its (128-bit) input to all elements of -// the 256-bit output vector. -// -// Asm: VPBROADCASTB, CPU Feature: AVX2 -func (x Uint8x16) Broadcast256() Uint8x32 +// Asm: VPBROADCASTQ, CPU Feature: AVX512 +func (x Int64x2) Broadcast1To8() Int64x8 -// Broadcast256 copies element zero of its (128-bit) input to all elements of -// the 256-bit output vector. +// Broadcast1To8 copies the lowest element of its input to all 8 elements of +// the output vector. // // Asm: VPBROADCASTW, CPU Feature: AVX2 -func (x Uint16x8) Broadcast256() Uint16x16 +func (x Uint16x8) Broadcast1To8() Uint16x8 -// Broadcast256 copies element zero of its (128-bit) input to all elements of -// the 256-bit output vector. +// Broadcast1To8 copies the lowest element of its input to all 8 elements of +// the output vector. 
// // Asm: VPBROADCASTD, CPU Feature: AVX2 -func (x Uint32x4) Broadcast256() Uint32x8 +func (x Uint32x4) Broadcast1To8() Uint32x8 -// Broadcast256 copies element zero of its (128-bit) input to all elements of -// the 256-bit output vector. +// Broadcast1To8 copies the lowest element of its input to all 8 elements of +// the output vector. // -// Asm: VPBROADCASTQ, CPU Feature: AVX2 -func (x Uint64x2) Broadcast256() Uint64x4 +// Asm: VPBROADCASTQ, CPU Feature: AVX512 +func (x Uint64x2) Broadcast1To8() Uint64x8 -/* Broadcast512 */ +/* Broadcast1To16 */ -// Broadcast512 copies element zero of its (128-bit) input to all elements of -// the 512-bit output vector. +// Broadcast1To16 copies the lowest element of its input to all 16 elements of +// the output vector. // // Asm: VBROADCASTSS, CPU Feature: AVX512 -func (x Float32x4) Broadcast512() Float32x16 +func (x Float32x4) Broadcast1To16() Float32x16 -// Broadcast512 copies element zero of its (128-bit) input to all elements of -// the 512-bit output vector. +// Broadcast1To16 copies the lowest element of its input to all 16 elements of +// the output vector. // -// Asm: VBROADCASTSD, CPU Feature: AVX512 -func (x Float64x2) Broadcast512() Float64x8 +// Asm: VPBROADCASTB, CPU Feature: AVX2 +func (x Int8x16) Broadcast1To16() Int8x16 -// Broadcast512 copies element zero of its (128-bit) input to all elements of -// the 512-bit output vector. +// Broadcast1To16 copies the lowest element of its input to all 16 elements of +// the output vector. // -// Asm: VPBROADCASTB, CPU Feature: AVX512 -func (x Int8x16) Broadcast512() Int8x64 +// Asm: VPBROADCASTW, CPU Feature: AVX2 +func (x Int16x8) Broadcast1To16() Int16x16 -// Broadcast512 copies element zero of its (128-bit) input to all elements of -// the 512-bit output vector. +// Broadcast1To16 copies the lowest element of its input to all 16 elements of +// the output vector. 
// -// Asm: VPBROADCASTW, CPU Feature: AVX512 -func (x Int16x8) Broadcast512() Int16x32 +// Asm: VPBROADCASTD, CPU Feature: AVX512 +func (x Int32x4) Broadcast1To16() Int32x16 -// Broadcast512 copies element zero of its (128-bit) input to all elements of -// the 512-bit output vector. +// Broadcast1To16 copies the lowest element of its input to all 16 elements of +// the output vector. +// +// Asm: VPBROADCASTB, CPU Feature: AVX2 +func (x Uint8x16) Broadcast1To16() Uint8x16 + +// Broadcast1To16 copies the lowest element of its input to all 16 elements of +// the output vector. +// +// Asm: VPBROADCASTW, CPU Feature: AVX2 +func (x Uint16x8) Broadcast1To16() Uint16x16 + +// Broadcast1To16 copies the lowest element of its input to all 16 elements of +// the output vector. // // Asm: VPBROADCASTD, CPU Feature: AVX512 -func (x Int32x4) Broadcast512() Int32x16 +func (x Uint32x4) Broadcast1To16() Uint32x16 + +/* Broadcast1To32 */ -// Broadcast512 copies element zero of its (128-bit) input to all elements of -// the 512-bit output vector. +// Broadcast1To32 copies the lowest element of its input to all 32 elements of +// the output vector. // -// Asm: VPBROADCASTQ, CPU Feature: AVX512 -func (x Int64x2) Broadcast512() Int64x8 +// Asm: VPBROADCASTB, CPU Feature: AVX2 +func (x Int8x16) Broadcast1To32() Int8x32 -// Broadcast512 copies element zero of its (128-bit) input to all elements of -// the 512-bit output vector. +// Broadcast1To32 copies the lowest element of its input to all 32 elements of +// the output vector. // -// Asm: VPBROADCASTB, CPU Feature: AVX512 -func (x Uint8x16) Broadcast512() Uint8x64 +// Asm: VPBROADCASTW, CPU Feature: AVX512 +func (x Int16x8) Broadcast1To32() Int16x32 + +// Broadcast1To32 copies the lowest element of its input to all 32 elements of +// the output vector. 
+// +// Asm: VPBROADCASTB, CPU Feature: AVX2 +func (x Uint8x16) Broadcast1To32() Uint8x32 -// Broadcast512 copies element zero of its (128-bit) input to all elements of -// the 512-bit output vector. +// Broadcast1To32 copies the lowest element of its input to all 32 elements of +// the output vector. // // Asm: VPBROADCASTW, CPU Feature: AVX512 -func (x Uint16x8) Broadcast512() Uint16x32 +func (x Uint16x8) Broadcast1To32() Uint16x32 + +/* Broadcast1To64 */ -// Broadcast512 copies element zero of its (128-bit) input to all elements of -// the 512-bit output vector. +// Broadcast1To64 copies the lowest element of its input to all 64 elements of +// the output vector. // -// Asm: VPBROADCASTD, CPU Feature: AVX512 -func (x Uint32x4) Broadcast512() Uint32x16 +// Asm: VPBROADCASTB, CPU Feature: AVX512 +func (x Int8x16) Broadcast1To64() Int8x64 -// Broadcast512 copies element zero of its (128-bit) input to all elements of -// the 512-bit output vector. +// Broadcast1To64 copies the lowest element of its input to all 64 elements of +// the output vector. 
// -// Asm: VPBROADCASTQ, CPU Feature: AVX512 -func (x Uint64x2) Broadcast512() Uint64x8 +// Asm: VPBROADCASTB, CPU Feature: AVX512 +func (x Uint8x16) Broadcast1To64() Uint8x64 /* Ceil */ diff --git a/src/simd/archsimd/other_gen_amd64.go b/src/simd/archsimd/other_gen_amd64.go index 647001acce..c250dc2436 100644 --- a/src/simd/archsimd/other_gen_amd64.go +++ b/src/simd/archsimd/other_gen_amd64.go @@ -10,7 +10,7 @@ package archsimd // Emulated, CPU Feature: AVX2 func BroadcastInt8x16(x int8) Int8x16 { var z Int8x16 - return z.SetElem(0, x).Broadcast128() + return z.SetElem(0, x).Broadcast1To16() } // BroadcastInt16x8 returns a vector with the input @@ -19,7 +19,7 @@ func BroadcastInt8x16(x int8) Int8x16 { // Emulated, CPU Feature: AVX2 func BroadcastInt16x8(x int16) Int16x8 { var z Int16x8 - return z.SetElem(0, x).Broadcast128() + return z.SetElem(0, x).Broadcast1To8() } // BroadcastInt32x4 returns a vector with the input @@ -28,7 +28,7 @@ func BroadcastInt16x8(x int16) Int16x8 { // Emulated, CPU Feature: AVX2 func BroadcastInt32x4(x int32) Int32x4 { var z Int32x4 - return z.SetElem(0, x).Broadcast128() + return z.SetElem(0, x).Broadcast1To4() } // BroadcastInt64x2 returns a vector with the input @@ -37,7 +37,7 @@ func BroadcastInt32x4(x int32) Int32x4 { // Emulated, CPU Feature: AVX2 func BroadcastInt64x2(x int64) Int64x2 { var z Int64x2 - return z.SetElem(0, x).Broadcast128() + return z.SetElem(0, x).Broadcast1To2() } // BroadcastUint8x16 returns a vector with the input @@ -46,7 +46,7 @@ func BroadcastInt64x2(x int64) Int64x2 { // Emulated, CPU Feature: AVX2 func BroadcastUint8x16(x uint8) Uint8x16 { var z Uint8x16 - return z.SetElem(0, x).Broadcast128() + return z.SetElem(0, x).Broadcast1To16() } // BroadcastUint16x8 returns a vector with the input @@ -55,7 +55,7 @@ func BroadcastUint8x16(x uint8) Uint8x16 { // Emulated, CPU Feature: AVX2 func BroadcastUint16x8(x uint16) Uint16x8 { var z Uint16x8 - return z.SetElem(0, x).Broadcast128() + return z.SetElem(0, 
x).Broadcast1To8() } // BroadcastUint32x4 returns a vector with the input @@ -64,7 +64,7 @@ func BroadcastUint16x8(x uint16) Uint16x8 { // Emulated, CPU Feature: AVX2 func BroadcastUint32x4(x uint32) Uint32x4 { var z Uint32x4 - return z.SetElem(0, x).Broadcast128() + return z.SetElem(0, x).Broadcast1To4() } // BroadcastUint64x2 returns a vector with the input @@ -73,7 +73,7 @@ func BroadcastUint32x4(x uint32) Uint32x4 { // Emulated, CPU Feature: AVX2 func BroadcastUint64x2(x uint64) Uint64x2 { var z Uint64x2 - return z.SetElem(0, x).Broadcast128() + return z.SetElem(0, x).Broadcast1To2() } // BroadcastFloat32x4 returns a vector with the input @@ -82,7 +82,7 @@ func BroadcastUint64x2(x uint64) Uint64x2 { // Emulated, CPU Feature: AVX2 func BroadcastFloat32x4(x float32) Float32x4 { var z Float32x4 - return z.SetElem(0, x).Broadcast128() + return z.SetElem(0, x).Broadcast1To4() } // BroadcastFloat64x2 returns a vector with the input @@ -91,7 +91,7 @@ func BroadcastFloat32x4(x float32) Float32x4 { // Emulated, CPU Feature: AVX2 func BroadcastFloat64x2(x float64) Float64x2 { var z Float64x2 - return z.SetElem(0, x).Broadcast128() + return z.SetElem(0, x).Broadcast1To2() } // BroadcastInt8x32 returns a vector with the input @@ -100,7 +100,7 @@ func BroadcastFloat64x2(x float64) Float64x2 { // Emulated, CPU Feature: AVX2 func BroadcastInt8x32(x int8) Int8x32 { var z Int8x16 - return z.SetElem(0, x).Broadcast256() + return z.SetElem(0, x).Broadcast1To32() } // BroadcastInt16x16 returns a vector with the input @@ -109,7 +109,7 @@ func BroadcastInt8x32(x int8) Int8x32 { // Emulated, CPU Feature: AVX2 func BroadcastInt16x16(x int16) Int16x16 { var z Int16x8 - return z.SetElem(0, x).Broadcast256() + return z.SetElem(0, x).Broadcast1To16() } // BroadcastInt32x8 returns a vector with the input @@ -118,7 +118,7 @@ func BroadcastInt16x16(x int16) Int16x16 { // Emulated, CPU Feature: AVX2 func BroadcastInt32x8(x int32) Int32x8 { var z Int32x4 - return z.SetElem(0, x).Broadcast256() 
+ return z.SetElem(0, x).Broadcast1To8() } // BroadcastInt64x4 returns a vector with the input @@ -127,7 +127,7 @@ func BroadcastInt32x8(x int32) Int32x8 { // Emulated, CPU Feature: AVX2 func BroadcastInt64x4(x int64) Int64x4 { var z Int64x2 - return z.SetElem(0, x).Broadcast256() + return z.SetElem(0, x).Broadcast1To4() } // BroadcastUint8x32 returns a vector with the input @@ -136,7 +136,7 @@ func BroadcastInt64x4(x int64) Int64x4 { // Emulated, CPU Feature: AVX2 func BroadcastUint8x32(x uint8) Uint8x32 { var z Uint8x16 - return z.SetElem(0, x).Broadcast256() + return z.SetElem(0, x).Broadcast1To32() } // BroadcastUint16x16 returns a vector with the input @@ -145,7 +145,7 @@ func BroadcastUint8x32(x uint8) Uint8x32 { // Emulated, CPU Feature: AVX2 func BroadcastUint16x16(x uint16) Uint16x16 { var z Uint16x8 - return z.SetElem(0, x).Broadcast256() + return z.SetElem(0, x).Broadcast1To16() } // BroadcastUint32x8 returns a vector with the input @@ -154,7 +154,7 @@ func BroadcastUint16x16(x uint16) Uint16x16 { // Emulated, CPU Feature: AVX2 func BroadcastUint32x8(x uint32) Uint32x8 { var z Uint32x4 - return z.SetElem(0, x).Broadcast256() + return z.SetElem(0, x).Broadcast1To8() } // BroadcastUint64x4 returns a vector with the input @@ -163,7 +163,7 @@ func BroadcastUint32x8(x uint32) Uint32x8 { // Emulated, CPU Feature: AVX2 func BroadcastUint64x4(x uint64) Uint64x4 { var z Uint64x2 - return z.SetElem(0, x).Broadcast256() + return z.SetElem(0, x).Broadcast1To4() } // BroadcastFloat32x8 returns a vector with the input @@ -172,7 +172,7 @@ func BroadcastUint64x4(x uint64) Uint64x4 { // Emulated, CPU Feature: AVX2 func BroadcastFloat32x8(x float32) Float32x8 { var z Float32x4 - return z.SetElem(0, x).Broadcast256() + return z.SetElem(0, x).Broadcast1To8() } // BroadcastFloat64x4 returns a vector with the input @@ -181,7 +181,7 @@ func BroadcastFloat32x8(x float32) Float32x8 { // Emulated, CPU Feature: AVX2 func BroadcastFloat64x4(x float64) Float64x4 { var z Float64x2 - 
return z.SetElem(0, x).Broadcast256() + return z.SetElem(0, x).Broadcast1To4() } // BroadcastInt8x64 returns a vector with the input @@ -190,7 +190,7 @@ func BroadcastFloat64x4(x float64) Float64x4 { // Emulated, CPU Feature: AVX512BW func BroadcastInt8x64(x int8) Int8x64 { var z Int8x16 - return z.SetElem(0, x).Broadcast512() + return z.SetElem(0, x).Broadcast1To64() } // BroadcastInt16x32 returns a vector with the input @@ -199,7 +199,7 @@ func BroadcastInt8x64(x int8) Int8x64 { // Emulated, CPU Feature: AVX512BW func BroadcastInt16x32(x int16) Int16x32 { var z Int16x8 - return z.SetElem(0, x).Broadcast512() + return z.SetElem(0, x).Broadcast1To32() } // BroadcastInt32x16 returns a vector with the input @@ -208,7 +208,7 @@ func BroadcastInt16x32(x int16) Int16x32 { // Emulated, CPU Feature: AVX512F func BroadcastInt32x16(x int32) Int32x16 { var z Int32x4 - return z.SetElem(0, x).Broadcast512() + return z.SetElem(0, x).Broadcast1To16() } // BroadcastInt64x8 returns a vector with the input @@ -217,7 +217,7 @@ func BroadcastInt32x16(x int32) Int32x16 { // Emulated, CPU Feature: AVX512F func BroadcastInt64x8(x int64) Int64x8 { var z Int64x2 - return z.SetElem(0, x).Broadcast512() + return z.SetElem(0, x).Broadcast1To8() } // BroadcastUint8x64 returns a vector with the input @@ -226,7 +226,7 @@ func BroadcastInt64x8(x int64) Int64x8 { // Emulated, CPU Feature: AVX512BW func BroadcastUint8x64(x uint8) Uint8x64 { var z Uint8x16 - return z.SetElem(0, x).Broadcast512() + return z.SetElem(0, x).Broadcast1To64() } // BroadcastUint16x32 returns a vector with the input @@ -235,7 +235,7 @@ func BroadcastUint8x64(x uint8) Uint8x64 { // Emulated, CPU Feature: AVX512BW func BroadcastUint16x32(x uint16) Uint16x32 { var z Uint16x8 - return z.SetElem(0, x).Broadcast512() + return z.SetElem(0, x).Broadcast1To32() } // BroadcastUint32x16 returns a vector with the input @@ -244,7 +244,7 @@ func BroadcastUint16x32(x uint16) Uint16x32 { // Emulated, CPU Feature: AVX512F func 
BroadcastUint32x16(x uint32) Uint32x16 { var z Uint32x4 - return z.SetElem(0, x).Broadcast512() + return z.SetElem(0, x).Broadcast1To16() } // BroadcastUint64x8 returns a vector with the input @@ -253,7 +253,7 @@ func BroadcastUint32x16(x uint32) Uint32x16 { // Emulated, CPU Feature: AVX512F func BroadcastUint64x8(x uint64) Uint64x8 { var z Uint64x2 - return z.SetElem(0, x).Broadcast512() + return z.SetElem(0, x).Broadcast1To8() } // BroadcastFloat32x16 returns a vector with the input @@ -262,7 +262,7 @@ func BroadcastUint64x8(x uint64) Uint64x8 { // Emulated, CPU Feature: AVX512F func BroadcastFloat32x16(x float32) Float32x16 { var z Float32x4 - return z.SetElem(0, x).Broadcast512() + return z.SetElem(0, x).Broadcast1To16() } // BroadcastFloat64x8 returns a vector with the input @@ -271,7 +271,7 @@ func BroadcastFloat32x16(x float32) Float32x16 { // Emulated, CPU Feature: AVX512F func BroadcastFloat64x8(x float64) Float64x8 { var z Float64x2 - return z.SetElem(0, x).Broadcast512() + return z.SetElem(0, x).Broadcast1To8() } // ToMask converts from Int8x16 to Mask8x16, mask element is set to true when the corresponding vector element is non-zero. |
