aboutsummaryrefslogtreecommitdiff
path: root/src/cmd/compile
diff options
context:
space:
mode:
authorDavid Chase <drchase@google.com>2025-09-05 19:05:18 -0400
committerDavid Chase <drchase@google.com>2025-10-07 14:06:28 -0700
commitd2270bccbda381a542b77157c9960e4ae90df8ad (patch)
treed49ac12fc638dcf16a93c272ddc44b52ccf669fd /src/cmd/compile
parent48756abd3a8f4e0d40d67979c5943979571f450e (diff)
downloadgo-d2270bccbda381a542b77157c9960e4ae90df8ad.tar.xz
[dev.simd] cmd/compile: track which CPU features are in scope
analysis for - is this block only reached through feature checks? - does the function signature imply AVX-something? - is there an instruction in this block which implies AVX-something? and keep track of which features those are. Features = AVX, AVX2, AVX512, etc. Has a test. Change-Id: I0b6f2e87d01ec587818db11cf71fac1e4d500650 Reviewed-on: https://go-review.googlesource.com/c/go/+/706337 LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com> Reviewed-by: Junyang Shao <shaojunyang@google.com>
Diffstat (limited to 'src/cmd/compile')
-rw-r--r--src/cmd/compile/internal/ssa/block.go53
-rw-r--r--src/cmd/compile/internal/ssa/compile.go3
-rw-r--r--src/cmd/compile/internal/ssa/cpufeatures.go261
-rw-r--r--src/cmd/compile/internal/ssa/sizeof_test.go2
-rw-r--r--src/cmd/compile/internal/types/type.go2
5 files changed, 320 insertions, 1 deletions
diff --git a/src/cmd/compile/internal/ssa/block.go b/src/cmd/compile/internal/ssa/block.go
index 1240bfd655..f457e66f16 100644
--- a/src/cmd/compile/internal/ssa/block.go
+++ b/src/cmd/compile/internal/ssa/block.go
@@ -18,6 +18,9 @@ type Block struct {
// Source position for block's control operation
Pos src.XPos
+ // What cpu features (AVXnnn, SVEyyy) are implied to reach/execute this block?
+ CPUfeatures CPUfeatures
+
// The kind of block this is.
Kind BlockKind
@@ -449,3 +452,53 @@ const (
HotPgoInitial = HotPgo | HotInitial // special case; single block loop, initial block is header block has a flow-in entry, but PGO says it is hot
HotPgoInitialNotFLowIn = HotPgo | HotInitial | HotNotFlowIn // PGO says it is hot, and the loop is rotated so flow enters loop with a branch
)
+
+// CPUfeatures is a bit set of CPU instruction-set features.
+// Each named feature below occupies one bit; sets are combined with |
+// and intersected with &.
+type CPUfeatures uint32
+
+const (
+	// CPUNone is the empty feature set.
+	CPUNone CPUfeatures = 0
+	// CPUAll is the feature set with every bit set.
+	CPUAll CPUfeatures = ^CPUfeatures(0)
+
+	// Individual feature bits. iota is already 2 on the CPUavx line
+	// (CPUNone and CPUAll account for 0 and 1), so CPUavx is 1<<2 and
+	// each following constant takes the next higher bit.
+	CPUavx CPUfeatures = 1 << iota
+	CPUavx2
+	CPUavxvnni
+	CPUavx512
+	CPUbitalg
+	CPUgfni
+	CPUvbmi
+	CPUvbmi2
+	CPUvpopcntdq
+	CPUavx512vnni
+
+	CPUneon
+	CPUsve2
+)
+
+// String returns a human-readable rendering of f: "none" for the empty
+// set, "all" when every bit is set, and otherwise the names of the
+// features present in f joined by "+".
+func (f CPUfeatures) String() string {
+	switch f {
+	case CPUNone:
+		return "none"
+	case CPUAll:
+		return "all"
+	}
+
+	s := ""
+	// add appends name to s, preceded by "+" if s is non-empty,
+	// when feat is present in f.
+	add := func(name string, feat CPUfeatures) {
+		if f&feat == 0 {
+			return
+		}
+		if s != "" {
+			s += "+"
+		}
+		s += name
+	}
+	add("avx", CPUavx)
+	add("avx2", CPUavx2)
+	add("avx512", CPUavx512)
+	add("avxvnni", CPUavxvnni)
+	add("bitalg", CPUbitalg)
+	add("gfni", CPUgfni)
+	add("vbmi", CPUvbmi)
+	add("vbmi2", CPUvbmi2)
+	add("popcntdq", CPUvpopcntdq)
+	add("avx512vnni", CPUavx512vnni)
+	// Without these two entries a set holding only CPUneon and/or
+	// CPUsve2 would render as the empty string.
+	add("neon", CPUneon)
+	add("sve2", CPUsve2)
+
+	return s
+}
diff --git a/src/cmd/compile/internal/ssa/compile.go b/src/cmd/compile/internal/ssa/compile.go
index 1f47362583..be1a6f158e 100644
--- a/src/cmd/compile/internal/ssa/compile.go
+++ b/src/cmd/compile/internal/ssa/compile.go
@@ -485,6 +485,7 @@ var passes = [...]pass{
{name: "writebarrier", fn: writebarrier, required: true}, // expand write barrier ops
{name: "insert resched checks", fn: insertLoopReschedChecks,
disabled: !buildcfg.Experiment.PreemptibleLoops}, // insert resched checks in loops.
+ {name: "cpufeatures", fn: cpufeatures, required: buildcfg.Experiment.SIMD, disabled: !buildcfg.Experiment.SIMD},
{name: "lower", fn: lower, required: true},
{name: "addressing modes", fn: addressingModes, required: false},
{name: "late lower", fn: lateLower, required: true},
@@ -587,6 +588,8 @@ var passOrder = [...]constraint{
{"branchelim", "late opt"},
// branchelim is an arch-independent pass.
{"branchelim", "lower"},
+ // lower needs cpu feature information (for SIMD)
+ {"cpufeatures", "lower"},
}
func init() {
diff --git a/src/cmd/compile/internal/ssa/cpufeatures.go b/src/cmd/compile/internal/ssa/cpufeatures.go
new file mode 100644
index 0000000000..77b1db552d
--- /dev/null
+++ b/src/cmd/compile/internal/ssa/cpufeatures.go
@@ -0,0 +1,261 @@
+// Copyright 2025 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ssa
+
+import (
+ "cmd/compile/internal/types"
+ "cmd/internal/obj"
+ "fmt"
+ "internal/goarch"
+)
+
+// localEffect records the CPU feature sets computed for a single block
+// by the cpufeatures pass.
+type localEffect struct {
+	// start is the feature set present at the beginning of the block.
+	start CPUfeatures
+	// internal is the feature set implied by executing the block itself.
+	internal CPUfeatures
+	// end holds the feature sets present on the outgoing edges. The two
+	// entries can differ only for a BlockIf.
+	end [2]CPUfeatures
+	// visited reports whether the block has been processed. On the first
+	// iteration this will be false for backedges.
+	visited bool
+}
+
+func (e localEffect) String() string {
+ return fmt.Sprintf("visited=%v, start=%v, internal=%v, end[0]=%v, end[1]=%v", e.visited, e.start, e.internal, e.end[0], e.end[1])
+}
+
+// ifEffect pattern-matches a BlockIf whose control value is a load of a
+// field of internal/cpu.X86 (optionally wrapped in OpNot) and returns
+// the CPU features implied by the corresponding feature check.
+//
+// taken is the index of the outgoing edge on which the features hold:
+// 0 for a direct test and 1 when the load is negated. When b does not
+// match, or the field is not one the compiler tracks, features is
+// CPUNone and taken carries no information.
+func ifEffect(b *Block) (features CPUfeatures, taken int) {
+	// TODO generalize for other architectures.
+	if b.Kind != BlockIf {
+		return CPUNone, 0
+	}
+	c := b.Controls[0]
+
+	if c.Op == OpNot {
+		// A negated test means the feature holds on the other edge.
+		taken = 1
+		c = c.Args[0]
+	}
+
+	// Require the shape Load(OffPtr(Addr(SB))).
+	if c.Op != OpLoad {
+		return CPUNone, taken
+	}
+	offPtr := c.Args[0]
+	if offPtr.Op != OpOffPtr {
+		return CPUNone, taken
+	}
+	addr := offPtr.Args[0]
+	if addr.Op != OpAddr || addr.Args[0].Op != OpSB {
+		return CPUNone, taken
+	}
+	// Use the comma-ok form so an unexpected Aux is treated as
+	// "no match" rather than a panic.
+	sym, ok := addr.Aux.(*obj.LSym)
+	if !ok || sym == nil || sym.Name != "internal/cpu.X86" {
+		return CPUNone, taken
+	}
+
+	o := offPtr.AuxInt
+	t := addr.Type
+	if !t.IsPtr() {
+		b.Func.Fatalf("The symbol %s is not a pointer, found %v instead", sym.Name, t)
+	}
+	t = t.Elem()
+	if !t.IsStruct() {
+		b.Func.Fatalf("The referent of symbol %s is not a struct, found %v instead", sym.Name, t)
+	}
+
+	// Find the name of the named field located at the loaded offset.
+	match := ""
+	for _, f := range t.Fields() {
+		if o == f.Offset && f.Sym != nil {
+			match = f.Sym.Name
+			break
+		}
+	}
+
+	switch match {
+
+	case "HasAVX":
+		features = CPUavx
+	case "HasAVXVNNI":
+		features = CPUavx | CPUavxvnni
+	case "HasAVX2":
+		features = CPUavx2 | CPUavx
+
+	// Compiler currently treats these all alike.
+	case "HasAVX512", "HasAVX512F", "HasAVX512CD", "HasAVX512BW",
+		"HasAVX512DQ", "HasAVX512VL", "HasAVX512VPCLMULQDQ":
+		features = CPUavx512 | CPUavx2 | CPUavx
+
+	case "HasAVX512GFNI":
+		features = CPUavx512 | CPUgfni | CPUavx2 | CPUavx
+	case "HasAVX512VNNI":
+		features = CPUavx512 | CPUavx512vnni | CPUavx2 | CPUavx
+	case "HasAVX512VBMI":
+		features = CPUavx512 | CPUvbmi | CPUavx2 | CPUavx
+	case "HasAVX512VBMI2":
+		features = CPUavx512 | CPUvbmi2 | CPUavx2 | CPUavx
+	case "HasAVX512BITALG":
+		features = CPUavx512 | CPUbitalg | CPUavx2 | CPUavx
+	case "HasAVX512VPOPCNTDQ":
+		features = CPUavx512 | CPUvpopcntdq | CPUavx2 | CPUavx
+
+	// Features that are not currently interesting to the compiler.
+	// HasBMI1 and HasBMI2 belong here: they are the scalar
+	// bit-manipulation extensions, distinct from AVX512VBMI and
+	// AVX512VBMI2, so testing them must not imply CPUvbmi or CPUvbmi2.
+	case "HasAES", "HasADX", "HasBMI1", "HasBMI2", "HasERMS", "HasFSRM", "HasFMA",
+		"HasGFNI", "HasOSXSAVE", "HasPCLMULQDQ", "HasPOPCNT", "HasRDTSCP", "HasSHA",
+		"HasSSE3", "HasSSSE3", "HasSSE41", "HasSSE42":
+
+	}
+	if b.Func.pass.debug > 2 {
+		b.Func.Warnl(b.Pos, "%s, block b%v has features offset %d, match is %s, features is %v", b.Func.Name, b.ID, o, match, features)
+	}
+	return features, taken
+}
+
+// cpufeatures computes, for each block in f.Postorder(), the set of CPU
+// features that may be assumed while the block executes and records it
+// in b.CPUfeatures. Features come from three sources:
+//   - SIMD types in the function's receiver, parameters and results
+//     (entry block only),
+//   - SIMD-typed values computed in the block, and
+//   - branches on internal/cpu.X86 feature fields recognized by ifEffect.
+//
+// A non-entry block starts with the intersection of the features on its
+// incoming edges. The pass does nothing unless the target is AMD64.
+func cpufeatures(f *Func) {
+	arch := f.Config.Ctxt().Arch.Family
+	// TODO there are other SIMD architectures
+	if arch != goarch.AMD64 {
+		return
+	}
+
+	po := f.Postorder()
+
+	// Indexed by block ID.
+	effects := make([]localEffect, 1+f.NumBlocks())
+
+	// features maps a type to the features its use implies: AVX for 16-
+	// and 32-byte SIMD types, AVX512 (plus AVX2 and AVX) for 64-byte
+	// ones, and nothing for any other type.
+	features := func(t *types.Type) CPUfeatures {
+		if t.IsSIMD() {
+			switch t.Size() {
+			case 16, 32:
+				return CPUavx
+			case 64:
+				return CPUavx512 | CPUavx2 | CPUavx
+			}
+		}
+		return CPUNone
+	}
+
+	// visit blocks in reverse post order
+	// when b is visited, all of its predecessors (except for loop back edges)
+	// will have been visited
+	for i := len(po) - 1; i >= 0; i-- {
+		b := po[i]
+
+		var feat CPUfeatures
+
+		if b == f.Entry {
+			// Check the types of inputs and outputs, as well as annotations.
+			// Start with none and union all that is implied by all the types seen.
+			if f.Type != nil { // a problem for SSA tests
+				for _, field := range f.Type.RecvParamsResults() {
+					feat |= features(field.Type)
+				}
+			}
+
+		} else {
+			// Start with all and intersect over predecessors
+			feat = CPUAll
+			for _, p := range b.Preds {
+				pb := p.Block()
+				if !effects[pb.ID].visited {
+					// Not processed yet (a back edge); it is accounted
+					// for by the fixpoint loop below.
+					continue
+				}
+				// Only a BlockIf has distinct per-edge feature sets.
+				pi := p.Index()
+				if pb.Kind != BlockIf {
+					pi = 0
+				}
+
+				feat &= effects[pb.ID].end[pi]
+			}
+		}
+
+		e := localEffect{start: feat, visited: true}
+
+		// Separately capture the internal effects of this block
+		var internal CPUfeatures
+		for _, v := range b.Values {
+			// the rule applied here is, if the block contains any
+			// instruction that would fault if the feature (avx, avx512)
+			// were not present, then assume that the feature is present
+			// for all the instructions in the block, a fault is a fault.
+			t := v.Type
+			if t.IsResults() {
+				// Accumulate into internal (not feat) so that the
+				// fixpoint loop below, which rebuilds feat from the
+				// predecessors plus e.internal, does not lose these.
+				for i := 0; i < t.NumFields(); i++ {
+					internal |= features(t.FieldType(i))
+				}
+			} else {
+				internal |= features(t)
+			}
+		}
+		e.internal = internal
+		feat |= internal
+
+		branchEffect, taken := ifEffect(b)
+		e.end = [2]CPUfeatures{feat, feat}
+		e.end[taken] |= branchEffect
+
+		effects[b.ID] = e
+		if f.pass.debug > 1 && feat != CPUNone {
+			f.Warnl(b.Pos, "%s, block b%v has features %v", b.Func.Name, b.ID, feat)
+		}
+
+		b.CPUfeatures = feat
+	}
+
+	// If the flow graph is irreducible, things can still change on backedges.
+	// Every predecessor now has an effect recorded, so re-intersect over
+	// all incoming edges and repeat until no block's outgoing sets change.
+	change := true
+	for change {
+		change = false
+		for i := len(po) - 1; i >= 0; i-- {
+			b := po[i]
+
+			if b == f.Entry {
+				continue // cannot change
+			}
+			feat := CPUAll
+			for _, p := range b.Preds {
+				pb := p.Block()
+				pi := p.Index()
+				if pb.Kind != BlockIf {
+					pi = 0
+				}
+				feat &= effects[pb.ID].end[pi]
+			}
+			e := effects[b.ID]
+			if feat == e.start {
+				continue
+			}
+			e.start = feat
+			effects[b.ID] = e
+			// uh-oh, something changed
+			if f.pass.debug > 1 {
+				f.Warnl(b.Pos, "%s, block b%v saw predecessor feature change", b.Func.Name, b.ID)
+			}
+
+			feat |= e.internal
+			// end[0]&end[1] is the block's previous feature set without
+			// any branch effect; if it is unchanged, so are the ends.
+			if feat == e.end[0]&e.end[1] {
+				continue
+			}
+
+			branchEffect, taken := ifEffect(b)
+			e.end = [2]CPUfeatures{feat, feat}
+			e.end[taken] |= branchEffect
+
+			effects[b.ID] = e
+			b.CPUfeatures = feat
+			if f.pass.debug > 1 {
+				f.Warnl(b.Pos, "%s, block b%v has new features %v", b.Func.Name, b.ID, feat)
+			}
+			change = true
+		}
+	}
+	if f.pass.debug > 0 {
+		for _, b := range f.Blocks {
+			if b.CPUfeatures != CPUNone {
+				f.Warnl(b.Pos, "%s, block b%v has features %v", b.Func.Name, b.ID, b.CPUfeatures)
+			}
+
+		}
+	}
+}
diff --git a/src/cmd/compile/internal/ssa/sizeof_test.go b/src/cmd/compile/internal/ssa/sizeof_test.go
index a27002ee3a..9a58197925 100644
--- a/src/cmd/compile/internal/ssa/sizeof_test.go
+++ b/src/cmd/compile/internal/ssa/sizeof_test.go
@@ -21,7 +21,7 @@ func TestSizeof(t *testing.T) {
_64bit uintptr // size on 64bit platforms
}{
{Value{}, 72, 112},
- {Block{}, 164, 304},
+ {Block{}, 168, 312},
{LocalSlot{}, 28, 40},
{valState{}, 28, 40},
}
diff --git a/src/cmd/compile/internal/types/type.go b/src/cmd/compile/internal/types/type.go
index 652d4362ce..fc2c0435bd 100644
--- a/src/cmd/compile/internal/types/type.go
+++ b/src/cmd/compile/internal/types/type.go
@@ -989,6 +989,7 @@ func (t *Type) ArgWidth() int64 {
return t.extra.(*Func).Argwid
}
+// Size returns the width of t in bytes.
func (t *Type) Size() int64 {
if t.kind == TSSA {
return t.width
@@ -997,6 +998,7 @@ func (t *Type) Size() int64 {
return t.width
}
+// Alignment returns the alignment of t in bytes.
func (t *Type) Alignment() int64 {
CalcSize(t)
return int64(t.align)