11 files changed, 1049 insertions, 122 deletions
diff --git a/src/cmd/compile/internal/base/debug.go b/src/cmd/compile/internal/base/debug.go
index e217b3e9b0..1f05ed9831 100644
--- a/src/cmd/compile/internal/base/debug.go
+++ b/src/cmd/compile/internal/base/debug.go
@@ -54,6 +54,7 @@ type DebugFlags struct {
 	PGOInline             int    `help:"enable profile-guided inlining" concurrent:"ok"`
 	PGOInlineCDFThreshold string `help:"cumulative threshold percentage for determining call sites as hot candidates for inlining" concurrent:"ok"`
 	PGOInlineBudget       int    `help:"inline budget for hot functions" concurrent:"ok"`
+	PGODevirtualize       int    `help:"enable profile-guided devirtualization" concurrent:"ok"`
 	WrapGlobalMapDbg      int    `help:"debug trace output for global map init wrapping"`
 	WrapGlobalMapCtl      int    `help:"global map init wrap control (0 => default, 1 => off, 2 => stress mode, no size cutoff)"`
 
diff --git a/src/cmd/compile/internal/base/flag.go b/src/cmd/compile/internal/base/flag.go
index f1656fc98c..bac421d303 100644
--- a/src/cmd/compile/internal/base/flag.go
+++ b/src/cmd/compile/internal/base/flag.go
@@ -169,6 +169,7 @@ func ParseFlags() {
 	Debug.InlFuncsWithClosures = 1
 	Debug.InlStaticInit = 1
 	Debug.PGOInline = 1
+	Debug.PGODevirtualize = 1
 	Debug.SyncFrames = -1 // disable sync markers by default
 
 	Debug.Checkptr = -1 // so we can tell whether it is set explicitly
diff --git a/src/cmd/compile/internal/devirtualize/devirtualize.go b/src/cmd/compile/internal/devirtualize/devirtualize.go
index 6c41d4efd8..cfeb8d8ee9 100644
--- a/src/cmd/compile/internal/devirtualize/devirtualize.go
+++ b/src/cmd/compile/internal/devirtualize/devirtualize.go
@@ -2,9 +2,13 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
-// Package devirtualize implements a simple "devirtualization"
-// optimization pass, which replaces interface method calls with
-// direct concrete-type method calls where possible.
+// Package devirtualize implements two "devirtualization" optimization passes:
+//
+// - "Static" devirtualization which replaces interface method calls with
+//   direct concrete-type method calls where possible.
+// - "Profile-guided" devirtualization which replaces indirect calls with a
+//   conditional direct call to the hottest concrete callee from a profile, as
+//   well as a fallback using the original indirect call.
 package devirtualize
 
 import (
@@ -14,8 +18,9 @@ import (
 	"cmd/compile/internal/types"
 )
 
-// Func devirtualizes calls within fn where possible.
-func Func(fn *ir.Func) {
+// Static devirtualizes calls within fn where possible when the concrete callee
+// is available statically.
+func Static(fn *ir.Func) {
 	ir.CurFunc = fn
 
 	// For promoted methods (including value-receiver methods promoted to pointer-receivers),
@@ -34,14 +39,15 @@ func Func(fn *ir.Func) {
 			return
 		case *ir.CallExpr:
 			if !goDeferCall[n] {
-				Call(n)
+				staticCall(n)
 			}
 		}
 	})
 }
 
-// Call devirtualizes the given call if possible.
-func Call(call *ir.CallExpr) {
+// staticCall devirtualizes the given call if possible when the concrete callee
+// is available statically.
+func staticCall(call *ir.CallExpr) {
 	if call.Op() != ir.OCALLINTER {
 		return
 	}
diff --git a/src/cmd/compile/internal/devirtualize/pgo.go b/src/cmd/compile/internal/devirtualize/pgo.go
new file mode 100644
index 0000000000..69c421ca5a
--- /dev/null
+++ b/src/cmd/compile/internal/devirtualize/pgo.go
@@ -0,0 +1,532 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package devirtualize
+
+import (
+	"cmd/compile/internal/base"
+	"cmd/compile/internal/inline"
+	"cmd/compile/internal/ir"
+	"cmd/compile/internal/logopt"
+	"cmd/compile/internal/pgo"
+	"cmd/compile/internal/typecheck"
+	"cmd/compile/internal/types"
+	"encoding/json"
+	"fmt"
+	"os"
+)
+
+// CallStat summarizes a single call site.
+//
+// This is used only for debug logging.
+type CallStat struct {
+	Pkg string // base.Ctxt.Pkgpath
+	Pos string // file:line:col of call.
+
+	Caller string // Linker symbol name of calling function.
+
+	// Direct or indirect call.
+	Direct bool
+
+	// For indirect calls, interface call or other indirect function call.
+	Interface bool
+
+	// Total edge weight from this call site.
+	Weight int64
+
+	// Hottest callee from this call site, regardless of type
+	// compatibility.
+	Hottest       string
+	HottestWeight int64
+
+	// Devirtualized callee if != "".
+	//
+	// Note that this may be different than Hottest because we apply
+	// type-check restrictions, which helps distinguish multiple calls on
+	// the same line.
+	Devirtualized       string
+	DevirtualizedWeight int64
+}
+
+// ProfileGuided performs call devirtualization of indirect calls based on
+// profile information.
+//
+// Specifically, it performs conditional devirtualization of interface calls
+// for the hottest callee. That is, it performs a transformation like:
+//
+//	type Iface interface {
+//		Foo()
+//	}
+//
+//	type Concrete struct{}
+//
+//	func (Concrete) Foo() {}
+//
+//	func foo(i Iface) {
+//		i.Foo()
+//	}
+//
+// to:
+//
+//	func foo(i Iface) {
+//		if c, ok := i.(Concrete); ok {
+//			c.Foo()
+//		} else {
+//			i.Foo()
+//		}
+//	}
+//
+// The primary benefit of this transformation is enabling inlining of the
+// direct call.
+func ProfileGuided(fn *ir.Func, p *pgo.Profile) {
+	ir.CurFunc = fn
+
+	name := ir.LinkFuncName(fn)
+
+	// Can't devirtualize go/defer calls. See comment in Static.
+	goDeferCall := make(map[*ir.CallExpr]bool)
+
+	var jsonW *json.Encoder
+	if base.Debug.PGODebug >= 3 {
+		jsonW = json.NewEncoder(os.Stdout)
+	}
+
+	var edit func(n ir.Node) ir.Node
+	edit = func(n ir.Node) ir.Node {
+		if n == nil {
+			return n
+		}
+
+		if gds, ok := n.(*ir.GoDeferStmt); ok {
+			if call, ok := gds.Call.(*ir.CallExpr); ok {
+				goDeferCall[call] = true
+			}
+		}
+
+		ir.EditChildren(n, edit)
+
+		call, ok := n.(*ir.CallExpr)
+		if !ok {
+			return n
+		}
+
+		var stat *CallStat
+		if base.Debug.PGODebug >= 3 {
+			// Statistics about every single call. Handy for external data analysis.
+			//
+			// TODO(prattmic): Log via logopt?
+			stat = constructCallStat(p, fn, name, call)
+			if stat != nil {
+				defer func() {
+					jsonW.Encode(&stat)
+				}()
+			}
+		}
+
+		if call.Op() != ir.OCALLINTER {
+			return n
+		}
+
+		if base.Debug.PGODebug >= 2 {
+			fmt.Printf("%v: PGO devirtualize considering call %v\n", ir.Line(call), call)
+		}
+
+		if goDeferCall[call] {
+			if base.Debug.PGODebug >= 2 {
+				fmt.Printf("%v: can't PGO devirtualize go/defer call %v\n", ir.Line(call), call)
+			}
+			return n
+		}
+
+		// Bail if we do not have a hot callee.
+		callee, weight := findHotConcreteCallee(p, fn, call)
+		if callee == nil {
+			return n
+		}
+		// Bail if we do not have a Type node for the hot callee.
+		ctyp := methodRecvType(callee)
+		if ctyp == nil {
+			return n
+		}
+		// Bail if we know for sure it won't inline.
+		if !shouldPGODevirt(callee) {
+			return n
+		}
+
+		if stat != nil {
+			stat.Devirtualized = ir.LinkFuncName(callee)
+			stat.DevirtualizedWeight = weight
+		}
+
+		return rewriteCondCall(call, fn, callee, ctyp)
+	}
+
+	ir.EditChildren(fn, edit)
+}
+
+// shouldPGODevirt checks if we should perform PGO devirtualization to the
+// target function.
+//
+// PGO devirtualization is most valuable when the callee is inlined, so if it
+// won't inline we can skip devirtualizing.
+func shouldPGODevirt(fn *ir.Func) bool {
+	var reason string
+	if base.Flag.LowerM > 1 || logopt.Enabled() {
+		defer func() {
+			if reason != "" {
+				if base.Flag.LowerM > 1 {
+					fmt.Printf("%v: should not PGO devirtualize %v: %s\n", ir.Line(fn), ir.FuncName(fn), reason)
+				}
+				if logopt.Enabled() {
+					logopt.LogOpt(fn.Pos(), ": should not PGO devirtualize function", "pgo-devirtualize", ir.FuncName(fn), reason)
+				}
+			}
+		}()
+	}
+
+	reason = inline.InlineImpossible(fn)
+	if reason != "" {
+		return false
+	}
+
+	// TODO(prattmic): checking only InlineImpossible is very conservative,
+	// primarily excluding only functions with pragmas. We probably want to
+	// move in either direction. Either:
+	//
+	// 1. Don't even bother to check InlineImpossible, as it affects so few
+	// functions.
+	//
+	// 2. Or consider the function body (notably cost) to better determine
+	// if the function will actually inline.
+
+	return true
+}
+
+// constructCallStat builds an initial CallStat describing this call, for
+// logging. If the call is devirtualized, the devirtualization fields should be
+// updated.
+func constructCallStat(p *pgo.Profile, fn *ir.Func, name string, call *ir.CallExpr) *CallStat {
+	switch call.Op() {
+	case ir.OCALLFUNC, ir.OCALLINTER, ir.OCALLMETH:
+	default:
+		// We don't care about logging builtin functions.
+		return nil
+	}
+
+	stat := CallStat{
+		Pkg:    base.Ctxt.Pkgpath,
+		Pos:    ir.Line(call),
+		Caller: name,
+	}
+
+	offset := pgo.NodeLineOffset(call, fn)
+
+	// Sum of all edges from this callsite, regardless of callee.
+	// For direct calls, this should be the same as the single edge
+	// weight (except for multiple calls on one line, which we
+	// can't distinguish).
+	callerNode := p.WeightedCG.IRNodes[name]
+	for _, edge := range callerNode.OutEdges {
+		if edge.CallSiteOffset != offset {
+			continue
+		}
+		stat.Weight += edge.Weight
+		if edge.Weight > stat.HottestWeight {
+			stat.HottestWeight = edge.Weight
+			stat.Hottest = edge.Dst.Name()
+		}
+	}
+
+	switch call.Op() {
+	case ir.OCALLFUNC:
+		stat.Interface = false
+
+		callee := pgo.DirectCallee(call.X)
+		if callee != nil {
+			stat.Direct = true
+			if stat.Hottest == "" {
+				stat.Hottest = ir.LinkFuncName(callee)
+			}
+		} else {
+			stat.Direct = false
+		}
+	case ir.OCALLINTER:
+		stat.Direct = false
+		stat.Interface = true
+	case ir.OCALLMETH:
+		base.FatalfAt(call.Pos(), "OCALLMETH missed by typecheck")
+	}
+
+	return &stat
+}
+
+// rewriteCondCall devirtualizes the given call using a direct method call to
+// concretetyp.
+func rewriteCondCall(call *ir.CallExpr, curfn, callee *ir.Func, concretetyp *types.Type) ir.Node {
+	if base.Flag.LowerM != 0 {
+		fmt.Printf("%v: PGO devirtualizing call to %v\n", ir.Line(call), callee)
+	}
+
+	// We generate an OINCALL of:
+	//
+	// var recv Iface
+	//
+	// var arg1 A1
+	// var argN AN
+	//
+	// var ret1 R1
+	// var retN RN
+	//
+	// recv, arg1, argN = recv expr, arg1 expr, argN expr
+	//
+	// t, ok := recv.(Concrete)
+	// if ok {
+	//   ret1, retN = t.Method(arg1, ... argN)
+	// } else {
+	//   ret1, retN = recv.Method(arg1, ... argN)
+	// }
+	//
+	// OINCALL retvars: ret1, ... retN
+	//
+	// This isn't really an inlined call of course, but InlinedCallExpr
+	// makes handling reassignment of return values easier.
+	//
+	// TODO(prattmic): This increases the size of the AST in the caller,
+	// making it less like to inline. We may want to compensate for this
+	// somehow.
+
+	var retvars []ir.Node
+
+	sig := call.X.Type()
+
+	for _, ret := range sig.Results().FieldSlice() {
+		retvars = append(retvars, typecheck.Temp(ret.Type))
+	}
+
+	sel := call.X.(*ir.SelectorExpr)
+	method := sel.Sel
+	pos := call.Pos()
+	init := ir.TakeInit(call)
+
+	// Evaluate receiver and argument expressions. The receiver is used
+	// twice but we don't want to cause side effects twice. The arguments
+	// are used in two different calls and we can't trivially copy them.
+	//
+	// recv must be first in the assignment list as its side effects must
+	// be ordered before argument side effects.
+	var lhs, rhs []ir.Node
+	recv := typecheck.Temp(sel.X.Type())
+	lhs = append(lhs, recv)
+	rhs = append(rhs, sel.X)
+
+	// Move arguments to assignments prior to the if statement. We cannot
+	// simply copy the args' IR, as some IR constructs cannot be copied,
+	// such as labels (possible in InlinedCall nodes).
+	args := call.Args.Take()
+	for _, arg := range args {
+		argvar := typecheck.Temp(arg.Type())
+
+		lhs = append(lhs, argvar)
+		rhs = append(rhs, arg)
+	}
+
+	asList := ir.NewAssignListStmt(pos, ir.OAS2, lhs, rhs)
+	init.Append(typecheck.Stmt(asList))
+
+	// Copy slice so edits in one location don't affect another.
+	argvars := append([]ir.Node(nil), lhs[1:]...)
+	call.Args = argvars
+
+	tmpnode := typecheck.Temp(concretetyp)
+	tmpok := typecheck.Temp(types.Types[types.TBOOL])
+
+	assert := ir.NewTypeAssertExpr(pos, recv, concretetyp)
+
+	assertAsList := ir.NewAssignListStmt(pos, ir.OAS2, []ir.Node{tmpnode, tmpok}, []ir.Node{typecheck.Expr(assert)})
+	init.Append(typecheck.Stmt(assertAsList))
+
+	concreteCallee := typecheck.Callee(ir.NewSelectorExpr(pos, ir.OXDOT, tmpnode, method))
+	// Copy slice so edits in one location don't affect another.
+	argvars = append([]ir.Node(nil), argvars...)
+	concreteCall := typecheck.Call(pos, concreteCallee, argvars, call.IsDDD)
+
+	var thenBlock, elseBlock ir.Nodes
+	if len(retvars) == 0 {
+		thenBlock.Append(concreteCall)
+		elseBlock.Append(call)
+	} else {
+		// Copy slice so edits in one location don't affect another.
+		thenRet := append([]ir.Node(nil),  retvars...)
+		thenAsList := ir.NewAssignListStmt(pos, ir.OAS2, thenRet, []ir.Node{concreteCall})
+		thenBlock.Append(typecheck.Stmt(thenAsList))
+
+		elseRet := append([]ir.Node(nil),  retvars...)
+		elseAsList := ir.NewAssignListStmt(pos, ir.OAS2, elseRet, []ir.Node{call})
+		elseBlock.Append(typecheck.Stmt(elseAsList))
+	}
+
+	cond := ir.NewIfStmt(pos, nil, nil, nil)
+	cond.SetInit(init)
+	cond.Cond = tmpok
+	cond.Body = thenBlock
+	cond.Else = elseBlock
+	cond.Likely = true
+
+	body := []ir.Node{typecheck.Stmt(cond)}
+
+	res := ir.NewInlinedCallExpr(pos, body, retvars)
+	res.SetType(call.Type())
+	res.SetTypecheck(1)
+
+	if base.Debug.PGODebug >= 3 {
+		fmt.Printf("PGO devirtualizing call to %+v. After: %+v\n", concretetyp, res)
+	}
+
+	return res
+}
+
+// methodRecvType returns the type containing method fn. Returns nil if fn
+// is not a method.
+func methodRecvType(fn *ir.Func) *types.Type {
+	recv := fn.Nname.Type().Recv()
+	if recv == nil {
+		return nil
+	}
+	return recv.Type
+}
+
+// interfaceCallRecvType returns the type of the interface used in an interface
+// call.
+func interfaceCallRecvType(call *ir.CallExpr) *types.Type {
+	if call.Op() != ir.OCALLINTER {
+		base.Fatalf("Call isn't OCALLINTER: %+v", call)
+	}
+
+	sel, ok := call.X.(*ir.SelectorExpr)
+	if !ok {
+		base.Fatalf("OCALLINTER doesn't contain SelectorExpr: %+v", call)
+	}
+
+	return sel.X.Type()
+}
+
+// findHotConcreteCallee returns the *ir.Func of the hottest callee of an
+// indirect call, if available, and its edge weight.
+func findHotConcreteCallee(p *pgo.Profile, caller *ir.Func, call *ir.CallExpr) (*ir.Func, int64) {
+	callerName := ir.LinkFuncName(caller)
+	callerNode := p.WeightedCG.IRNodes[callerName]
+	callOffset := pgo.NodeLineOffset(call, caller)
+
+	inter := interfaceCallRecvType(call)
+
+	var hottest *pgo.IREdge
+
+	// Returns true if e is hotter than hottest.
+	//
+	// Naively this is just e.Weight > hottest.Weight, but because OutEdges
+	// has arbitrary iteration order, we need to apply additional sort
+	// criteria when e.Weight == hottest.Weight to ensure we have stable
+	// selection.
+	hotter := func(e *pgo.IREdge) bool {
+		if hottest == nil {
+			return true
+		}
+		if e.Weight != hottest.Weight {
+			return e.Weight > hottest.Weight
+		}
+
+		// Now e.Weight == hottest.Weight, we must select on other
+		// criteria.
+
+		if hottest.Dst.AST == nil && e.Dst.AST != nil {
+			// Prefer the edge with IR available.
+			return true
+		}
+
+		// Arbitrary, but the callee names will always differ. Select
+		// the lexicographically first callee.
+		return e.Dst.Name() < hottest.Dst.Name()
+	}
+
+	for _, e := range callerNode.OutEdges {
+		if e.CallSiteOffset != callOffset {
+			continue
+		}
+
+		if !hotter(e) {
+			// TODO(prattmic): consider total caller weight? i.e.,
+			// if the hottest callee is only 10% of the weight,
+			// maybe don't devirtualize? Similarly, if this is call
+			// is globally very cold, there is not much value in
+			// devirtualizing.
+			if base.Debug.PGODebug >= 2 {
+				fmt.Printf("%v: edge %s:%d -> %s (weight %d): too cold (hottest %d)\n", ir.Line(call), callerName, callOffset, e.Dst.Name(), e.Weight, hottest.Weight)
+			}
+			continue
+		}
+
+		if e.Dst.AST == nil {
+			// Destination isn't visible from this package
+			// compilation.
+			//
+			// We must assume it implements the interface.
+			//
+			// We still record this as the hottest callee so far
+			// because we only want to return the #1 hottest
+			// callee. If we skip this then we'd return the #2
+			// hottest callee.
+			if base.Debug.PGODebug >= 2 {
+				fmt.Printf("%v: edge %s:%d -> %s (weight %d) (missing IR): hottest so far\n", ir.Line(call), callerName, callOffset, e.Dst.Name(), e.Weight)
+			}
+			hottest = e
+			continue
+		}
+
+		ctyp := methodRecvType(e.Dst.AST)
+		if ctyp == nil {
+			// Not a method.
+			// TODO(prattmic): Support non-interface indirect calls.
+			if base.Debug.PGODebug >= 2 {
+				fmt.Printf("%v: edge %s:%d -> %s (weight %d): callee not a method\n", ir.Line(call), callerName, callOffset, e.Dst.Name(), e.Weight)
+			}
+			continue
+		}
+
+		// If ctyp doesn't implement inter it is most likely from a
+		// different call on the same line
+		if !typecheck.Implements(ctyp, inter) {
+			// TODO(prattmic): this is overly strict. Consider if
+			// ctyp is a partial implementation of an interface
+			// that gets embedded in types that complete the
+			// interface. It would still be OK to devirtualize a
+			// call to this method.
+			//
+			// What we'd need to do is check that the function
+			// pointer in the itab matches the method we want,
+			// rather than doing a full type assertion.
+			if base.Debug.PGODebug >= 2 {
+				why := typecheck.ImplementsExplain(ctyp, inter)
+				fmt.Printf("%v: edge %s:%d -> %s (weight %d): %v doesn't implement %v (%s)\n", ir.Line(call), callerName, callOffset, e.Dst.Name(), e.Weight, ctyp, inter, why)
+			}
+			continue
+		}
+
+		if base.Debug.PGODebug >= 2 {
+			fmt.Printf("%v: edge %s:%d -> %s (weight %d): hottest so far\n", ir.Line(call), callerName, callOffset, e.Dst.Name(), e.Weight)
+		}
+		hottest = e
+	}
+
+	if hottest == nil {
+		if base.Debug.PGODebug >= 2 {
+			fmt.Printf("%v: call %s:%d: no hot callee\n", ir.Line(call), callerName, callOffset)
+		}
+		return nil, 0
+	}
+
+	if base.Debug.PGODebug >= 2 {
+		fmt.Printf("%v call %s:%d: hottest callee %s (weight %d)\n", ir.Line(call), callerName, callOffset, hottest.Dst.Name(), hottest.Weight)
+	}
+	return hottest.Dst.AST, hottest.Weight
+}
diff --git a/src/cmd/compile/internal/gc/main.go b/src/cmd/compile/internal/gc/main.go
index 464707242a..937d1c4751 100644
--- a/src/cmd/compile/internal/gc/main.go
+++ b/src/cmd/compile/internal/gc/main.go
@@ -253,7 +253,7 @@ func Main(archInit func(*ssagen.ArchInfo)) {
 	typecheck.IncrementalAddrtaken = true
 
 	// Read profile file and build profile-graph and weighted-call-graph.
-	base.Timer.Start("fe", "pgoprofile")
+	base.Timer.Start("fe", "pgo-load-profile")
 	var profile *pgo.Profile
 	if base.Flag.PgoProfile != "" {
 		var err error
@@ -263,6 +263,19 @@ func Main(archInit func(*ssagen.ArchInfo)) {
 		}
 	}
 
+	base.Timer.Start("fe", "pgo-devirtualization")
+	if profile != nil && base.Debug.PGODevirtualize > 0 {
+		// TODO(prattmic): No need to use bottom-up visit order. This
+		// is mirroring the PGO IRGraph visit order, which also need
+		// not be bottom-up.
+		ir.VisitFuncsBottomUp(typecheck.Target.Decls, func(list []*ir.Func, recursive bool) {
+			for _, fn := range list {
+				devirtualize.ProfileGuided(fn, profile)
+			}
+		})
+		ir.CurFunc = nil
+	}
+
 	// Inlining
 	base.Timer.Start("fe", "inlining")
 	if base.Flag.LowerL != 0 {
@@ -274,7 +287,7 @@ func Main(archInit func(*ssagen.ArchInfo)) {
 	var transformed []loopvar.VarAndLoop
 	for _, n := range typecheck.Target.Decls {
 		if n.Op() == ir.ODCLFUNC {
-			devirtualize.Func(n.(*ir.Func))
+			devirtualize.Static(n.(*ir.Func))
 			transformed = append(transformed, loopvar.ForCapture(n.(*ir.Func))...)
 		}
 	}
diff --git a/src/cmd/compile/internal/inline/inl.go b/src/cmd/compile/internal/inline/inl.go
index ff7e929ef4..96a6f3028a 100644
--- a/src/cmd/compile/internal/inline/inl.go
+++ b/src/cmd/compile/internal/inline/inl.go
@@ -101,13 +101,13 @@ func pgoInlinePrologue(p *pgo.Profile, decls []ir.Node) {
 			candHotCalleeMap[callee] = struct{}{}
 		}
 		// mark hot call sites
-		if caller := p.WeightedCG.IRNodes[n.CallerName]; caller != nil {
+		if caller := p.WeightedCG.IRNodes[n.CallerName]; caller != nil && caller.AST != nil {
 			csi := pgo.CallSiteInfo{LineOffset: n.CallSiteOffset, Caller: caller.AST}
 			candHotEdgeMap[csi] = struct{}{}
 		}
 	}
 
-	if base.Debug.PGODebug >= 2 {
+	if base.Debug.PGODebug >= 3 {
 		fmt.Printf("hot-cg before inline in dot format:")
 		p.PrintWeightedCallGraphDOT(inlineHotCallSiteThresholdPercent)
 	}
@@ -283,71 +283,10 @@ func CanInline(fn *ir.Func, profile *pgo.Profile) {
 		}()
 	}
 
-	// If marked "go:noinline", don't inline
-	if fn.Pragma&ir.Noinline != 0 {
-		reason = "marked go:noinline"
-		return
-	}
-
-	// If marked "go:norace" and -race compilation, don't inline.
-	if base.Flag.Race && fn.Pragma&ir.Norace != 0 {
-		reason = "marked go:norace with -race compilation"
-		return
-	}
-
-	// If marked "go:nocheckptr" and -d checkptr compilation, don't inline.
-	if base.Debug.Checkptr != 0 && fn.Pragma&ir.NoCheckPtr != 0 {
-		reason = "marked go:nocheckptr"
-		return
-	}
-
-	// If marked "go:cgo_unsafe_args", don't inline, since the
-	// function makes assumptions about its argument frame layout.
-	if fn.Pragma&ir.CgoUnsafeArgs != 0 {
-		reason = "marked go:cgo_unsafe_args"
-		return
-	}
-
-	// If marked as "go:uintptrkeepalive", don't inline, since the
-	// keep alive information is lost during inlining.
-	//
-	// TODO(prattmic): This is handled on calls during escape analysis,
-	// which is after inlining. Move prior to inlining so the keep-alive is
-	// maintained after inlining.
-	if fn.Pragma&ir.UintptrKeepAlive != 0 {
-		reason = "marked as having a keep-alive uintptr argument"
-		return
-	}
-
-	// If marked as "go:uintptrescapes", don't inline, since the
-	// escape information is lost during inlining.
-	if fn.Pragma&ir.UintptrEscapes != 0 {
-		reason = "marked as having an escaping uintptr argument"
-		return
-	}
-
-	// The nowritebarrierrec checker currently works at function
-	// granularity, so inlining yeswritebarrierrec functions can
-	// confuse it (#22342). As a workaround, disallow inlining
-	// them for now.
-	if fn.Pragma&ir.Yeswritebarrierrec != 0 {
-		reason = "marked go:yeswritebarrierrec"
-		return
-	}
-
-	// If fn has no body (is defined outside of Go), cannot inline it.
-	if len(fn.Body) == 0 {
-		reason = "no function body"
-		return
-	}
-
-	// If fn is synthetic hash or eq function, cannot inline it.
-	// The function is not generated in Unified IR frontend at this moment.
-	if ir.IsEqOrHashFunc(fn) {
-		reason = "type eq/hash function"
+	reason = InlineImpossible(fn)
+	if reason != "" {
 		return
 	}
-
 	if fn.Typecheck() == 0 {
 		base.Fatalf("CanInline on non-typechecked function %v", fn)
 	}
@@ -415,6 +354,82 @@ func CanInline(fn *ir.Func, profile *pgo.Profile) {
 	}
 }
 
+// InlineImpossible returns a non-empty reason string if fn is impossible to
+// inline regardless of cost or contents.
+func InlineImpossible(fn *ir.Func) string {
+	var reason string // reason, if any, that the function can not be inlined.
+	if fn.Nname == nil {
+		reason = "no name"
+		return reason
+	}
+
+	// If marked "go:noinline", don't inline.
+	if fn.Pragma&ir.Noinline != 0 {
+		reason = "marked go:noinline"
+		return reason
+	}
+
+	// If marked "go:norace" and -race compilation, don't inline.
+	if base.Flag.Race && fn.Pragma&ir.Norace != 0 {
+		reason = "marked go:norace with -race compilation"
+		return reason
+	}
+
+	// If marked "go:nocheckptr" and -d checkptr compilation, don't inline.
+	if base.Debug.Checkptr != 0 && fn.Pragma&ir.NoCheckPtr != 0 {
+		reason = "marked go:nocheckptr"
+		return reason
+	}
+
+	// If marked "go:cgo_unsafe_args", don't inline, since the function
+	// makes assumptions about its argument frame layout.
+	if fn.Pragma&ir.CgoUnsafeArgs != 0 {
+		reason = "marked go:cgo_unsafe_args"
+		return reason
+	}
+
+	// If marked as "go:uintptrkeepalive", don't inline, since the keep
+	// alive information is lost during inlining.
+	//
+	// TODO(prattmic): This is handled on calls during escape analysis,
+	// which is after inlining. Move prior to inlining so the keep-alive is
+	// maintained after inlining.
+	if fn.Pragma&ir.UintptrKeepAlive != 0 {
+		reason = "marked as having a keep-alive uintptr argument"
+		return reason
+	}
+
+	// If marked as "go:uintptrescapes", don't inline, since the escape
+	// information is lost during inlining.
+	if fn.Pragma&ir.UintptrEscapes != 0 {
+		reason = "marked as having an escaping uintptr argument"
+		return reason
+	}
+
+	// The nowritebarrierrec checker currently works at function
+	// granularity, so inlining yeswritebarrierrec functions can confuse it
+	// (#22342). As a workaround, disallow inlining them for now.
+	if fn.Pragma&ir.Yeswritebarrierrec != 0 {
+		reason = "marked go:yeswritebarrierrec"
+		return reason
+	}
+
+	// If fn has no body (is defined outside of Go), cannot inline it.
+	if len(fn.Body) == 0 {
+		reason = "no function body"
+		return reason
+	}
+
+	// If fn is synthetic hash or eq function, cannot inline it.
+	// The function is not generated in Unified IR frontend at this moment.
+	if ir.IsEqOrHashFunc(fn) {
+		reason = "type eq/hash function"
+		return reason
+	}
+
+	return ""
+}
+
 // canDelayResults reports whether inlined calls to fn can delay
 // declaring the result parameter until the "return" statement.
 func canDelayResults(fn *ir.Func) bool {
diff --git a/src/cmd/compile/internal/pgo/irgraph.go b/src/cmd/compile/internal/pgo/irgraph.go
index b9c39f6090..074f4a5a2f 100644
--- a/src/cmd/compile/internal/pgo/irgraph.go
+++ b/src/cmd/compile/internal/pgo/irgraph.go
@@ -51,25 +51,41 @@ import (
 	"os"
 )
 
-// IRGraph is the key data structure that is built from profile. It is
-// essentially a call graph with nodes pointing to IRs of functions and edges
-// carrying weights and callsite information. The graph is bidirectional that
-// helps in removing nodes efficiently.
+// IRGraph is a call graph with nodes pointing to IRs of functions and edges
+// carrying weights and callsite information.
+//
+// Nodes for indirect calls may have missing IR (IRNode.AST == nil) if the node
+// is not visible from this package (e.g., not in the transitive deps). Keeping
+// these nodes allows determining the hottest edge from a call even if that
+// callee is not available.
+//
+// TODO(prattmic): Consider merging this data structure with Graph. This is
+// effectively a copy of Graph aggregated to line number and pointing to IR.
 type IRGraph struct {
 	// Nodes of the graph
-	IRNodes  map[string]*IRNode
-	OutEdges IREdgeMap
-	InEdges  IREdgeMap
+	IRNodes map[string]*IRNode
 }
 
-// IRNode represents a node in the IRGraph.
+// IRNode represents a node (function) in the IRGraph.
 type IRNode struct {
 	// Pointer to the IR of the Function represented by this node.
 	AST *ir.Func
+	// Linker symbol name of the Function represented by this node.
+	// Populated only if AST == nil.
+	LinkerSymbolName string
+
+	// Set of out-edges in the callgraph. The map uniquely identifies each
+	// edge based on the callsite and callee, for fast lookup.
+	OutEdges map[NodeMapKey]*IREdge
 }
 
-// IREdgeMap maps an IRNode to its successors.
-type IREdgeMap map[*IRNode][]*IREdge
+// Name returns the symbol name of this function.
+func (i *IRNode) Name() string {
+	if i.AST != nil {
+		return ir.LinkFuncName(i.AST)
+	}
+	return i.LinkerSymbolName
+}
 
 // IREdge represents a call edge in the IRGraph with source, destination,
 // weight, callsite, and line number information.
@@ -82,6 +98,8 @@ type IREdge struct {
 
 // NodeMapKey represents a hash key to identify unique call-edges in profile
 // and in IR. Used for deduplication of call edges found in profile.
+//
+// TODO(prattmic): rename to something more descriptive.
 type NodeMapKey struct {
 	CallerName     string
 	CalleeName     string
@@ -244,10 +262,22 @@ func (p *Profile) processprofileGraph(g *graph.Graph) error {
 func (p *Profile) initializeIRGraph() {
 	// Bottomup walk over the function to create IRGraph.
 	ir.VisitFuncsBottomUp(typecheck.Target.Decls, func(list []*ir.Func, recursive bool) {
-		for _, n := range list {
-			p.VisitIR(n)
+		for _, fn := range list {
+			p.VisitIR(fn)
 		}
 	})
+
+	// Add additional edges for indirect calls. This must be done second so
+	// that IRNodes is fully populated (see the dummy node TODO in
+	// addIndirectEdges).
+	//
+	// TODO(prattmic): VisitIR above populates the graph via direct calls
+	// discovered via the IR. addIndirectEdges populates the graph via
+	// calls discovered via the profile. This combination of opposite
+	// approaches is a bit awkward, particularly because direct calls are
+	// discoverable via the profile as well. Unify these into a single
+	// approach.
+	p.addIndirectEdges()
 }
 
 // VisitIR traverses the body of each ir.Func and use NodeMap to determine if
@@ -258,12 +288,7 @@ func (p *Profile) VisitIR(fn *ir.Func) {
 	if g.IRNodes == nil {
 		g.IRNodes = make(map[string]*IRNode)
 	}
-	if g.OutEdges == nil {
-		g.OutEdges = make(map[*IRNode][]*IREdge)
-	}
-	if g.InEdges == nil {
-		g.InEdges = make(map[*IRNode][]*IREdge)
-	}
+
 	name := ir.LinkFuncName(fn)
 	node, ok := g.IRNodes[name]
 	if !ok {
@@ -317,21 +342,107 @@ func (p *Profile) addIREdge(callerNode *IRNode, callerName string, call ir.Node,
 		Weight:         weight,
 		CallSiteOffset: nodeinfo.CallSiteOffset,
 	}
-	g.OutEdges[callerNode] = append(g.OutEdges[callerNode], edge)
-	g.InEdges[calleeNode] = append(g.InEdges[calleeNode], edge)
+
+	if callerNode.OutEdges == nil {
+		callerNode.OutEdges = make(map[NodeMapKey]*IREdge)
+	}
+	callerNode.OutEdges[nodeinfo] = edge
+}
+
+// addIndirectEdges adds indirect call edges found in the profile to the graph,
+// to be used for devirtualization.
+//
+// targetDeclFuncs is the set of functions in typecheck.Target.Decls. Only
+// edges from these functions will be added.
+//
+// Devirtualization is only applied to typecheck.Target.Decls functions, so there
+// is no need to add edges from other functions.
+//
+// N.B. despite the name, addIndirectEdges will add any edges discovered via
+// the profile. We don't know for sure that they are indirect, but assume they
+// are since direct calls would already be added. (e.g., direct calls that have
+// been deleted from source since the profile was taken would be added here).
+//
+// TODO(prattmic): Devirtualization runs before inlining, so we can't devirtualize
+// calls inside inlined call bodies. If we did add that, we'd need edges from
+// inlined bodies as well.
+func (p *Profile) addIndirectEdges() {
+	g := p.WeightedCG
+
+	// g.IRNodes is populated with the set of functions in the local
+	// package build by VisitIR. We want to filter for local functions
+	// below, but we also add unknown callees to IRNodes as we go. So make
+	// an initial copy of IRNodes to recall just the local functions.
+	localNodes := make(map[string]*IRNode, len(g.IRNodes))
+	for k, v := range g.IRNodes {
+		localNodes[k] = v
+	}
+
+	for key, weights := range p.NodeMap {
+		// All callers in the local package build were added to IRNodes
+		// in VisitIR. If a caller isn't in the local package build we
+		// can skip adding edges, since we won't be devirtualizing in
+		// them anyway. This keeps the graph smaller.
+		callerNode, ok := localNodes[key.CallerName]
+		if !ok {
+			continue
+		}
+
+		// Already handled this edge?
+		if _, ok := callerNode.OutEdges[key]; ok {
+			continue
+		}
+
+		calleeNode, ok := g.IRNodes[key.CalleeName]
+		if !ok {
+			// IR is missing for this callee. Most likely this is
+			// because the callee isn't in the transitive deps of
+			// this package.
+			//
+			// Record this call anyway. If this is the hottest,
+			// then we want to skip devirtualization rather than
+			// devirtualizing to the second most common callee.
+			//
+			// TODO(prattmic): VisitIR populates IRNodes with all
+			// of the functions discovered via local package
+			// function declarations and calls. Thus we could miss
+			// functions that are available in export data of
+			// transitive deps, but aren't directly reachable. We
+			// need to do a lookup directly from package export
+			// data to get complete coverage.
+			calleeNode = &IRNode{
+				LinkerSymbolName: key.CalleeName,
+				// TODO: weights? We don't need them.
+			}
+			// Add dummy node back to IRNodes. We don't need this
+			// directly, but PrintWeightedCallGraphDOT uses these
+			// to print nodes.
+			g.IRNodes[key.CalleeName] = calleeNode
+		}
+		edge := &IREdge{
+			Src:            callerNode,
+			Dst:            calleeNode,
+			Weight:         weights.EWeight,
+			CallSiteOffset: key.CallSiteOffset,
+		}
+
+		if callerNode.OutEdges == nil {
+			callerNode.OutEdges = make(map[NodeMapKey]*IREdge)
+		}
+		callerNode.OutEdges[key] = edge
+	}
 }
 
-// createIRGraphEdge traverses the nodes in the body of ir.Func and add edges between callernode which points to the ir.Func and the nodes in the body.
+// createIRGraphEdge traverses the nodes in the body of ir.Func and adds edges
+// between the callernode which points to the ir.Func and the nodes in the
+// body.
 func (p *Profile) createIRGraphEdge(fn *ir.Func, callernode *IRNode, name string) {
-	var doNode func(ir.Node) bool
-	doNode = func(n ir.Node) bool {
+	ir.VisitList(fn.Body, func(n ir.Node) {
 		switch n.Op() {
-		default:
-			ir.DoChildren(n, doNode)
 		case ir.OCALLFUNC:
 			call := n.(*ir.CallExpr)
 			// Find the callee function from the call site and add the edge.
-			callee := inlCallee(call.X)
+			callee := DirectCallee(call.X)
 			if callee != nil {
 				p.addIREdge(callernode, name, n, callee)
 			}
@@ -341,9 +452,7 @@ func (p *Profile) createIRGraphEdge(fn *ir.Func, callernode *IRNode, name string
 			callee := ir.MethodExprName(call.X).Func
 			p.addIREdge(callernode, name, n, callee)
 		}
-		return false
-	}
-	doNode(fn)
+	})
 }
 
 // WeightInPercentage converts profile weights to a percentage.
@@ -366,19 +475,22 @@ func (p *Profile) PrintWeightedCallGraphDOT(edgeThreshold float64) {
 	})
 
 	// Determine nodes of DOT.
+	//
+	// Note that ir.Func may be nil for functions not visible from this
+	// package.
 	nodes := make(map[string]*ir.Func)
 	for name := range funcs {
 		if n, ok := p.WeightedCG.IRNodes[name]; ok {
-			for _, e := range p.WeightedCG.OutEdges[n] {
-				if _, ok := nodes[ir.LinkFuncName(e.Src.AST)]; !ok {
-					nodes[ir.LinkFuncName(e.Src.AST)] = e.Src.AST
+			for _, e := range n.OutEdges {
+				if _, ok := nodes[e.Src.Name()]; !ok {
+					nodes[e.Src.Name()] = e.Src.AST
 				}
-				if _, ok := nodes[ir.LinkFuncName(e.Dst.AST)]; !ok {
-					nodes[ir.LinkFuncName(e.Dst.AST)] = e.Dst.AST
+				if _, ok := nodes[e.Dst.Name()]; !ok {
+					nodes[e.Dst.Name()] = e.Dst.AST
 				}
 			}
-			if _, ok := nodes[ir.LinkFuncName(n.AST)]; !ok {
-				nodes[ir.LinkFuncName(n.AST)] = n.AST
+			if _, ok := nodes[n.Name()]; !ok {
+				nodes[n.Name()] = n.AST
 			}
 		}
 	}
@@ -386,11 +498,15 @@ func (p *Profile) PrintWeightedCallGraphDOT(edgeThreshold float64) {
 	// Print nodes.
 	for name, ast := range nodes {
 		if _, ok := p.WeightedCG.IRNodes[name]; ok {
-			color := "black"
-			if ast.Inl != nil {
-				fmt.Printf("\"%v\" [color=%v,label=\"%v,inl_cost=%d\"];\n", ir.LinkFuncName(ast), color, ir.LinkFuncName(ast), ast.Inl.Cost)
+			style := "solid"
+			if ast == nil {
+				style = "dashed"
+			}
+
+			if ast != nil && ast.Inl != nil {
+				fmt.Printf("\"%v\" [color=black, style=%s, label=\"%v,inl_cost=%d\"];\n", name, style, name, ast.Inl.Cost)
 			} else {
-				fmt.Printf("\"%v\" [color=%v, label=\"%v\"];\n", ir.LinkFuncName(ast), color, ir.LinkFuncName(ast))
+				fmt.Printf("\"%v\" [color=black, style=%s, label=\"%v\"];\n", name, style, name)
 			}
 		}
 	}
@@ -399,15 +515,19 @@ func (p *Profile) PrintWeightedCallGraphDOT(edgeThreshold float64) {
 		for _, f := range list {
 			name := ir.LinkFuncName(f)
 			if n, ok := p.WeightedCG.IRNodes[name]; ok {
-				for _, e := range p.WeightedCG.OutEdges[n] {
+				for _, e := range n.OutEdges {
+					style := "solid"
+					if e.Dst.AST == nil {
+						style = "dashed"
+					}
+					color := "black"
 					edgepercent := WeightInPercentage(e.Weight, p.TotalEdgeWeight)
 					if edgepercent > edgeThreshold {
-						fmt.Printf("edge [color=red, style=solid];\n")
-					} else {
-						fmt.Printf("edge [color=black, style=solid];\n")
+						color = "red"
 					}
 
-					fmt.Printf("\"%v\" -> \"%v\" [label=\"%.2f\"];\n", ir.LinkFuncName(n.AST), ir.LinkFuncName(e.Dst.AST), edgepercent)
+					fmt.Printf("edge [color=%s, style=%s];\n", color, style)
+					fmt.Printf("\"%v\" -> \"%v\" [label=\"%.2f\"];\n", n.Name(), e.Dst.Name(), edgepercent)
 				}
 			}
 		}
@@ -415,8 +535,11 @@ func (p *Profile) PrintWeightedCallGraphDOT(edgeThreshold float64) {
 	fmt.Printf("}\n")
 }
 
-// inlCallee is same as the implementation for inl.go with one change. The change is that we do not invoke CanInline on a closure.
-func inlCallee(fn ir.Node) *ir.Func {
+// DirectCallee takes a function-typed expression and returns the underlying
+// function that it refers to if statically known. Otherwise, it returns nil.
+//
+// Equivalent to inline.inlCallee without calling CanInline on closures.
+func DirectCallee(fn ir.Node) *ir.Func {
 	fn = ir.StaticValue(fn)
 	switch fn.Op() {
 	case ir.OMETHEXPR:
diff --git a/src/cmd/compile/internal/test/pgo_devirtualize_test.go b/src/cmd/compile/internal/test/pgo_devirtualize_test.go
new file mode 100644
index 0000000000..5ddd626962
--- /dev/null
+++ b/src/cmd/compile/internal/test/pgo_devirtualize_test.go
@@ -0,0 +1,126 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package test
+
+import (
+	"bufio"
+	"fmt"
+	"internal/testenv"
+	"os"
+	"path/filepath"
+	"regexp"
+	"testing"
+)
+
+// testPGODevirtualize tests that specific PGO devirtualize rewrites are performed.
+func testPGODevirtualize(t *testing.T, dir string) {
+	testenv.MustHaveGoRun(t)
+	t.Parallel()
+
+	const pkg = "example.com/pgo/devirtualize"
+
+	// Add a go.mod so we have a consistent symbol names in this temp dir.
+	goMod := fmt.Sprintf(`module %s
+go 1.19
+`, pkg)
+	if err := os.WriteFile(filepath.Join(dir, "go.mod"), []byte(goMod), 0644); err != nil {
+		t.Fatalf("error writing go.mod: %v", err)
+	}
+
+	// Build the test with the profile.
+	pprof := filepath.Join(dir, "devirt.pprof")
+	gcflag := fmt.Sprintf("-gcflags=-m=2 -pgoprofile=%s -d=pgodebug=2", pprof)
+	out := filepath.Join(dir, "test.exe")
+	cmd := testenv.CleanCmdEnv(testenv.Command(t, testenv.GoToolPath(t), "build", "-o", out, gcflag, "."))
+	cmd.Dir = dir
+
+	pr, pw, err := os.Pipe()
+	if err != nil {
+		t.Fatalf("error creating pipe: %v", err)
+	}
+	defer pr.Close()
+	cmd.Stdout = pw
+	cmd.Stderr = pw
+
+	err = cmd.Start()
+	pw.Close()
+	if err != nil {
+		t.Fatalf("error starting go test: %v", err)
+	}
+
+	type devirtualization struct {
+		pos    string
+		callee string
+	}
+
+	want := []devirtualization{
+		{
+			pos:    "./devirt.go:81:21",
+			callee: "Mult.Multiply",
+		},
+		{
+			pos:    "./devirt.go:81:31",
+			callee: "Add.Add",
+		},
+	}
+
+	got := make(map[devirtualization]struct{})
+
+	devirtualizedLine := regexp.MustCompile(`(.*): PGO devirtualizing call to (.*)`)
+
+	scanner := bufio.NewScanner(pr)
+	for scanner.Scan() {
+		line := scanner.Text()
+		t.Logf("child: %s", line)
+
+		m := devirtualizedLine.FindStringSubmatch(line)
+		if m == nil {
+			continue
+		}
+
+		d := devirtualization{
+			pos:    m[1],
+			callee: m[2],
+		}
+		got[d] = struct{}{}
+	}
+	if err := cmd.Wait(); err != nil {
+		t.Fatalf("error running go test: %v", err)
+	}
+	if err := scanner.Err(); err != nil {
+		t.Fatalf("error reading go test output: %v", err)
+	}
+
+	if len(got) != len(want) {
+		t.Errorf("mismatched devirtualization count; got %v want %v", got, want)
+	}
+	for _, w := range want {
+		if _, ok := got[w]; ok {
+			continue
+		}
+		t.Errorf("devirtualization %v missing; got %v", w, got)
+	}
+}
+
+// TestPGODevirtualize tests that specific functions are devirtualized when PGO
+// is applied to the exact source that was profiled.
+func TestPGODevirtualize(t *testing.T) {
+	wd, err := os.Getwd()
+	if err != nil {
+		t.Fatalf("error getting wd: %v", err)
+	}
+	srcDir := filepath.Join(wd, "testdata", "pgo", "devirtualize")
+
+	// Copy the module to a scratch location so we can add a go.mod.
+	dir := t.TempDir()
+
+	for _, file := range []string{"devirt.go", "devirt_test.go", "devirt.pprof"} {
+		if err := copyFile(filepath.Join(dir, file), filepath.Join(srcDir, file)); err != nil {
+			t.Fatalf("error copying %s: %v", file, err)
+		}
+	}
+
+	testPGODevirtualize(t, dir)
+}
diff --git a/src/cmd/compile/internal/test/testdata/pgo/devirtualize/devirt.go b/src/cmd/compile/internal/test/testdata/pgo/devirtualize/devirt.go
new file mode 100644
index 0000000000..3f22093b34
--- /dev/null
+++ b/src/cmd/compile/internal/test/testdata/pgo/devirtualize/devirt.go
@@ -0,0 +1,83 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// WARNING: Please avoid updating this file. If this file needs to be updated,
+// then a new devirt.pprof file should be generated:
+//
+//	$ cd $GOROOT/src/cmd/compile/internal/test/testdata/pgo/devirtualize/
+//	$ go mod init example.com/pgo/devirtualize
+//	$ go test -bench=. -cpuprofile ./devirt.pprof
+
+package devirt
+
+type Multiplier interface {
+	Multiply(a, b int) int
+}
+
+type Adder interface {
+	Add(a, b int) int
+}
+
+var sink int
+
+type Mult struct{}
+
+func (Mult) Multiply(a, b int) int {
+	for i := 0; i < 1000; i++ {
+		sink++
+	}
+	return a * b
+}
+
+type NegMult struct{}
+
+func (NegMult) Multiply(a, b int) int {
+	for i := 0; i < 1000; i++ {
+		sink++
+	}
+	return -1 * a * b
+}
+
+type Add struct{}
+
+func (Add) Add(a, b int) int {
+	for i := 0; i < 1000; i++ {
+		sink++
+	}
+	return a + b
+}
+
+type Sub struct{}
+
+func (Sub) Add(a, b int) int {
+	for i := 0; i < 1000; i++ {
+		sink++
+	}
+	return a - b
+}
+
+// Exercise calls mostly a1 and m1.
+//
+//go:noinline
+func Exercise(iter int, a1, a2 Adder, m1, m2 Multiplier) {
+	for i := 0; i < iter; i++ {
+		a := a1
+		m := m1
+		if i%10 == 0 {
+			a = a2
+			m = m2
+		}
+
+		// N.B. Profiles only distinguish calls on a per-line level,
+		// making the two calls ambiguous. However because the
+		// interfaces and implementations are mutually exclusive,
+		// devirtualization can still select the correct callee for
+		// each.
+		//
+		// If they were not mutually exclusive (for example, two Add
+		// calls), then we could not definitively select the correct
+		// callee.
+		sink += m.Multiply(42, a.Add(1, 2))
+	}
+}
diff --git a/src/cmd/compile/internal/test/testdata/pgo/devirtualize/devirt.pprof b/src/cmd/compile/internal/test/testdata/pgo/devirtualize/devirt.pprof
new file mode 100644
index 0000000000..b72f7cf4b3
--- /dev/null
+++ b/src/cmd/compile/internal/test/testdata/pgo/devirtualize/devirt.pprof
diff --git a/src/cmd/compile/internal/test/testdata/pgo/devirtualize/devirt_test.go b/src/cmd/compile/internal/test/testdata/pgo/devirtualize/devirt_test.go
new file mode 100644
index 0000000000..03c966f6de
--- /dev/null
+++ b/src/cmd/compile/internal/test/testdata/pgo/devirtualize/devirt_test.go
@@ -0,0 +1,27 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// WARNING: Please avoid updating this file. If this file needs to be updated,
+// then a new devirt.pprof file should be generated:
+//
+//	$ cd $GOROOT/src/cmd/compile/internal/test/testdata/pgo/devirtualize/
+//	$ go mod init example.com/pgo/devirtualize
+//	$ go test -bench=. -cpuprofile ./devirt.pprof
+
+package devirt
+
+import (
+	"testing"
+)
+
+func BenchmarkDevirt(b *testing.B) {
+	var (
+		a1 Add
+		a2 Sub
+		m1 Mult
+		m2 NegMult
+	)
+
+	Exercise(b.N, a1, a2, m1, m2)
+}