diff --git a/src/runtime/pprof/proto.go b/src/runtime/pprof/proto.go index 073a076802..215bd0bf96 100644 --- a/src/runtime/pprof/proto.go +++ b/src/runtime/pprof/proto.go @@ -244,6 +244,10 @@ type locInfo struct { // to represent inlined functions // https://github.com/golang/go/blob/d6f2f833c93a41ec1c68e49804b8387a06b131c5/src/runtime/traceback.go#L347-L368 pcs []uintptr + + // results of allFrames call for this PC + frames []runtime.Frame + symbolizeResult symbolizeFlag } // newProfileBuilder returns a new profileBuilder. @@ -399,6 +403,24 @@ func (b *profileBuilder) appendLocsForStack(locs []uint64, stk []uintptr) (newLo for len(stk) > 0 { addr := stk[0] if l, ok := b.locs[addr]; ok { + // When generating code for an inlined function, the compiler adds + // NOP instructions to the outermost function as a placeholder for + // each layer of inlining. When the runtime generates tracebacks for + // stacks that include inlined functions, it uses the addresses of + // those NOPs as "fake" PCs on the stack as if they were regular + // function call sites. But if a profiling signal arrives while the + // CPU is executing one of those NOPs, its PC will show up as a leaf + // in the profile with its own Location entry. So, always check + // whether addr is a "fake" PC in the context of the current call + // stack by trying to add it to the inlining deck before assuming + // that the deck is complete. + if len(b.deck.pcs) > 0 { + if added := b.deck.tryAdd(addr, l.frames, l.symbolizeResult); added { + stk = stk[1:] + continue + } + } + // first record the location if there is any pending accumulated info. if id := b.emitLocation(); id > 0 { locs = append(locs, id) @@ -451,6 +473,27 @@ func (b *profileBuilder) appendLocsForStack(locs []uint64, stk []uintptr) (newLo return locs } +// Here's an example of how Go 1.17 writes out inlined functions, compiled for +// linux/amd64. The disassembly of main.main shows two levels of inlining: main +// calls b, b calls a, a does some work. +// +// inline.go:9 0x4553ec 90 NOPL // func main() { b(v) } +// inline.go:6 0x4553ed 90 NOPL // func b(v *int) { a(v) } +// inline.go:5 0x4553ee 48c7002a000000 MOVQ $0x2a, 0(AX) // func a(v *int) { *v = 42 } +// +// If a profiling signal arrives while executing the MOVQ at 0x4553ee (for line +// 5), the runtime will report the stack as the MOVQ frame being called by the +// NOPL at 0x4553ed (for line 6) being called by the NOPL at 0x4553ec (for line +// 9). +// +// The role of pcDeck is to collapse those three frames back into a single +// location at 0x4553ee, with file/line/function symbolization info representing +// the three layers of calls. It does that via sequential calls to pcDeck.tryAdd +// starting with the leaf-most address. The fourth call to pcDeck.tryAdd will be +// for the caller of main.main. Because main.main was not inlined in its caller, +// the deck will reject the addition, and the fourth PC on the stack will get +// its own location. + // pcDeck is a helper to detect a sequence of inlined functions from // a stack trace returned by the runtime. // @@ -535,7 +578,12 @@ func (b *profileBuilder) emitLocation() uint64 { newFuncs := make([]newFunc, 0, 8) id := uint64(len(b.locs)) + 1 - b.locs[addr] = locInfo{id: id, pcs: append([]uintptr{}, b.deck.pcs...)} + b.locs[addr] = locInfo{ + id: id, + pcs: append([]uintptr{}, b.deck.pcs...), + symbolizeResult: b.deck.symbolizeResult, + frames: append([]runtime.Frame{}, b.deck.frames...), + } start := b.pb.startMessage() b.pb.uint64Opt(tagLocation_ID, id)