diff --git a/src/runtime/mprof.go b/src/runtime/mprof.go
index b51edcbcab..f0e5533cec 100644
--- a/src/runtime/mprof.go
+++ b/src/runtime/mprof.go
@@ -43,10 +43,29 @@ const (
 	// Note that it's only used internally as a guard against
 	// wildly out-of-bounds slicing of the PCs that come after
 	// a bucket struct, and it could increase in the future.
-	// The "+ 1" is to account for the first stack entry being
+	// The term "1" accounts for the first stack entry being
 	// taken up by a "skip" sentinel value for profilers which
 	// defer inline frame expansion until the profile is reported.
-	maxStack = 32 + 1
+	// The term "maxSkip" is for frame pointer unwinding, where we
+	// want to end up with maxLogicalStack frames but will discard
+	// some "physical" frames to account for skipping.
+	maxStack = 1 + maxSkip + maxLogicalStack
+
+	// maxLogicalStack is the maximum stack size of a call stack
+	// to encode in a profile. This counts "logical" frames, which
+	// includes inlined frames. We may record more than this many
+	// "physical" frames when using frame pointer unwinding to account
+	// for deferred handling of skipping frames & inline expansion.
+	maxLogicalStack = 32
+	// maxSkip is to account for deferred inline expansion
+	// when using frame pointer unwinding. We record the stack
+	// with "physical" frame pointers but handle skipping "logical"
+	// frames at some point after collecting the stack. So
+	// we need extra space in order to avoid getting fewer than the
+	// desired maximum number of frames after expansion.
+	// This should be at least as large as the largest skip value
+	// used for profiling; otherwise stacks may be truncated inconsistently
+	maxSkip = 5
 )
 
 type bucketType int
@@ -513,6 +532,11 @@ func blocksampled(cycles, rate int64) bool {
 // skip should be positive if this event is recorded from the current stack
 // (e.g. when this is not called from a system stack)
 func saveblockevent(cycles, rate int64, skip int, which bucketType) {
+	if skip > maxSkip {
+		print("requested skip=", skip)
+		throw("invalid skip value")
+	}
+
 	gp := getg()
 	mp := acquirem() // we must not be preempted while accessing profstack
 	nstk := 1
diff --git a/src/runtime/pprof/pprof_test.go b/src/runtime/pprof/pprof_test.go
index f57c1fed50..1c92c7e1f4 100644
--- a/src/runtime/pprof/pprof_test.go
+++ b/src/runtime/pprof/pprof_test.go
@@ -2431,3 +2431,143 @@ func TestTimeVDSO(t *testing.T) {
 		}
 	}
 }
+
+func TestProfilerStackDepth(t *testing.T) {
+	// Disable sampling, otherwise it's difficult to assert anything.
+	oldMemRate := runtime.MemProfileRate
+	runtime.MemProfileRate = 1
+	runtime.SetBlockProfileRate(1)
+	oldMutexRate := runtime.SetMutexProfileFraction(1)
+	t.Cleanup(func() {
+		runtime.MemProfileRate = oldMemRate
+		runtime.SetBlockProfileRate(0)
+		runtime.SetMutexProfileFraction(oldMutexRate)
+	})
+
+	const depth = 32
+	go produceProfileEvents(t, depth)
+	awaitBlockedGoroutine(t, "chan receive", "goroutineDeep", 1)
+
+	tests := []struct {
+		profiler string
+		prefix   []string
+	}{
+		{"heap", []string{"runtime/pprof.allocDeep"}},
+		{"block", []string{"runtime.chanrecv1", "runtime/pprof.blockChanDeep"}},
+		{"mutex", []string{"sync.(*Mutex).Unlock", "runtime/pprof.blockMutexDeep"}},
+		{"goroutine", []string{"runtime.gopark", "runtime.chanrecv", "runtime.chanrecv1", "runtime/pprof.goroutineDeep"}},
+	}
+
+	for _, test := range tests {
+		t.Run(test.profiler, func(t *testing.T) {
+			var buf bytes.Buffer
+			if err := Lookup(test.profiler).WriteTo(&buf, 0); err != nil {
+				t.Fatalf("failed to write profile: %v", err)
+			}
+			p, err := profile.Parse(&buf)
+			if err != nil {
+				t.Fatalf("failed to parse profile: %v", err)
+			}
+			t.Logf("Profile = %v", p)
+
+			stks := stacks(p)
+			var stk []string
+			for _, s := range stks {
+				if hasPrefix(s, test.prefix) {
+					stk = s
+					break
+				}
+			}
+			if len(stk) != depth {
+				t.Fatalf("want stack depth = %d, got %d", depth, len(stk))
+			}
+
+			if rootFn, wantFn := stk[depth-1], "runtime/pprof.produceProfileEvents"; rootFn != wantFn {
+				t.Fatalf("want stack root %s, got %v", wantFn, rootFn)
+			}
+		})
+	}
+}
+
+func hasPrefix(stk []string, prefix []string) bool {
+	if len(prefix) > len(stk) {
+		return false
+	}
+	for i := range prefix {
+		if stk[i] != prefix[i] {
+			return false
+		}
+	}
+	return true
+}
+
+// ensure that stack records are valid map keys (comparable)
+var _ = map[runtime.MemProfileRecord]struct{}{}
+var _ = map[runtime.StackRecord]struct{}{}
+
+// allocDeep calls itself n times before allocating memory.
+func allocDeep(n int) {
+	if n > 1 {
+		allocDeep(n - 1)
+		return
+	}
+	memSink = make([]byte, 1<<20)
+}
+
+// blockChanDeep produces a block profile event at stack depth n, including the
+// caller.
+func blockChanDeep(t *testing.T, n int) {
+	if n > 1 {
+		blockChanDeep(t, n-1)
+		return
+	}
+	ch := make(chan struct{})
+	go func() {
+		awaitBlockedGoroutine(t, "chan receive", "blockChanDeep", 1)
+		ch <- struct{}{}
+	}()
+	<-ch
+}
+
+// blockMutexDeep produces a mutex profile event at stack depth n, including the
+// caller.
+func blockMutexDeep(t *testing.T, n int) {
+	if n > 1 {
+		blockMutexDeep(t, n-1)
+		return
+	}
+	var mu sync.Mutex
+	go func() {
+		mu.Lock()
+		mu.Lock()
+	}()
+	awaitBlockedGoroutine(t, "sync.Mutex.Lock", "blockMutexDeep", 1)
+	mu.Unlock()
+}
+
+// goroutineDeep blocks at stack depth n, including the caller, until the test
+// is finished.
+func goroutineDeep(t *testing.T, n int) {
+	if n > 1 {
+		goroutineDeep(t, n-1)
+		return
+	}
+	wait := make(chan struct{}, 1)
+	t.Cleanup(func() {
+		wait <- struct{}{}
+	})
+	<-wait
+}
+
+// produceProfileEvents produces pprof events at the given stack depth and then
+// blocks in goroutineDeep until the test completes. The stack traces are
+// guaranteed to have exactly the desired depth with produceProfileEvents as
+// their root frame, which is what TestProfilerStackDepth expects.
+func produceProfileEvents(t *testing.T, depth int) {
+	allocDeep(depth - 1)       // -1 for produceProfileEvents
+	blockChanDeep(t, depth-2)  // -2 for produceProfileEvents, chanrecv1
+	blockMutexDeep(t, depth-2) // -2 for produceProfileEvents, Unlock
+	memSink = nil
+	runtime.GC()
+	goroutineDeep(t, depth-4) // -4 for produceProfileEvents, chanrecv1, chanrecv, gopark
+}
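
Not part of the patch: below is a minimal, standalone sketch of the exact-depth stack technique the new test helpers rely on. A self-recursive helper builds a call stack of a known depth, and the leaf counts the resulting "logical" (inline-expanded) frames with runtime.Callers and runtime.CallersFrames, mirroring how TestProfilerStackDepth asserts the depth of profile stacks rooted in produceProfileEvents. The names deep and want are illustrative only, not taken from the CL.

package main

import (
	"fmt"
	"runtime"
)

// deep recurses until n == 1, so its caller sees exactly n deep frames on the
// stack below it, mirroring allocDeep/goroutineDeep in the test.
func deep(n int) int {
	if n > 1 {
		return deep(n - 1)
	}
	// At the leaf, count logical frames from here up to and including
	// main.main, much like a profile stack rooted in the caller.
	pcs := make([]uintptr, 64)
	npc := runtime.Callers(1, pcs) // 1 skips the runtime.Callers frame itself
	frames := runtime.CallersFrames(pcs[:npc])
	depth := 0
	for {
		f, more := frames.Next()
		depth++
		if f.Function == "main.main" || !more {
			break
		}
	}
	return depth
}

func main() {
	const want = 16
	// -1 because main.main is the root frame, just as produceProfileEvents is
	// the root frame of the stacks checked by the test.
	got := deep(want - 1)
	fmt.Printf("want stack depth %d, got %d\n", want, got)
}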