runtime: increase profiling stack depth to 128

The current stack depth limit of 32 for alloc, mutex, block, threadcreate
and goroutine profiles frequently leads to truncated stack traces in
production applications. Increase the limit to 128, which is the same
size used by the execution tracer.

Create internal/profilerecord to define variants of the runtime's
StackRecord, MemProfileRecord and BlockProfileRecord types that can hold
arbitrarily big stack traces. Implement internal profiling APIs based on
these new types and use them for creating protobuf profiles and to act
as shims for the public profiling APIs using the old types.
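
To make the shape of that change concrete, here is a simplified,
self-contained sketch of the pattern. It is illustrative only: the type
and function names below are invented for the example, while the real
counterparts (memProfileInternal, copyMemProfileRecord and the pprof_*
linkname shims) appear in the mprof.go and pprof.go diffs further down.

package main

import "fmt"

// Internal record: the stack is a slice, so it can hold arbitrarily
// deep traces.
type internalMemRecord struct {
	AllocBytes int64
	Stack      []uintptr
}

// Legacy public record: the stack is a fixed-size array, like the
// Stack0 field of runtime.MemProfileRecord.
type legacyMemRecord struct {
	AllocBytes int64
	Stack0     [32]uintptr
}

// Example data standing in for the runtime's profile buckets.
var buckets = []internalMemRecord{
	{AllocBytes: 4096, Stack: []uintptr{0x1, 0x2, 0x3}},
}

// memProfileLike is the internal API: it reports how many records
// exist and, if the caller has room for all of them, hands each record
// to copyFn.
func memProfileLike(size int, copyFn func(internalMemRecord)) (n int, ok bool) {
	n = len(buckets)
	if n <= size {
		ok = true
		for _, r := range buckets {
			copyFn(r)
		}
	}
	return
}

// MemProfileLegacy is the shim for the old API: it copies each internal
// record into the caller's fixed-size records, truncating deep stacks.
func MemProfileLegacy(p []legacyMemRecord) (n int, ok bool) {
	return memProfileLike(len(p), func(r internalMemRecord) {
		p[0].AllocBytes = r.AllocBytes
		i := copy(p[0].Stack0[:], r.Stack)
		clear(p[0].Stack0[i:])
		p = p[1:]
	})
}

func main() {
	p := make([]legacyMemRecord, 8)
	n, ok := MemProfileLegacy(p)
	fmt.Println(n, ok, p[0].Stack0[:3])
}

Because the copy callback advances p itself, the internal function never
needs to know the caller's record layout, which is what lets one internal
API back the legacy fixed-size records, the new slice-based records and
the protobuf writer.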

This will lead to an increase in memory usage for applications that
use the impacted profile types and have stack traces exceeding the
current limit of 32. Those applications will also experience a slight
increase in CPU usage, but this will hopefully soon be mitigated via
CLs 540476 and 533258, which introduce frame pointer unwinding for the
relevant profile types.

For #43669.

Change-Id: Ie53762e65d0f6295f5d4c7d3c87172d5a052164e
Reviewed-on: https://go-review.googlesource.com/c/go/+/572396
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Austin Clements <austin@google.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Authored by Felix Geisendörfer on 2024-05-17 15:07:07 +02:00; committed by Austin Clements
parent 47187a4f4f
commit 1b9dc3e178
12 changed files with 310 additions and 110 deletions
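
As a rough illustration of the user-visible effect, the program below
(using only documented runtime and runtime/pprof APIs; exact frame
counts in the resulting profile depend on inlining and runtime
internals) allocates from a call chain that is about 64 frames deep.
With the old limit such a chain was cut off at 32 frames in the heap
profile; with this change it is recorded up to 128 frames.

package main

import (
	"os"
	"runtime"
	"runtime/pprof"
)

// deep allocates at a call depth of roughly n frames.
//
//go:noinline
func deep(n int) []byte {
	if n == 0 {
		return make([]byte, 1<<20) // sampled by the heap profiler
	}
	return deep(n - 1)
}

var sink []byte

func main() {
	runtime.MemProfileRate = 1 // sample every allocation (for the demo only)
	sink = deep(64)

	f, err := os.Create("heap.pprof")
	if err != nil {
		panic(err)
	}
	defer f.Close()
	runtime.GC() // make sure recent allocations show up in the profile
	if err := pprof.WriteHeapProfile(f); err != nil {
		panic(err)
	}
	// Inspect with: go tool pprof -traces heap.pprof
}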


@ -0,0 +1,2 @@
The maximum stack depth for alloc, mutex, block, threadcreate and goroutine
profiles has been raised from 32 to 128 frames.


@ -58,6 +58,7 @@ var runtimePkgs = []string{
"internal/godebugs", "internal/godebugs",
"internal/goexperiment", "internal/goexperiment",
"internal/goos", "internal/goos",
"internal/profilerecord",
"internal/stringslite", "internal/stringslite",
} }


@ -45,7 +45,7 @@ var depsRules = `
internal/goarch, internal/godebugs, internal/goarch, internal/godebugs,
internal/goexperiment, internal/goos, internal/byteorder, internal/goexperiment, internal/goos, internal/byteorder,
internal/goversion, internal/nettrace, internal/platform, internal/goversion, internal/nettrace, internal/platform,
internal/trace/traceviewer/format, internal/profilerecord, internal/trace/traceviewer/format,
log/internal, log/internal,
unicode/utf8, unicode/utf16, unicode, unicode/utf8, unicode/utf16, unicode,
unsafe; unsafe;
@ -65,7 +65,8 @@ var depsRules = `
internal/goarch, internal/goarch,
internal/godebugs, internal/godebugs,
internal/goexperiment, internal/goexperiment,
internal/goos internal/goos,
internal/profilerecord
< internal/bytealg < internal/bytealg
< internal/stringslite < internal/stringslite
< internal/itoa < internal/itoa


@ -0,0 +1,28 @@
// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package profilerecord holds internal types used to represent profiling
// records with deep stack traces.
//
// TODO: Consider moving this to internal/runtime, see golang.org/issue/65355.
package profilerecord
type StackRecord struct {
Stack []uintptr
}
type MemProfileRecord struct {
AllocBytes, FreeBytes int64
AllocObjects, FreeObjects int64
Stack []uintptr
}
func (r *MemProfileRecord) InUseBytes() int64 { return r.AllocBytes - r.FreeBytes }
func (r *MemProfileRecord) InUseObjects() int64 { return r.AllocObjects - r.FreeObjects }
type BlockProfileRecord struct {
Count int64
Cycles int64
Stack []uintptr
}


@ -209,8 +209,8 @@ func CPUProfile() []byte {
panic("CPUProfile no longer available") panic("CPUProfile no longer available")
} }
//go:linkname runtime_pprof_runtime_cyclesPerSecond runtime/pprof.runtime_cyclesPerSecond //go:linkname pprof_cyclesPerSecond
func runtime_pprof_runtime_cyclesPerSecond() int64 { func pprof_cyclesPerSecond() int64 {
return ticksPerSecond() return ticksPerSecond()
} }


@ -9,6 +9,7 @@ package runtime
import ( import (
"internal/abi" "internal/abi"
"internal/profilerecord"
"internal/runtime/atomic" "internal/runtime/atomic"
"runtime/internal/sys" "runtime/internal/sys"
"unsafe" "unsafe"
@ -56,7 +57,7 @@ const (
// includes inlined frames. We may record more than this many // includes inlined frames. We may record more than this many
// "physical" frames when using frame pointer unwinding to account // "physical" frames when using frame pointer unwinding to account
// for deferred handling of skipping frames & inline expansion. // for deferred handling of skipping frames & inline expansion.
maxLogicalStack = 32 maxLogicalStack = 128
// maxSkip is to account for deferred inline expansion // maxSkip is to account for deferred inline expansion
// when using frame pointer unwinding. We record the stack // when using frame pointer unwinding. We record the stack
// with "physical" frame pointers but handle skipping "logical" // with "physical" frame pointers but handle skipping "logical"
@ -445,7 +446,16 @@ func mProf_PostSweep() {
// Called by malloc to record a profiled block. // Called by malloc to record a profiled block.
func mProf_Malloc(mp *m, p unsafe.Pointer, size uintptr) { func mProf_Malloc(mp *m, p unsafe.Pointer, size uintptr) {
nstk := callers(4, mp.profStack) if mp.profStack == nil {
// mp.profStack is nil if we happen to sample an allocation during the
// initialization of mp. This case is rare, so we just ignore such
// allocations. Change MemProfileRate to 1 if you need to reproduce such
// cases for testing purposes.
return
}
// Only use the part of mp.profStack we need and ignore the extra space
// reserved for delayed inline expansion with frame pointer unwinding.
nstk := callers(4, mp.profStack[:maxLogicalStack])
index := (mProfCycle.read() + 2) % uint32(len(memRecord{}.future)) index := (mProfCycle.read() + 2) % uint32(len(memRecord{}.future))
b := stkbucket(memProfile, size, mp.profStack[:nstk], true) b := stkbucket(memProfile, size, mp.profStack[:nstk], true)
@ -536,7 +546,6 @@ func saveblockevent(cycles, rate int64, skip int, which bucketType) {
print("requested skip=", skip) print("requested skip=", skip)
throw("invalid skip value") throw("invalid skip value")
} }
gp := getg() gp := getg()
mp := acquirem() // we must not be preempted while accessing profstack mp := acquirem() // we must not be preempted while accessing profstack
nstk := 1 nstk := 1
@ -937,6 +946,16 @@ func (r *MemProfileRecord) Stack() []uintptr {
// the testing package's -test.memprofile flag instead // the testing package's -test.memprofile flag instead
// of calling MemProfile directly. // of calling MemProfile directly.
func MemProfile(p []MemProfileRecord, inuseZero bool) (n int, ok bool) { func MemProfile(p []MemProfileRecord, inuseZero bool) (n int, ok bool) {
return memProfileInternal(len(p), inuseZero, func(r profilerecord.MemProfileRecord) {
copyMemProfileRecord(&p[0], r)
p = p[1:]
})
}
// memProfileInternal returns the number of records n in the profile. If there
// are less than size records, copyFn is invoked for each record, and ok returns
// true.
func memProfileInternal(size int, inuseZero bool, copyFn func(profilerecord.MemProfileRecord)) (n int, ok bool) {
cycle := mProfCycle.read() cycle := mProfCycle.read()
// If we're between mProf_NextCycle and mProf_Flush, take care // If we're between mProf_NextCycle and mProf_Flush, take care
// of flushing to the active profile so we only have to look // of flushing to the active profile so we only have to look
@ -976,14 +995,19 @@ func MemProfile(p []MemProfileRecord, inuseZero bool) (n int, ok bool) {
} }
} }
} }
if n <= len(p) { if n <= size {
ok = true ok = true
idx := 0
for b := head; b != nil; b = b.allnext { for b := head; b != nil; b = b.allnext {
mp := b.mp() mp := b.mp()
if inuseZero || mp.active.alloc_bytes != mp.active.free_bytes { if inuseZero || mp.active.alloc_bytes != mp.active.free_bytes {
record(&p[idx], b) r := profilerecord.MemProfileRecord{
idx++ AllocBytes: int64(mp.active.alloc_bytes),
FreeBytes: int64(mp.active.free_bytes),
AllocObjects: int64(mp.active.allocs),
FreeObjects: int64(mp.active.frees),
Stack: b.stk(),
}
copyFn(r)
} }
} }
} }
@ -991,24 +1015,30 @@ func MemProfile(p []MemProfileRecord, inuseZero bool) (n int, ok bool) {
return return
} }
// Write b's data to r. func copyMemProfileRecord(dst *MemProfileRecord, src profilerecord.MemProfileRecord) {
func record(r *MemProfileRecord, b *bucket) { dst.AllocBytes = src.AllocBytes
mp := b.mp() dst.FreeBytes = src.FreeBytes
r.AllocBytes = int64(mp.active.alloc_bytes) dst.AllocObjects = src.AllocObjects
r.FreeBytes = int64(mp.active.free_bytes) dst.FreeObjects = src.FreeObjects
r.AllocObjects = int64(mp.active.allocs)
r.FreeObjects = int64(mp.active.frees)
if raceenabled { if raceenabled {
racewriterangepc(unsafe.Pointer(&r.Stack0[0]), unsafe.Sizeof(r.Stack0), getcallerpc(), abi.FuncPCABIInternal(MemProfile)) racewriterangepc(unsafe.Pointer(&dst.Stack0[0]), unsafe.Sizeof(dst.Stack0), getcallerpc(), abi.FuncPCABIInternal(MemProfile))
} }
if msanenabled { if msanenabled {
msanwrite(unsafe.Pointer(&r.Stack0[0]), unsafe.Sizeof(r.Stack0)) msanwrite(unsafe.Pointer(&dst.Stack0[0]), unsafe.Sizeof(dst.Stack0))
} }
if asanenabled { if asanenabled {
asanwrite(unsafe.Pointer(&r.Stack0[0]), unsafe.Sizeof(r.Stack0)) asanwrite(unsafe.Pointer(&dst.Stack0[0]), unsafe.Sizeof(dst.Stack0))
} }
i := copy(r.Stack0[:], b.stk()) i := copy(dst.Stack0[:], src.Stack)
clear(r.Stack0[i:]) clear(dst.Stack0[i:])
}
//go:linkname pprof_memProfileInternal
func pprof_memProfileInternal(p []profilerecord.MemProfileRecord, inuseZero bool) (n int, ok bool) {
return memProfileInternal(len(p), inuseZero, func(r profilerecord.MemProfileRecord) {
p[0] = r
p = p[1:]
})
} }
func iterate_memprof(fn func(*bucket, uintptr, *uintptr, uintptr, uintptr, uintptr)) { func iterate_memprof(fn func(*bucket, uintptr, *uintptr, uintptr, uintptr, uintptr)) {
@ -1037,41 +1067,66 @@ type BlockProfileRecord struct {
// the [testing] package's -test.blockprofile flag instead // the [testing] package's -test.blockprofile flag instead
// of calling BlockProfile directly. // of calling BlockProfile directly.
func BlockProfile(p []BlockProfileRecord) (n int, ok bool) { func BlockProfile(p []BlockProfileRecord) (n int, ok bool) {
return blockProfileInternal(len(p), func(r profilerecord.BlockProfileRecord) {
copyBlockProfileRecord(&p[0], r)
p = p[1:]
})
}
// blockProfileInternal returns the number of records n in the profile. If there
// are less than size records, copyFn is invoked for each record, and ok returns
// true.
func blockProfileInternal(size int, copyFn func(profilerecord.BlockProfileRecord)) (n int, ok bool) {
lock(&profBlockLock) lock(&profBlockLock)
head := (*bucket)(bbuckets.Load()) head := (*bucket)(bbuckets.Load())
for b := head; b != nil; b = b.allnext { for b := head; b != nil; b = b.allnext {
n++ n++
} }
if n <= len(p) { if n <= size {
ok = true ok = true
for b := head; b != nil; b = b.allnext { for b := head; b != nil; b = b.allnext {
bp := b.bp() bp := b.bp()
r := &p[0] r := profilerecord.BlockProfileRecord{
r.Count = int64(bp.count) Count: int64(bp.count),
Cycles: bp.cycles,
Stack: b.stk(),
}
// Prevent callers from having to worry about division by zero errors. // Prevent callers from having to worry about division by zero errors.
// See discussion on http://golang.org/cl/299991. // See discussion on http://golang.org/cl/299991.
if r.Count == 0 { if r.Count == 0 {
r.Count = 1 r.Count = 1
} }
r.Cycles = bp.cycles copyFn(r)
if raceenabled {
racewriterangepc(unsafe.Pointer(&r.Stack0[0]), unsafe.Sizeof(r.Stack0), getcallerpc(), abi.FuncPCABIInternal(BlockProfile))
}
if msanenabled {
msanwrite(unsafe.Pointer(&r.Stack0[0]), unsafe.Sizeof(r.Stack0))
}
if asanenabled {
asanwrite(unsafe.Pointer(&r.Stack0[0]), unsafe.Sizeof(r.Stack0))
}
i := fpunwindExpand(r.Stack0[:], b.stk())
clear(r.Stack0[i:])
p = p[1:]
} }
} }
unlock(&profBlockLock) unlock(&profBlockLock)
return return
} }
func copyBlockProfileRecord(dst *BlockProfileRecord, src profilerecord.BlockProfileRecord) {
dst.Count = src.Count
dst.Cycles = src.Cycles
if raceenabled {
racewriterangepc(unsafe.Pointer(&dst.Stack0[0]), unsafe.Sizeof(dst.Stack0), getcallerpc(), abi.FuncPCABIInternal(BlockProfile))
}
if msanenabled {
msanwrite(unsafe.Pointer(&dst.Stack0[0]), unsafe.Sizeof(dst.Stack0))
}
if asanenabled {
asanwrite(unsafe.Pointer(&dst.Stack0[0]), unsafe.Sizeof(dst.Stack0))
}
i := fpunwindExpand(dst.Stack0[:], src.Stack)
clear(dst.Stack0[i:])
}
//go:linkname pprof_blockProfileInternal
func pprof_blockProfileInternal(p []profilerecord.BlockProfileRecord) (n int, ok bool) {
return blockProfileInternal(len(p), func(r profilerecord.BlockProfileRecord) {
p[0] = r
p = p[1:]
})
}
// MutexProfile returns n, the number of records in the current mutex profile. // MutexProfile returns n, the number of records in the current mutex profile.
// If len(p) >= n, MutexProfile copies the profile into p and returns n, true. // If len(p) >= n, MutexProfile copies the profile into p and returns n, true.
// Otherwise, MutexProfile does not change p, and returns n, false. // Otherwise, MutexProfile does not change p, and returns n, false.
@ -1079,27 +1134,45 @@ func BlockProfile(p []BlockProfileRecord) (n int, ok bool) {
// Most clients should use the [runtime/pprof] package // Most clients should use the [runtime/pprof] package
// instead of calling MutexProfile directly. // instead of calling MutexProfile directly.
func MutexProfile(p []BlockProfileRecord) (n int, ok bool) { func MutexProfile(p []BlockProfileRecord) (n int, ok bool) {
return mutexProfileInternal(len(p), func(r profilerecord.BlockProfileRecord) {
copyBlockProfileRecord(&p[0], r)
p = p[1:]
})
}
// mutexProfileInternal returns the number of records n in the profile. If there
// are less than size records, copyFn is invoked for each record, and ok returns
// true.
func mutexProfileInternal(size int, copyFn func(profilerecord.BlockProfileRecord)) (n int, ok bool) {
lock(&profBlockLock) lock(&profBlockLock)
head := (*bucket)(xbuckets.Load()) head := (*bucket)(xbuckets.Load())
for b := head; b != nil; b = b.allnext { for b := head; b != nil; b = b.allnext {
n++ n++
} }
if n <= len(p) { if n <= size {
ok = true ok = true
for b := head; b != nil; b = b.allnext { for b := head; b != nil; b = b.allnext {
bp := b.bp() bp := b.bp()
r := &p[0] r := profilerecord.BlockProfileRecord{
r.Count = int64(bp.count) Count: int64(bp.count),
r.Cycles = bp.cycles Cycles: bp.cycles,
i := fpunwindExpand(r.Stack0[:], b.stk()) Stack: b.stk(),
clear(r.Stack0[i:]) }
p = p[1:] copyFn(r)
} }
} }
unlock(&profBlockLock) unlock(&profBlockLock)
return return
} }
//go:linkname pprof_mutexProfileInternal
func pprof_mutexProfileInternal(p []profilerecord.BlockProfileRecord) (n int, ok bool) {
return mutexProfileInternal(len(p), func(r profilerecord.BlockProfileRecord) {
p[0] = r
p = p[1:]
})
}
// ThreadCreateProfile returns n, the number of records in the thread creation profile. // ThreadCreateProfile returns n, the number of records in the thread creation profile.
// If len(p) >= n, ThreadCreateProfile copies the profile into p and returns n, true. // If len(p) >= n, ThreadCreateProfile copies the profile into p and returns n, true.
// If len(p) < n, ThreadCreateProfile does not change p and returns n, false. // If len(p) < n, ThreadCreateProfile does not change p and returns n, false.
@ -1107,28 +1180,45 @@ func MutexProfile(p []BlockProfileRecord) (n int, ok bool) {
// Most clients should use the runtime/pprof package instead // Most clients should use the runtime/pprof package instead
// of calling ThreadCreateProfile directly. // of calling ThreadCreateProfile directly.
func ThreadCreateProfile(p []StackRecord) (n int, ok bool) { func ThreadCreateProfile(p []StackRecord) (n int, ok bool) {
return threadCreateProfileInternal(len(p), func(r profilerecord.StackRecord) {
copy(p[0].Stack0[:], r.Stack)
p = p[1:]
})
}
// threadCreateProfileInternal returns the number of records n in the profile.
// If there are less than size records, copyFn is invoked for each record, and
// ok returns true.
func threadCreateProfileInternal(size int, copyFn func(profilerecord.StackRecord)) (n int, ok bool) {
first := (*m)(atomic.Loadp(unsafe.Pointer(&allm))) first := (*m)(atomic.Loadp(unsafe.Pointer(&allm)))
for mp := first; mp != nil; mp = mp.alllink { for mp := first; mp != nil; mp = mp.alllink {
n++ n++
} }
if n <= len(p) { if n <= size {
ok = true ok = true
i := 0
for mp := first; mp != nil; mp = mp.alllink { for mp := first; mp != nil; mp = mp.alllink {
p[i].Stack0 = mp.createstack r := profilerecord.StackRecord{Stack: mp.createstack[:]}
i++ copyFn(r)
} }
} }
return return
} }
//go:linkname runtime_goroutineProfileWithLabels runtime/pprof.runtime_goroutineProfileWithLabels //go:linkname pprof_threadCreateInternal
func runtime_goroutineProfileWithLabels(p []StackRecord, labels []unsafe.Pointer) (n int, ok bool) { func pprof_threadCreateInternal(p []profilerecord.StackRecord) (n int, ok bool) {
return threadCreateProfileInternal(len(p), func(r profilerecord.StackRecord) {
p[0] = r
p = p[1:]
})
}
//go:linkname pprof_goroutineProfileWithLabels
func pprof_goroutineProfileWithLabels(p []profilerecord.StackRecord, labels []unsafe.Pointer) (n int, ok bool) {
return goroutineProfileWithLabels(p, labels) return goroutineProfileWithLabels(p, labels)
} }
// labels may be nil. If labels is non-nil, it must have the same length as p. // labels may be nil. If labels is non-nil, it must have the same length as p.
func goroutineProfileWithLabels(p []StackRecord, labels []unsafe.Pointer) (n int, ok bool) { func goroutineProfileWithLabels(p []profilerecord.StackRecord, labels []unsafe.Pointer) (n int, ok bool) {
if labels != nil && len(labels) != len(p) { if labels != nil && len(labels) != len(p) {
labels = nil labels = nil
} }
@ -1140,7 +1230,7 @@ var goroutineProfile = struct {
sema uint32 sema uint32
active bool active bool
offset atomic.Int64 offset atomic.Int64
records []StackRecord records []profilerecord.StackRecord
labels []unsafe.Pointer labels []unsafe.Pointer
}{ }{
sema: 1, sema: 1,
@ -1179,7 +1269,7 @@ func (p *goroutineProfileStateHolder) CompareAndSwap(old, new goroutineProfileSt
return (*atomic.Uint32)(p).CompareAndSwap(uint32(old), uint32(new)) return (*atomic.Uint32)(p).CompareAndSwap(uint32(old), uint32(new))
} }
func goroutineProfileWithLabelsConcurrent(p []StackRecord, labels []unsafe.Pointer) (n int, ok bool) { func goroutineProfileWithLabelsConcurrent(p []profilerecord.StackRecord, labels []unsafe.Pointer) (n int, ok bool) {
if len(p) == 0 { if len(p) == 0 {
// An empty slice is obviously too small. Return a rough // An empty slice is obviously too small. Return a rough
// allocation estimate without bothering to STW. As long as // allocation estimate without bothering to STW. As long as
@ -1192,6 +1282,7 @@ func goroutineProfileWithLabelsConcurrent(p []StackRecord, labels []unsafe.Point
ourg := getg() ourg := getg()
pcbuf := makeProfStack() // see saveg() for explanation
stw := stopTheWorld(stwGoroutineProfile) stw := stopTheWorld(stwGoroutineProfile)
// Using gcount while the world is stopped should give us a consistent view // Using gcount while the world is stopped should give us a consistent view
// of the number of live goroutines, minus the number of goroutines that are // of the number of live goroutines, minus the number of goroutines that are
@ -1218,7 +1309,7 @@ func goroutineProfileWithLabelsConcurrent(p []StackRecord, labels []unsafe.Point
sp := getcallersp() sp := getcallersp()
pc := getcallerpc() pc := getcallerpc()
systemstack(func() { systemstack(func() {
saveg(pc, sp, ourg, &p[0]) saveg(pc, sp, ourg, &p[0], pcbuf)
}) })
if labels != nil { if labels != nil {
labels[0] = ourg.labels labels[0] = ourg.labels
@ -1240,7 +1331,7 @@ func goroutineProfileWithLabelsConcurrent(p []StackRecord, labels []unsafe.Point
if fing != nil { if fing != nil {
fing.goroutineProfiled.Store(goroutineProfileSatisfied) fing.goroutineProfiled.Store(goroutineProfileSatisfied)
if readgstatus(fing) != _Gdead && !isSystemGoroutine(fing, false) { if readgstatus(fing) != _Gdead && !isSystemGoroutine(fing, false) {
doRecordGoroutineProfile(fing) doRecordGoroutineProfile(fing, pcbuf)
} }
} }
startTheWorld(stw) startTheWorld(stw)
@ -1257,7 +1348,7 @@ func goroutineProfileWithLabelsConcurrent(p []StackRecord, labels []unsafe.Point
// call will start by adding itself to the profile (before the act of // call will start by adding itself to the profile (before the act of
// executing can cause any changes in its stack). // executing can cause any changes in its stack).
forEachGRace(func(gp1 *g) { forEachGRace(func(gp1 *g) {
tryRecordGoroutineProfile(gp1, Gosched) tryRecordGoroutineProfile(gp1, pcbuf, Gosched)
}) })
stw = stopTheWorld(stwGoroutineProfileCleanup) stw = stopTheWorld(stwGoroutineProfileCleanup)
@ -1301,13 +1392,13 @@ func tryRecordGoroutineProfileWB(gp1 *g) {
if getg().m.p.ptr() == nil { if getg().m.p.ptr() == nil {
throw("no P available, write barriers are forbidden") throw("no P available, write barriers are forbidden")
} }
tryRecordGoroutineProfile(gp1, osyield) tryRecordGoroutineProfile(gp1, nil, osyield)
} }
// tryRecordGoroutineProfile ensures that gp1 has the appropriate representation // tryRecordGoroutineProfile ensures that gp1 has the appropriate representation
// in the current goroutine profile: either that it should not be profiled, or // in the current goroutine profile: either that it should not be profiled, or
// that a snapshot of its call stack and labels are now in the profile. // that a snapshot of its call stack and labels are now in the profile.
func tryRecordGoroutineProfile(gp1 *g, yield func()) { func tryRecordGoroutineProfile(gp1 *g, pcbuf []uintptr, yield func()) {
if readgstatus(gp1) == _Gdead { if readgstatus(gp1) == _Gdead {
// Dead goroutines should not appear in the profile. Goroutines that // Dead goroutines should not appear in the profile. Goroutines that
// start while profile collection is active will get goroutineProfiled // start while profile collection is active will get goroutineProfiled
@ -1342,7 +1433,7 @@ func tryRecordGoroutineProfile(gp1 *g, yield func()) {
// in this limbo. // in this limbo.
mp := acquirem() mp := acquirem()
if gp1.goroutineProfiled.CompareAndSwap(goroutineProfileAbsent, goroutineProfileInProgress) { if gp1.goroutineProfiled.CompareAndSwap(goroutineProfileAbsent, goroutineProfileInProgress) {
doRecordGoroutineProfile(gp1) doRecordGoroutineProfile(gp1, pcbuf)
gp1.goroutineProfiled.Store(goroutineProfileSatisfied) gp1.goroutineProfiled.Store(goroutineProfileSatisfied)
} }
releasem(mp) releasem(mp)
@ -1356,7 +1447,7 @@ func tryRecordGoroutineProfile(gp1 *g, yield func()) {
// goroutine that is coordinating the goroutine profile (running on its own // goroutine that is coordinating the goroutine profile (running on its own
// stack), or from the scheduler in preparation to execute gp1 (running on the // stack), or from the scheduler in preparation to execute gp1 (running on the
// system stack). // system stack).
func doRecordGoroutineProfile(gp1 *g) { func doRecordGoroutineProfile(gp1 *g, pcbuf []uintptr) {
if readgstatus(gp1) == _Grunning { if readgstatus(gp1) == _Grunning {
print("doRecordGoroutineProfile gp1=", gp1.goid, "\n") print("doRecordGoroutineProfile gp1=", gp1.goid, "\n")
throw("cannot read stack of running goroutine") throw("cannot read stack of running goroutine")
@ -1379,14 +1470,14 @@ func doRecordGoroutineProfile(gp1 *g) {
// set gp1.goroutineProfiled to goroutineProfileInProgress and so are still // set gp1.goroutineProfiled to goroutineProfileInProgress and so are still
// preventing it from being truly _Grunnable. So we'll use the system stack // preventing it from being truly _Grunnable. So we'll use the system stack
// to avoid schedule delays. // to avoid schedule delays.
systemstack(func() { saveg(^uintptr(0), ^uintptr(0), gp1, &goroutineProfile.records[offset]) }) systemstack(func() { saveg(^uintptr(0), ^uintptr(0), gp1, &goroutineProfile.records[offset], pcbuf) })
if goroutineProfile.labels != nil { if goroutineProfile.labels != nil {
goroutineProfile.labels[offset] = gp1.labels goroutineProfile.labels[offset] = gp1.labels
} }
} }
func goroutineProfileWithLabelsSync(p []StackRecord, labels []unsafe.Pointer) (n int, ok bool) { func goroutineProfileWithLabelsSync(p []profilerecord.StackRecord, labels []unsafe.Pointer) (n int, ok bool) {
gp := getg() gp := getg()
isOK := func(gp1 *g) bool { isOK := func(gp1 *g) bool {
@ -1395,6 +1486,7 @@ func goroutineProfileWithLabelsSync(p []StackRecord, labels []unsafe.Pointer) (n
return gp1 != gp && readgstatus(gp1) != _Gdead && !isSystemGoroutine(gp1, false) return gp1 != gp && readgstatus(gp1) != _Gdead && !isSystemGoroutine(gp1, false)
} }
pcbuf := makeProfStack() // see saveg() for explanation
stw := stopTheWorld(stwGoroutineProfile) stw := stopTheWorld(stwGoroutineProfile)
// World is stopped, no locking required. // World is stopped, no locking required.
@ -1413,7 +1505,7 @@ func goroutineProfileWithLabelsSync(p []StackRecord, labels []unsafe.Pointer) (n
sp := getcallersp() sp := getcallersp()
pc := getcallerpc() pc := getcallerpc()
systemstack(func() { systemstack(func() {
saveg(pc, sp, gp, &r[0]) saveg(pc, sp, gp, &r[0], pcbuf)
}) })
r = r[1:] r = r[1:]
@ -1438,7 +1530,7 @@ func goroutineProfileWithLabelsSync(p []StackRecord, labels []unsafe.Pointer) (n
// The world is stopped, so it cannot use cgocall (which will be // The world is stopped, so it cannot use cgocall (which will be
// blocked at exitsyscall). Do it on the system stack so it won't // blocked at exitsyscall). Do it on the system stack so it won't
// call into the scheduler (see traceback.go:cgoContextPCs). // call into the scheduler (see traceback.go:cgoContextPCs).
systemstack(func() { saveg(^uintptr(0), ^uintptr(0), gp1, &r[0]) }) systemstack(func() { saveg(^uintptr(0), ^uintptr(0), gp1, &r[0], pcbuf) })
if labels != nil { if labels != nil {
lbl[0] = gp1.labels lbl[0] = gp1.labels
lbl = lbl[1:] lbl = lbl[1:]
@ -1462,17 +1554,41 @@ func goroutineProfileWithLabelsSync(p []StackRecord, labels []unsafe.Pointer) (n
// Most clients should use the [runtime/pprof] package instead // Most clients should use the [runtime/pprof] package instead
// of calling GoroutineProfile directly. // of calling GoroutineProfile directly.
func GoroutineProfile(p []StackRecord) (n int, ok bool) { func GoroutineProfile(p []StackRecord) (n int, ok bool) {
records := make([]profilerecord.StackRecord, len(p))
n, ok = goroutineProfileInternal(records)
if !ok {
return
}
for i, mr := range records[0:n] {
copy(p[i].Stack0[:], mr.Stack)
}
return
}
func goroutineProfileInternal(p []profilerecord.StackRecord) (n int, ok bool) {
return goroutineProfileWithLabels(p, nil) return goroutineProfileWithLabels(p, nil)
} }
func saveg(pc, sp uintptr, gp *g, r *StackRecord) { func saveg(pc, sp uintptr, gp *g, r *profilerecord.StackRecord, pcbuf []uintptr) {
// To reduce memory usage, we want to allocate a r.Stack that is just big
// enough to hold gp's stack trace. Naively we might achieve this by
// recording our stack trace into mp.profStack, and then allocating a
// r.Stack of the right size. However, mp.profStack is also used for
// allocation profiling, so it could get overwritten if the slice allocation
// gets profiled. So instead we record the stack trace into a temporary
// pcbuf which is usually given to us by our caller. When it's not, we have
// to allocate one here. This will only happen for goroutines that were in a
// syscall when the goroutine profile started or for goroutines that manage
// to execute before we finish iterating over all the goroutines.
if pcbuf == nil {
pcbuf = makeProfStack()
}
var u unwinder var u unwinder
u.initAt(pc, sp, 0, gp, unwindSilentErrors) u.initAt(pc, sp, 0, gp, unwindSilentErrors)
n := tracebackPCs(&u, 0, r.Stack0[:]) n := tracebackPCs(&u, 0, pcbuf)
if n < len(r.Stack0) { r.Stack = make([]uintptr, n)
r.Stack0[n] = 0 copy(r.Stack, pcbuf)
}
} }
// Stack formats a stack trace of the calling goroutine into buf // Stack formats a stack trace of the calling goroutine into buf


@ -76,6 +76,7 @@ import (
"bufio" "bufio"
"fmt" "fmt"
"internal/abi" "internal/abi"
"internal/profilerecord"
"io" "io"
"runtime" "runtime"
"sort" "sort"
@ -411,7 +412,7 @@ type countProfile interface {
// as the pprof-proto format output. Translations from cycle count to time duration // as the pprof-proto format output. Translations from cycle count to time duration
// are done because The proto expects count and time (nanoseconds) instead of count // are done because The proto expects count and time (nanoseconds) instead of count
// and the number of cycles for block, contention profiles. // and the number of cycles for block, contention profiles.
func printCountCycleProfile(w io.Writer, countName, cycleName string, records []runtime.BlockProfileRecord) error { func printCountCycleProfile(w io.Writer, countName, cycleName string, records []profilerecord.BlockProfileRecord) error {
// Output profile in protobuf form. // Output profile in protobuf form.
b := newProfileBuilder(w) b := newProfileBuilder(w)
b.pbValueType(tagProfile_PeriodType, countName, "count") b.pbValueType(tagProfile_PeriodType, countName, "count")
@ -419,16 +420,18 @@ func printCountCycleProfile(w io.Writer, countName, cycleName string, records []
b.pbValueType(tagProfile_SampleType, countName, "count") b.pbValueType(tagProfile_SampleType, countName, "count")
b.pbValueType(tagProfile_SampleType, cycleName, "nanoseconds") b.pbValueType(tagProfile_SampleType, cycleName, "nanoseconds")
cpuGHz := float64(runtime_cyclesPerSecond()) / 1e9 cpuGHz := float64(pprof_cyclesPerSecond()) / 1e9
values := []int64{0, 0} values := []int64{0, 0}
var locs []uint64 var locs []uint64
expandedStack := pprof_makeProfStack()
for _, r := range records { for _, r := range records {
values[0] = r.Count values[0] = r.Count
values[1] = int64(float64(r.Cycles) / cpuGHz) values[1] = int64(float64(r.Cycles) / cpuGHz)
// For count profiles, all stack addresses are // For count profiles, all stack addresses are
// return PCs, which is what appendLocsForStack expects. // return PCs, which is what appendLocsForStack expects.
locs = b.appendLocsForStack(locs[:0], r.Stack()) n := pprof_fpunwindExpand(expandedStack[:], r.Stack)
locs = b.appendLocsForStack(locs[:0], expandedStack[:n])
b.pbSample(values, locs, nil) b.pbSample(values, locs, nil)
} }
b.build() b.build()
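
The cycles-to-nanoseconds conversion above is a plain division by the
clock rate in GHz. A tiny standalone illustration, assuming a 3 GHz
clock for the example (the real rate comes from pprof_cyclesPerSecond):

package main

import "fmt"

func main() {
	const cyclesPerSecond = 3e9 // assumption for the example
	cpuGHz := float64(cyclesPerSecond) / 1e9

	cycles := int64(6_000_000) // as reported in a block/mutex record
	ns := int64(float64(cycles) / cpuGHz)
	fmt.Println(ns) // 2000000, i.e. 2ms
}
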
@ -593,14 +596,14 @@ func writeHeapInternal(w io.Writer, debug int, defaultSampleType string) error {
// the two calls—so allocate a few extra records for safety // the two calls—so allocate a few extra records for safety
// and also try again if we're very unlucky. // and also try again if we're very unlucky.
// The loop should only execute one iteration in the common case. // The loop should only execute one iteration in the common case.
var p []runtime.MemProfileRecord var p []profilerecord.MemProfileRecord
n, ok := runtime.MemProfile(nil, true) n, ok := pprof_memProfileInternal(nil, true)
for { for {
// Allocate room for a slightly bigger profile, // Allocate room for a slightly bigger profile,
// in case a few more entries have been added // in case a few more entries have been added
// since the call to MemProfile. // since the call to MemProfile.
p = make([]runtime.MemProfileRecord, n+50) p = make([]profilerecord.MemProfileRecord, n+50)
n, ok = runtime.MemProfile(p, true) n, ok = pprof_memProfileInternal(p, true)
if ok { if ok {
p = p[0:n] p = p[0:n]
break break
@ -654,11 +657,11 @@ func writeHeapInternal(w io.Writer, debug int, defaultSampleType string) error {
fmt.Fprintf(w, "%d: %d [%d: %d] @", fmt.Fprintf(w, "%d: %d [%d: %d] @",
r.InUseObjects(), r.InUseBytes(), r.InUseObjects(), r.InUseBytes(),
r.AllocObjects, r.AllocBytes) r.AllocObjects, r.AllocBytes)
for _, pc := range r.Stack() { for _, pc := range r.Stack {
fmt.Fprintf(w, " %#x", pc) fmt.Fprintf(w, " %#x", pc)
} }
fmt.Fprintf(w, "\n") fmt.Fprintf(w, "\n")
printStackRecord(w, r.Stack(), false) printStackRecord(w, r.Stack, false)
} }
// Print memstats information too. // Print memstats information too.
@ -713,8 +716,8 @@ func writeThreadCreate(w io.Writer, debug int) error {
// Until https://golang.org/issues/6104 is addressed, wrap // Until https://golang.org/issues/6104 is addressed, wrap
// ThreadCreateProfile because there's no point in tracking labels when we // ThreadCreateProfile because there's no point in tracking labels when we
// don't get any stack-traces. // don't get any stack-traces.
return writeRuntimeProfile(w, debug, "threadcreate", func(p []runtime.StackRecord, _ []unsafe.Pointer) (n int, ok bool) { return writeRuntimeProfile(w, debug, "threadcreate", func(p []profilerecord.StackRecord, _ []unsafe.Pointer) (n int, ok bool) {
return runtime.ThreadCreateProfile(p) return pprof_threadCreateInternal(p)
}) })
} }
@ -723,15 +726,12 @@ func countGoroutine() int {
return runtime.NumGoroutine() return runtime.NumGoroutine()
} }
// runtime_goroutineProfileWithLabels is defined in runtime/mprof.go
func runtime_goroutineProfileWithLabels(p []runtime.StackRecord, labels []unsafe.Pointer) (n int, ok bool)
// writeGoroutine writes the current runtime GoroutineProfile to w. // writeGoroutine writes the current runtime GoroutineProfile to w.
func writeGoroutine(w io.Writer, debug int) error { func writeGoroutine(w io.Writer, debug int) error {
if debug >= 2 { if debug >= 2 {
return writeGoroutineStacks(w) return writeGoroutineStacks(w)
} }
return writeRuntimeProfile(w, debug, "goroutine", runtime_goroutineProfileWithLabels) return writeRuntimeProfile(w, debug, "goroutine", pprof_goroutineProfileWithLabels)
} }
func writeGoroutineStacks(w io.Writer) error { func writeGoroutineStacks(w io.Writer) error {
@ -755,14 +755,14 @@ func writeGoroutineStacks(w io.Writer) error {
return err return err
} }
func writeRuntimeProfile(w io.Writer, debug int, name string, fetch func([]runtime.StackRecord, []unsafe.Pointer) (int, bool)) error { func writeRuntimeProfile(w io.Writer, debug int, name string, fetch func([]profilerecord.StackRecord, []unsafe.Pointer) (int, bool)) error {
// Find out how many records there are (fetch(nil)), // Find out how many records there are (fetch(nil)),
// allocate that many records, and get the data. // allocate that many records, and get the data.
// There's a race—more records might be added between // There's a race—more records might be added between
// the two calls—so allocate a few extra records for safety // the two calls—so allocate a few extra records for safety
// and also try again if we're very unlucky. // and also try again if we're very unlucky.
// The loop should only execute one iteration in the common case. // The loop should only execute one iteration in the common case.
var p []runtime.StackRecord var p []profilerecord.StackRecord
var labels []unsafe.Pointer var labels []unsafe.Pointer
n, ok := fetch(nil, nil) n, ok := fetch(nil, nil)
@ -770,7 +770,7 @@ func writeRuntimeProfile(w io.Writer, debug int, name string, fetch func([]runti
// Allocate room for a slightly bigger profile, // Allocate room for a slightly bigger profile,
// in case a few more entries have been added // in case a few more entries have been added
// since the call to ThreadProfile. // since the call to ThreadProfile.
p = make([]runtime.StackRecord, n+10) p = make([]profilerecord.StackRecord, n+10)
labels = make([]unsafe.Pointer, n+10) labels = make([]unsafe.Pointer, n+10)
n, ok = fetch(p, labels) n, ok = fetch(p, labels)
if ok { if ok {
@ -784,12 +784,12 @@ func writeRuntimeProfile(w io.Writer, debug int, name string, fetch func([]runti
} }
type runtimeProfile struct { type runtimeProfile struct {
stk []runtime.StackRecord stk []profilerecord.StackRecord
labels []unsafe.Pointer labels []unsafe.Pointer
} }
func (p *runtimeProfile) Len() int { return len(p.stk) } func (p *runtimeProfile) Len() int { return len(p.stk) }
func (p *runtimeProfile) Stack(i int) []uintptr { return p.stk[i].Stack() } func (p *runtimeProfile) Stack(i int) []uintptr { return p.stk[i].Stack }
func (p *runtimeProfile) Label(i int) *labelMap { return (*labelMap)(p.labels[i]) } func (p *runtimeProfile) Label(i int) *labelMap { return (*labelMap)(p.labels[i]) }
var cpu struct { var cpu struct {
@ -894,20 +894,20 @@ func countMutex() int {
// writeBlock writes the current blocking profile to w. // writeBlock writes the current blocking profile to w.
func writeBlock(w io.Writer, debug int) error { func writeBlock(w io.Writer, debug int) error {
return writeProfileInternal(w, debug, "contention", runtime.BlockProfile) return writeProfileInternal(w, debug, "contention", pprof_blockProfileInternal)
} }
// writeMutex writes the current mutex profile to w. // writeMutex writes the current mutex profile to w.
func writeMutex(w io.Writer, debug int) error { func writeMutex(w io.Writer, debug int) error {
return writeProfileInternal(w, debug, "mutex", runtime.MutexProfile) return writeProfileInternal(w, debug, "mutex", pprof_mutexProfileInternal)
} }
// writeProfileInternal writes the current blocking or mutex profile depending on the passed parameters. // writeProfileInternal writes the current blocking or mutex profile depending on the passed parameters.
func writeProfileInternal(w io.Writer, debug int, name string, runtimeProfile func([]runtime.BlockProfileRecord) (int, bool)) error { func writeProfileInternal(w io.Writer, debug int, name string, runtimeProfile func([]profilerecord.BlockProfileRecord) (int, bool)) error {
var p []runtime.BlockProfileRecord var p []profilerecord.BlockProfileRecord
n, ok := runtimeProfile(nil) n, ok := runtimeProfile(nil)
for { for {
p = make([]runtime.BlockProfileRecord, n+50) p = make([]profilerecord.BlockProfileRecord, n+50)
n, ok = runtimeProfile(p) n, ok = runtimeProfile(p)
if ok { if ok {
p = p[:n] p = p[:n]
@ -926,19 +926,22 @@ func writeProfileInternal(w io.Writer, debug int, name string, runtimeProfile fu
w = tw w = tw
fmt.Fprintf(w, "--- %v:\n", name) fmt.Fprintf(w, "--- %v:\n", name)
fmt.Fprintf(w, "cycles/second=%v\n", runtime_cyclesPerSecond()) fmt.Fprintf(w, "cycles/second=%v\n", pprof_cyclesPerSecond())
if name == "mutex" { if name == "mutex" {
fmt.Fprintf(w, "sampling period=%d\n", runtime.SetMutexProfileFraction(-1)) fmt.Fprintf(w, "sampling period=%d\n", runtime.SetMutexProfileFraction(-1))
} }
expandedStack := pprof_makeProfStack()
for i := range p { for i := range p {
r := &p[i] r := &p[i]
fmt.Fprintf(w, "%v %v @", r.Cycles, r.Count) fmt.Fprintf(w, "%v %v @", r.Cycles, r.Count)
for _, pc := range r.Stack() { n := pprof_fpunwindExpand(expandedStack, r.Stack)
stack := expandedStack[:n]
for _, pc := range stack {
fmt.Fprintf(w, " %#x", pc) fmt.Fprintf(w, " %#x", pc)
} }
fmt.Fprint(w, "\n") fmt.Fprint(w, "\n")
if debug > 0 { if debug > 0 {
printStackRecord(w, r.Stack(), true) printStackRecord(w, stack, true)
} }
} }
@ -948,4 +951,26 @@ func writeProfileInternal(w io.Writer, debug int, name string, runtimeProfile fu
return b.Flush() return b.Flush()
} }
func runtime_cyclesPerSecond() int64 //go:linkname pprof_goroutineProfileWithLabels runtime.pprof_goroutineProfileWithLabels
func pprof_goroutineProfileWithLabels(p []profilerecord.StackRecord, labels []unsafe.Pointer) (n int, ok bool)
//go:linkname pprof_cyclesPerSecond runtime.pprof_cyclesPerSecond
func pprof_cyclesPerSecond() int64
//go:linkname pprof_memProfileInternal runtime.pprof_memProfileInternal
func pprof_memProfileInternal(p []profilerecord.MemProfileRecord, inuseZero bool) (n int, ok bool)
//go:linkname pprof_blockProfileInternal runtime.pprof_blockProfileInternal
func pprof_blockProfileInternal(p []profilerecord.BlockProfileRecord) (n int, ok bool)
//go:linkname pprof_mutexProfileInternal runtime.pprof_mutexProfileInternal
func pprof_mutexProfileInternal(p []profilerecord.BlockProfileRecord) (n int, ok bool)
//go:linkname pprof_threadCreateInternal runtime.pprof_threadCreateInternal
func pprof_threadCreateInternal(p []profilerecord.StackRecord) (n int, ok bool)
//go:linkname pprof_fpunwindExpand runtime.pprof_fpunwindExpand
func pprof_fpunwindExpand(dst, src []uintptr) int
//go:linkname pprof_makeProfStack runtime.pprof_makeProfStack
func pprof_makeProfStack() []uintptr


@ -2444,7 +2444,7 @@ func TestProfilerStackDepth(t *testing.T) {
runtime.SetMutexProfileFraction(oldMutexRate) runtime.SetMutexProfileFraction(oldMutexRate)
}) })
const depth = 32 const depth = 128
go produceProfileEvents(t, depth) go produceProfileEvents(t, depth)
awaitBlockedGoroutine(t, "chan receive", "goroutineDeep", 1) awaitBlockedGoroutine(t, "chan receive", "goroutineDeep", 1)


@ -5,6 +5,7 @@
package pprof package pprof
import ( import (
"internal/profilerecord"
"io" "io"
"math" "math"
"runtime" "runtime"
@ -12,7 +13,7 @@ import (
) )
// writeHeapProto writes the current heap profile in protobuf format to w. // writeHeapProto writes the current heap profile in protobuf format to w.
func writeHeapProto(w io.Writer, p []runtime.MemProfileRecord, rate int64, defaultSampleType string) error { func writeHeapProto(w io.Writer, p []profilerecord.MemProfileRecord, rate int64, defaultSampleType string) error {
b := newProfileBuilder(w) b := newProfileBuilder(w)
b.pbValueType(tagProfile_PeriodType, "space", "bytes") b.pbValueType(tagProfile_PeriodType, "space", "bytes")
b.pb.int64Opt(tagProfile_Period, rate) b.pb.int64Opt(tagProfile_Period, rate)
@ -29,7 +30,7 @@ func writeHeapProto(w io.Writer, p []runtime.MemProfileRecord, rate int64, defau
for _, r := range p { for _, r := range p {
hideRuntime := true hideRuntime := true
for tries := 0; tries < 2; tries++ { for tries := 0; tries < 2; tries++ {
stk := r.Stack() stk := r.Stack
// For heap profiles, all stack // For heap profiles, all stack
// addresses are return PCs, which is // addresses are return PCs, which is
// what appendLocsForStack expects. // what appendLocsForStack expects.


@ -8,6 +8,7 @@ import (
"bytes" "bytes"
"fmt" "fmt"
"internal/profile" "internal/profile"
"internal/profilerecord"
"internal/testenv" "internal/testenv"
"runtime" "runtime"
"slices" "slices"
@ -24,10 +25,10 @@ func TestConvertMemProfile(t *testing.T) {
// from these and get back to addr1 and addr2. // from these and get back to addr1 and addr2.
a1, a2 := uintptr(addr1)+1, uintptr(addr2)+1 a1, a2 := uintptr(addr1)+1, uintptr(addr2)+1
rate := int64(512 * 1024) rate := int64(512 * 1024)
rec := []runtime.MemProfileRecord{ rec := []profilerecord.MemProfileRecord{
{AllocBytes: 4096, FreeBytes: 1024, AllocObjects: 4, FreeObjects: 1, Stack0: [32]uintptr{a1, a2}}, {AllocBytes: 4096, FreeBytes: 1024, AllocObjects: 4, FreeObjects: 1, Stack: []uintptr{a1, a2}},
{AllocBytes: 512 * 1024, FreeBytes: 0, AllocObjects: 1, FreeObjects: 0, Stack0: [32]uintptr{a2 + 1, a2 + 2}}, {AllocBytes: 512 * 1024, FreeBytes: 0, AllocObjects: 1, FreeObjects: 0, Stack: []uintptr{a2 + 1, a2 + 2}},
{AllocBytes: 512 * 1024, FreeBytes: 512 * 1024, AllocObjects: 1, FreeObjects: 1, Stack0: [32]uintptr{a1 + 1, a1 + 2, a2 + 3}}, {AllocBytes: 512 * 1024, FreeBytes: 512 * 1024, AllocObjects: 1, FreeObjects: 1, Stack: []uintptr{a1 + 1, a1 + 2, a2 + 3}},
} }
periodType := &profile.ValueType{Type: "space", Unit: "bytes"} periodType := &profile.ValueType{Type: "space", Unit: "bytes"}


@ -930,10 +930,30 @@ func mcommoninit(mp *m, id int64) {
// malloc and runtime locks for mLockProfile. // malloc and runtime locks for mLockProfile.
// TODO(mknyszek): Implement lazy allocation if this becomes a problem. // TODO(mknyszek): Implement lazy allocation if this becomes a problem.
func mProfStackInit(mp *m) { func mProfStackInit(mp *m) {
mp.profStack = make([]uintptr, maxStack) mp.profStack = makeProfStackFP()
mp.mLockProfile.stack = make([]uintptr, maxStack) mp.mLockProfile.stack = makeProfStackFP()
} }
// makeProfStackFP creates a buffer large enough to hold a maximum-sized stack
// trace as well as any additional frames needed for frame pointer unwinding
// with delayed inline expansion.
func makeProfStackFP() []uintptr {
// The "1" term is to account for the first stack entry being
// taken up by a "skip" sentinel value for profilers which
// defer inline frame expansion until the profile is reported.
// The "maxSkip" term is for frame pointer unwinding, where we
// want to end up with maxLogicalStack frames but will discard
// some "physical" frames to account for skipping.
return make([]uintptr, 1+maxSkip+maxLogicalStack)
}
// makeProfStack returns a buffer large enough to hold a maximum-sized stack
// trace.
func makeProfStack() []uintptr { return make([]uintptr, maxLogicalStack) }
//go:linkname pprof_makeProfStack
func pprof_makeProfStack() []uintptr { return makeProfStack() }
func (mp *m) becomeSpinning() { func (mp *m) becomeSpinning() {
mp.spinning = true mp.spinning = true
sched.nmspinning.Add(1) sched.nmspinning.Add(1)
@ -3132,7 +3152,7 @@ func execute(gp *g, inheritTime bool) {
// Make sure that gp has had its stack written out to the goroutine // Make sure that gp has had its stack written out to the goroutine
// profile, exactly as it was when the goroutine profiler first stopped // profile, exactly as it was when the goroutine profiler first stopped
// the world. // the world.
tryRecordGoroutineProfile(gp, osyield) tryRecordGoroutineProfile(gp, nil, osyield)
} }
// Assign gp.m before entering _Grunning so running Gs have an // Assign gp.m before entering _Grunning so running Gs have an


@ -262,6 +262,11 @@ func fpTracebackPCs(fp unsafe.Pointer, pcBuf []uintptr) (i int) {
return i return i
} }
//go:linkname pprof_fpunwindExpand
func pprof_fpunwindExpand(dst, src []uintptr) int {
return fpunwindExpand(dst, src)
}
// fpunwindExpand expands a call stack from pcBuf into dst, // fpunwindExpand expands a call stack from pcBuf into dst,
// returning the number of PCs written to dst. // returning the number of PCs written to dst.
// pcBuf and dst should not overlap. // pcBuf and dst should not overlap.