runtime/pprof: rerun magnitude test on failure

Restructure TestCPUProfileMultithreadMagnitude so it will run again with
a longer duration on failure. Log the split between the user vs system
CPU time that rusage reports.

For #50232

Change-Id: Ice5b38ee7594dbee1eaa5686d32b968c306e3e85
Reviewed-on: https://go-review.googlesource.com/c/go/+/393934
Run-TryBot: Rhys Hiltner <rhys@justin.tv>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Michael Pratt <mpratt@google.com>
Trust: Michael Knyszek <mknyszek@google.com>
This commit is contained in:
Rhys Hiltner 2022-03-18 11:36:39 -07:00 committed by Michael Pratt
parent ac3efc83e6
commit 212bda0669
2 changed files with 44 additions and 30 deletions

View File

@ -188,10 +188,43 @@ func TestCPUProfileMultithreadMagnitude(t *testing.T) {
t.Run(tc.name, func(t *testing.T) {
t.Logf("Running with %d workers", tc.workers)
var cpuTime time.Duration
var userTime, systemTime time.Duration
matches := matchAndAvoidStacks(stackContains, []string{"runtime/pprof.cpuHog1"}, avoidFunctions())
p := testCPUProfile(t, matches, func(dur time.Duration) {
cpuTime = diffCPUTime(t, func() {
acceptProfile := func(t *testing.T, p *profile.Profile) bool {
if !matches(t, p) {
return false
}
ok := true
for i, unit := range []string{"count", "nanoseconds"} {
if have, want := p.SampleType[i].Unit, unit; have != want {
t.Logf("pN SampleType[%d]; %q != %q", i, have, want)
ok = false
}
}
// cpuHog1 called below is the primary source of CPU
// load, but there may be some background work by the
// runtime. Since the OS rusage measurement will
// include all work done by the process, also compare
// against all samples in our profile.
var value time.Duration
for _, sample := range p.Sample {
value += time.Duration(sample.Value[1]) * time.Nanosecond
}
totalTime := userTime + systemTime
t.Logf("compare %s user + %s system = %s vs %s", userTime, systemTime, totalTime, value)
if err := compare(totalTime, value, maxDiff); err != nil {
t.Logf("compare got %v want nil", err)
ok = false
}
return ok
}
testCPUProfile(t, acceptProfile, func(dur time.Duration) {
userTime, systemTime = diffCPUTime(t, func() {
var wg sync.WaitGroup
var once sync.Once
for i := 0; i < tc.workers; i++ {
@ -206,27 +239,6 @@ func TestCPUProfileMultithreadMagnitude(t *testing.T) {
wg.Wait()
})
})
for i, unit := range []string{"count", "nanoseconds"} {
if have, want := p.SampleType[i].Unit, unit; have != want {
t.Errorf("pN SampleType[%d]; %q != %q", i, have, want)
}
}
// cpuHog1 called above is the primary source of CPU
// load, but there may be some background work by the
// runtime. Since the OS rusage measurement will
// include all work done by the process, also compare
// against all samples in our profile.
var value time.Duration
for _, sample := range p.Sample {
value += time.Duration(sample.Value[1]) * time.Nanosecond
}
t.Logf("compare %s vs %s", cpuTime, value)
if err := compare(cpuTime, value, maxDiff); err != nil {
t.Errorf("compare got %v want nil", err)
}
})
}
}
@ -476,14 +488,14 @@ func testCPUProfile(t *testing.T, matches profileMatchFunc, f func(dur time.Dura
return nil
}
var diffCPUTimeImpl func(f func()) time.Duration
var diffCPUTimeImpl func(f func()) (user, system time.Duration)
func diffCPUTime(t *testing.T, f func()) time.Duration {
func diffCPUTime(t *testing.T, f func()) (user, system time.Duration) {
if fn := diffCPUTimeImpl; fn != nil {
return fn(f)
}
t.Fatalf("cannot measure CPU time on GOOS=%s GOARCH=%s", runtime.GOOS, runtime.GOARCH)
return 0
return 0, 0
}
func contains(slice []string, s string) bool {

View File

@ -15,7 +15,7 @@ func init() {
diffCPUTimeImpl = diffCPUTimeRUsage
}
func diffCPUTimeRUsage(f func()) time.Duration {
func diffCPUTimeRUsage(f func()) (user, system time.Duration) {
ok := true
var before, after syscall.Rusage
@ -32,8 +32,10 @@ func diffCPUTimeRUsage(f func()) time.Duration {
}
if !ok {
return 0
return 0, 0
}
return time.Duration((after.Utime.Nano() + after.Stime.Nano()) - (before.Utime.Nano() + before.Stime.Nano()))
user = time.Duration(after.Utime.Nano() - before.Utime.Nano())
system = time.Duration(after.Stime.Nano() - before.Stime.Nano())
return user, system
}