runtime/pprof: rerun magnitude test on failure

Restructure TestCPUProfileMultithreadMagnitude so it will run again with a longer duration on failure. Log the split between the user vs system CPU time that rusage reports. For #50232 Change-Id: Ice5b38ee7594dbee1eaa5686d32b968c306e3e85 Reviewed-on: https://go-review.googlesource.com/c/go/+/393934 Run-TryBot: Rhys Hiltner <rhys@justin.tv> TryBot-Result: Gopher Robot <gobot@golang.org> Reviewed-by: Michael Pratt <mpratt@google.com> Trust: Michael Knyszek <mknyszek@google.com>
2022-03-18 11:36:39 -07:00 · 2022-03-18 11:36:39 -07:00 · 212bda0669
parent ac3efc83e6
commit 212bda0669
2 changed files with 44 additions and 30 deletions
--- a/src/runtime/pprof/pprof_test.go
+++ b/src/runtime/pprof/pprof_test.go
@ -188,10 +188,43 @@ func TestCPUProfileMultithreadMagnitude(t *testing.T) {
 		t.Run(tc.name, func(t *testing.T) {
 			t.Logf("Running with %d workers", tc.workers)

-			var cpuTime time.Duration
+			var userTime, systemTime time.Duration
 			matches := matchAndAvoidStacks(stackContains, []string{"runtime/pprof.cpuHog1"}, avoidFunctions())
-			p := testCPUProfile(t, matches, func(dur time.Duration) {
-				cpuTime = diffCPUTime(t, func() {
+			acceptProfile := func(t *testing.T, p *profile.Profile) bool {
+				if !matches(t, p) {
+					return false
+				}
+
+				ok := true
+				for i, unit := range []string{"count", "nanoseconds"} {
+					if have, want := p.SampleType[i].Unit, unit; have != want {
+						t.Logf("pN SampleType[%d]; %q != %q", i, have, want)
+						ok = false
+					}
+				}
+
+				// cpuHog1 called below is the primary source of CPU
+				// load, but there may be some background work by the
+				// runtime. Since the OS rusage measurement will
+				// include all work done by the process, also compare
+				// against all samples in our profile.
+				var value time.Duration
+				for _, sample := range p.Sample {
+					value += time.Duration(sample.Value[1]) * time.Nanosecond
+				}
+
+				totalTime := userTime + systemTime
+				t.Logf("compare %s user + %s system = %s vs %s", userTime, systemTime, totalTime, value)
+				if err := compare(totalTime, value, maxDiff); err != nil {
+					t.Logf("compare got %v want nil", err)
+					ok = false
+				}
+
+				return ok
+			}
+
+			testCPUProfile(t, acceptProfile, func(dur time.Duration) {
+				userTime, systemTime = diffCPUTime(t, func() {
 					var wg sync.WaitGroup
 					var once sync.Once
 					for i := 0; i < tc.workers; i++ {
@ -206,27 +239,6 @@ func TestCPUProfileMultithreadMagnitude(t *testing.T) {
 					wg.Wait()
 				})
 			})
-
-			for i, unit := range []string{"count", "nanoseconds"} {
-				if have, want := p.SampleType[i].Unit, unit; have != want {
-					t.Errorf("pN SampleType[%d]; %q != %q", i, have, want)
-				}
-			}
-
-			// cpuHog1 called above is the primary source of CPU
-			// load, but there may be some background work by the
-			// runtime. Since the OS rusage measurement will
-			// include all work done by the process, also compare
-			// against all samples in our profile.
-			var value time.Duration
-			for _, sample := range p.Sample {
-				value += time.Duration(sample.Value[1]) * time.Nanosecond
-			}
-
-			t.Logf("compare %s vs %s", cpuTime, value)
-			if err := compare(cpuTime, value, maxDiff); err != nil {
-				t.Errorf("compare got %v want nil", err)
-			}
 		})
 	}
 }
@ -476,14 +488,14 @@ func testCPUProfile(t *testing.T, matches profileMatchFunc, f func(dur time.Dura
 	return nil
 }

-var diffCPUTimeImpl func(f func()) time.Duration
+var diffCPUTimeImpl func(f func()) (user, system time.Duration)

-func diffCPUTime(t *testing.T, f func()) time.Duration {
+func diffCPUTime(t *testing.T, f func()) (user, system time.Duration) {
 	if fn := diffCPUTimeImpl; fn != nil {
 		return fn(f)
 	}
 	t.Fatalf("cannot measure CPU time on GOOS=%s GOARCH=%s", runtime.GOOS, runtime.GOARCH)
-	return 0
+	return 0, 0
 }

 func contains(slice []string, s string) bool {
--- a/src/runtime/pprof/rusage_test.go
+++ b/src/runtime/pprof/rusage_test.go
@ -15,7 +15,7 @@ func init() {
 	diffCPUTimeImpl = diffCPUTimeRUsage
 }

-func diffCPUTimeRUsage(f func()) time.Duration {
+func diffCPUTimeRUsage(f func()) (user, system time.Duration) {
 	ok := true
 	var before, after syscall.Rusage

@ -32,8 +32,10 @@ func diffCPUTimeRUsage(f func()) time.Duration {
 	}

 	if !ok {
-		return 0
+		return 0, 0
 	}

-	return time.Duration((after.Utime.Nano() + after.Stime.Nano()) - (before.Utime.Nano() + before.Stime.Nano()))
+	user = time.Duration(after.Utime.Nano() - before.Utime.Nano())
+	system = time.Duration(after.Stime.Nano() - before.Stime.Nano())
+	return user, system
 }