diff --git a/src/runtime/metrics_test.go b/src/runtime/metrics_test.go
index 83f6ecddf3..c138a2a1ba 100644
--- a/src/runtime/metrics_test.go
+++ b/src/runtime/metrics_test.go
@@ -5,7 +5,6 @@
 package runtime_test
 
 import (
-	"internal/testenv"
 	"reflect"
 	"runtime"
 	"runtime/debug"
@@ -661,17 +660,6 @@ func TestCPUMetricsSleep(t *testing.T) {
 		t.Skip("wasip1 currently busy-waits in idle time; test not applicable")
 	}
 
-	// Unconditionally skip this test as flaky.
-	//
-	// There's a fundamental issue with this test, which is that there's no
-	// guarantee the application will go idle; background goroutines and
-	// time spent in the scheduler going to sleep can always erode idle time
-	// sufficiently such that minimum idle time (or maximum user time) stays
-	// within some threshold.
-	//
-	// Leave this as skipped while we figure out a better way to check this.
-	testenv.SkipFlaky(t, 60376)
-
 	names := []string{
 		"/cpu/classes/idle:cpu-seconds",
 
@@ -695,33 +683,70 @@ func TestCPUMetricsSleep(t *testing.T) {
 	}
 	m1, m2 := prep(), prep()
 
-	// Read 1.
-	runtime.GC() // Update /cpu/classes metrics.
-	metrics.Read(m1)
+	const (
+		// Expected time spent idle.
+		dur = 100 * time.Millisecond
 
-	// Sleep.
-	const dur = 100 * time.Millisecond
-	time.Sleep(dur)
+		// maxFailures is the number of consecutive failures requires to cause the test to fail.
+		maxFailures = 10
+	)
 
-	// Read 2.
-	runtime.GC() // Update /cpu/classes metrics.
-	metrics.Read(m2)
+	failureIdleTimes := make([]float64, 0, maxFailures)
 
 	// If the bug we expect is happening, then the Sleep CPU time will be accounted for
-	// as user time rather than idle time.
+	// as user time rather than idle time. In an ideal world we'd expect the whole application
+	// to go instantly idle the moment this goroutine goes to sleep, and stay asleep for that
+	// duration. However, the Go runtime can easily eat into idle time while this goroutine is
+	// blocked in a sleep. For example, slow platforms might spend more time expected in the
+	// scheduler. Another example is that a Go runtime background goroutine could run while
+	// everything else is idle. Lastly, if a running goroutine is descheduled by the OS, enough
+	// time may pass such that the goroutine is ready to wake, even though the runtime couldn't
+	// observe itself as idle with nanotime.
 	//
-	// TODO(mknyszek): This number here is wrong. Background goroutines and just slow
-	// platforms spending a non-trivial amount of time in the scheduler doing things
-	// could easily erode idle time beyond this minimum.
-	minIdleCPUSeconds := dur.Seconds() * float64(runtime.GOMAXPROCS(-1))
+	// To deal with all this, we give a half-proc's worth of leniency.
+	//
+	// We also retry multiple times to deal with the fact that the OS might deschedule us before
+	// we yield and go idle. That has a rare enough chance that retries should resolve it.
+	// If the issue we expect is happening, it should be persistent.
+	minIdleCPUSeconds := dur.Seconds() * (float64(runtime.GOMAXPROCS(-1)) - 0.5)
 
-	if dt := m2[0].Value.Float64() - m1[0].Value.Float64(); dt < minIdleCPUSeconds {
-		for i := range names {
-			if m1[i].Value.Kind() == metrics.KindBad {
-				continue
-			}
-			t.Logf("%s %0.3f\n", names[i], m2[i].Value.Float64()-m1[i].Value.Float64())
+	// Let's make sure there's no background scavenge work to do.
+	//
+	// The runtime.GC calls below ensure the background sweeper
+	// will not run during the idle period.
+	debug.FreeOSMemory()
+
+	for retries := 0; retries < maxFailures; retries++ {
+		// Read 1.
+		runtime.GC() // Update /cpu/classes metrics.
+		metrics.Read(m1)
+
+		// Sleep.
+		time.Sleep(dur)
+
+		// Read 2.
+		runtime.GC() // Update /cpu/classes metrics.
+		metrics.Read(m2)
+
+		dt := m2[0].Value.Float64() - m1[0].Value.Float64()
+		if dt >= minIdleCPUSeconds {
+			// All is well. Test passed.
+			return
 		}
-		t.Errorf(`time.Sleep did not contribute enough to "idle" class: %.5fs < %.5fs`, dt, minIdleCPUSeconds)
+		failureIdleTimes = append(failureIdleTimes, dt)
+		// Try again.
 	}
+
+	// We couldn't observe the expected idle time even once.
+	for i, dt := range failureIdleTimes {
+		t.Logf("try %2d: idle time = %.5fs\n", i+1, dt)
+	}
+	t.Logf("try %d breakdown:\n", len(failureIdleTimes))
+	for i := range names {
+		if m1[i].Value.Kind() == metrics.KindBad {
+			continue
+		}
+		t.Logf("\t%s %0.3f\n", names[i], m2[i].Value.Float64()-m1[i].Value.Float64())
+	}
+	t.Errorf(`time.Sleep did not contribute enough to "idle" class: minimum idle time = %.5fs`, minIdleCPUSeconds)
 }